Completed
Push — master ( e5b792...18cc61 )
by De
31s
created

comics.py (7 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The number of the latest comic is read from `info.0.json` at the
        site root. Every number after `last_comic['num']` (or from 1 when
        `last_comic` is falsy) is then fetched and yielded as a dict.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Comic number 404 is never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # The single image URL is stored as a one-element list.
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # Date parts are converted to int before the comic is yielded.
            comic['day'] = int(comic['day'])
            comic['month'] = int(comic['month'])
            comic['year'] = int(comic['year'])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` unchanged. Meant to be assigned as a
    class attribute of a comic class.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` joined to `cls.url` with
    `urljoin_wrapper`. Meant to be assigned as a class attribute of a
    comic class.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True for the next comic and False for the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `get_next_comic` expects either None (the page is skipped) or a
        dict without a 'url' key (it is added by `get_next_comic`).
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (logged wrapper around `get_navi_link`)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (logged wrapper around `get_navi_link`)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic['url']`, or from the
        first comic link when `last_comic` is falsy, then follows "next"
        links until there is none or until a link points to the URL that
        was just visited.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # Development aid, disabled by default:
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page ends the walk.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be fetched
        without an HTTP error, False otherwise.
        """
        # Imported here because the module-level `import urllib` does not
        # guarantee that the `urllib.error` submodule is loaded.
        from urllib.error import HTTPError

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward, and forward then backward, must lead
        back to `url`. Returns True when both round trips succeed.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comic.

        `get_next_comic` expects either None (the element is skipped) or a
        dict without a 'url' key (it is added by `get_next_comic`).
        """
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in the order given by
        `get_archive_elements`. When `last_comic` is falsy every element
        is fetched; otherwise elements are ignored up to and including the
        one whose URL equals `last_comic['url']`.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        # Materialised so that the number of elements can be reported below.
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last retrieved comic found: the following ones are new.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up a 'link' tag whose rel is 'next' (when `next_` is true) or
    'prev' (otherwise) in `last_soup`.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up an 'a' tag whose rel is 'next' (when `next_` is true) or
    'prev' (otherwise) in `last_soup`.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up an 'a' tag with class 'navi navi-next' (when `next_` is true)
    or 'navi navi-prev' (otherwise) in `last_soup`.
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up an 'a' tag with class 'navi comic-nav-next navi-next' (when
    `next_` is true) or 'navi comic-nav-previous navi-prev' (otherwise)
    in `last_soup`.
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up an 'a' tag with class 'comic-nav-base comic-nav-next' (when
    `next_` is true) or 'comic-nav-base comic-nav-previous' (otherwise)
    in `last_soup`.
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and look up an 'a' tag with class
    'navi navi-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url`, look up a 'div' tag with class
    'nav-first' and return the result of searching an 'a' tag inside it.
    """
    return get_soup_at_url(cls.url).find('div', class_="nav-first").find('a')
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and look up an 'a' tag with class
    'comic-nav-base comic-nav-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    a URL provided by the class.

    The returned dict has a single 'href' key holding `cls.first_url`.

    Note: The first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comic. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.

    The starting URL is `cls.first_url` when the class defines it and is
    asked interactively otherwise. Every visited URL is printed.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    # Walk backward until a page has no "previous" link.
    while comic:
        url = cls.get_url_from_link(comic)
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics.

        `last_comic` is ignored; an empty list is returned.
        """
        cls.log("comic is considered as empty - returning no comic")
        return []
358
359
# NOTE: the review page shows a "View Code Duplication" badge at this point.
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the 'img' tags whose src starts with
        `<cls.url>/wp-content/uploads/`. Title and date come from the
        'og:title' and 'article:published_time' meta tags.
        """
        # The site URL is escaped so that its dots are matched literally.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
384
385
# NOTE: the review page shows a "View Code Duplication" badge at this point.
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to override `first_url`, which is used as the
    starting point through `simulate_first_link`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder to be overridden by each concrete blog class.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is read from the 'entry-date' span and parsed with the
        "%d %B %Y" format using the "fr_FR.utf8" locale. Images come from
        the 'og:image' meta tags.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
409
410
411
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    # Page used as the starting point of the navigation.
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
417
418
419
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
426
427
428
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    # Page used as the starting point of the navigation.
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
434
435
436
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    # Page used as the starting point of the navigation.
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
442
443
444
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    # Page used as the starting point of the navigation.
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
450
451
452
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    # Page used as the starting point of the navigation.
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
458
459
460
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    # Page used as the starting point of the navigation.
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
466
467
468
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    # Page used as the starting point of the navigation.
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
474
475
476
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Title and description come from the 'og:title' and
        'og:description' meta tags, author and date from the
        'author vcard' and 'entry-date' spans. Images are the 'img' tags
        of the 'entry-content' div, minus the last seven.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
507
508
509
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the 'a' tag found in the relevant 'li' element, or None
        when there is no such element.
        """
        # prev is next / next is prev: the 'li' with class 'prev' is used
        # for the next comic and the one with class 'next' for the previous.
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Title comes from the 'twitter:title' meta tag, images from the
        'og:image' meta tags and the date from the 'dc:date' span.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
543
544
545
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic (hardcoded link-like dict)."""
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The title is the 'title' item of `link`. The date is extracted
        from the three numeric path components of the comic URL
        (year/month/day). Images are the 'img' tags of the 'entry' div.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
573
574
575
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the 'img' tags of the div with id 'comic'; their src is
        joined to `cls.url`. Author and date are read from the
        'post-content' div, the title from the 'post-title' h2.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page layout: at least one image, and each
        # image has identical alt/title equal to the post title or empty.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
608
609
610
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # GenericEmptyComic is listed first among the bases, so its
    # get_next_comic (returning no comic) takes precedence.
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images come from the 'og:image' meta tags, followed by the
        'data-oversrc' values of 'img' tags not already listed. URLs
        ending with "/2016/01/bonus-panel.png" are dropped. Tags default
        to an empty string when the 'article:tag' meta tag is missing.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
644
645
646
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is read from the 'date-header' h2 and parsed with the
        "%A %d %B %Y" format using the "fr_FR.utf8" locale. Images are the
        'img' tags of the 'entry-body' div.
        """
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
670
671
672
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # GenericEmptyComic is listed first among the bases, so its
    # get_next_comic (returning no comic) takes precedence.
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Title and date are read from the 'comic-title' h1 and the
        'comic-meta entry-meta' header. Images come from the 'og:image'
        meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
697
698
699
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # GenericEmptyComic is listed first among the bases, so its
    # get_next_comic (returning no comic) takes precedence.
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Title and date are read from the 'comic-title' h1 and the
        'comic-meta entry-meta' header. Images come from the 'og:image'
        meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
723
724
725
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the value of the `p` query parameter of the
        'shortlink' URL. Exactly one image is expected in the div with id
        'comic'; its alt and title attributes give 'title' and 'title2'.
        """
        # The site URL is escaped so that its dots are matched literally.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
751
752
753
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Looks up the 'a' tag with class 'comic-arrow-right' (next) or
        'comic-arrow-left' (previous).
        """
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is extracted from the comic URL, expected to look like
        `<cls.url>/comic/<year>/<month>/<day>`. Images are the
        'img-responsive' images of the 'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # The site URL is escaped so that its dots are matched literally.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
781
# NOTE: the review page shows a "View Code Duplication" badge at this point.
783
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the ``<a>`` found inside the navigation div, or None when
        that div is missing.
        """
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if nav_div:
            return nav_div.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the page's ``<meta>`` properties.
        """
        def meta_content(prop):
            """Content of the first <meta> element with the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(
            meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
819
820
821
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the keys 'title', 'description' and 'img'.
        """
        # Date is on the archive page
        def last_meta_content(prop):
            """Content of the last <meta> element with the given property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        desc = last_meta_content('og:description')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Every image is expected to repeat the page title in title and alt.
        assert all(img['title'] == img['alt'] == title for img in comic_imgs)
        return {
            'title': title,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
844
845
846
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the parent of the 'first' button image)."""
        home = get_soup_at_url(cls.url)
        return home.find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the button image, or None when that parent
        has no 'href' attribute.
        """
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        parent = last_soup.find('img', src=button_src).parent
        if parent.get('href') is None:
            return None
        return parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images whose source ends with one of a fixed list of file names
        are filtered out; every remaining image is kept.
        """
        excluded_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        comic_imgs = [candidate for candidate in soup.find_all('img')
                      if not candidate['src'].endswith(excluded_suffixes)]
        alt_texts = (img.get('alt') for img in comic_imgs)
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt for alt in alt_texts if alt),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
878
879
880
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict whose only key, 'img', lists the image sources found
        in the ``comic`` div.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Each image is expected to carry identical alt and title texts.
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        sources = [img['src'] for img in comic_imgs]
        return {
            'img': sources,
        }
898
899
900
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date come from the post header; images come from
        the ``comic`` div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
924
925
926
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        'title2' is the title attribute of the first image in the
        ``comicpane`` div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        # Each image is expected to carry identical alt and title texts.
        assert all(img['alt'] == img['title'] for img in pane_imgs)
        first_img = pane_imgs[0]
        return {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'title2': first_img['title'],
            'author': author,
        }
954
955
956
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images with an empty 'src' are left out of the 'img' list.
        """
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(
            soup.find("span", class_="comic_date").string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Alt and title of every image are expected to equal the page title.
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        sources = [img['src'] for img in comic_imgs if img["src"]]
        return {
            'title': title,
            'img': sources,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
983
984
985
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is always listed; the image of the ``aftercomic``
        div is appended when that div exists and its source is non-empty.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            bonus_src = after_div.find('img')['src']
            if bonus_src:
                img_urls.append(bonus_src)
        publish_div = soup.find('div', class_='cc-publishtime')
        posted = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in img_urls],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1017
1018
1019
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the links of the ``all_thumbnails`` div in reverse page order."""
        home = get_soup_at_url(cls.url)
        links = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one ``og:image`` meta element is expected on the page.
        """
        comic_name = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        assert len(image_metas) == 1
        return {
            'name': comic_name,
            'img': [meta['content'] for meta in image_metas],
        }
1042
1043
1044
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The description falls back to an empty string when the page has no
        ``og:description`` meta element.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        posted = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1070
1071
1072
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # The site URL is escaped so that its dots only match literal dots.
    comic_num_re = re.compile(r'%s/\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching comic_num_re, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        links = get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re)
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number comes from the archive URL and the date from the
        image file name, which is expected to contain ``/<y>-<m>-<d>-``.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).group(1))
        img = soup.find('div', id='comic').find('img')
        # Alt and title of the image are expected to be identical.
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = (int(part) for part in comic_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1108
1109
1110
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (first link of the ``centered_nav`` div)."""
        nav = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL (``<url>/Y/M/D/``). Images
        without a 'src' attribute are skipped; 'texts' joins the non-empty
        image titles.
        """
        url = cls.get_url_from_link(link)
        # Escape the site URL so that its dots only match literal dots.
        date_re = re.compile(
            '^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = (int(part) for part in date_re.match(url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        imgs = story.find_all('img')
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(i['src'])
                    for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1138
1139
1140
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics.

    All retrieval logic is inherited from GenericBouletCorp; this class
    only supplies the site identity.
    """
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # NOTE(review): this assignment shadows GenericBouletCorp._categories
    # unless the base class merges categories along the MRO - confirm.
    _categories = ('FRANCAIS', )
1146
1147
1148
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics.

    All retrieval logic is inherited from GenericBouletCorp; this class
    only supplies the site identity.
    """
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1153
1154
1155
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-joined title attributes of the images found
        in the ``comic`` div.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join(img['title'] for img in comic_imgs)
        # Each image is expected to carry identical alt and title texts.
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in comic_imgs],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1180
1181
1182
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the alt text of the first image of the ``comic`` div,
        or an empty string when that div holds no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        posted = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if comic_imgs:
            first_img = comic_imgs[0]
            title = first_img['alt']
            # Alt and title of the first image are expected to be identical.
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in comic_imgs],
        }
1212
1213
1214
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has an ``extrapanelbutton`` div, the page it links to
        is fetched as well and its ``extrapanelimage`` images are appended
        to the image list.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs = comic_div.find_all('img')
        else:
            imgs = []
        extra_div = soup.find('div', id='extrapanelbutton')
        if extra_div:
            extra_url = extra_div.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            imgs.extend(extra_page.find_all('img', class_='extrapanelimage'))
        else:
            extra_url = None
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
1248
1249
1250
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation link has no 'href' attribute.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept
        # as-is; confirm whether it is deliberate or a typo.
        nav_class = 'next-comic' if next_ else 'previous-comic '
        nav_link = last_soup.find('a', class_=nav_class)
        if nav_link.get('href') is None:
            return None
        return nav_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last '/'-separated component of
        the ``og:url`` meta content.
        """
        canonical_url = soup.find('meta', property='og:url')['content']
        num = int(canonical_url.split('/')[-2])
        posted = string_to_date(soup.find('h3').find('a').string, '%Y.%m.%d')
        credit = soup.find('small', class_="author-credit-name").string
        assert credit.startswith('by ')
        author = credit[3:]  # drop the leading 'by '
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, img['src']))
                    for img in imgs]
        }
1289
1290
1291
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the '/comic/<n>' links of
        the home page; iteration resumes after ``last_comic['num']`` when a
        last comic is given.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        img_src_re = re.compile('^/images/comics/')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(a['href']).group(1)) for a in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are listed in the reverse of their page order.
            imgs = list(soup.find_all('img', src=img_src_re))
            imgs.reverse()
            description = soup.find('meta', attrs={'name': 'description'})['content']
            titles = (img.get('title') for img in imgs)
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in titles if t),
                'img': [urljoin_wrapper(url, img['src']) for img in imgs],
                'description': description,
            }
1321
1322
1323
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Dots in the site URL and in 'index.php' are escaped so that they only
    # match literal dots.
    comic_link_re = re.compile(
        r'^%s/index\.php\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        links = get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is the text of the archive link and 'text' is the string
        of the node following that link. The image is the first one whose
        source starts with ``<url>/comics/``.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1356
1357
1358
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # The site URL is escaped so that its dots only match literal dots.
    comic_link_re = re.compile(
        '^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        links = get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL (``<url>/Y/M/D/...``) and the
        title is the text of the archive link.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = (int(part) for part in cls.comic_link_re.match(url).groups())
        img = soup.find('div', id='comic').find('img')
        # The image's alt text is expected to equal the archive title.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1386
1387
1388
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page and, for each
        month that may hold comics newer than the last retrieved one, yields
        a dict with the keys 'url', 'year', 'month', 'day' and 'img' for
        every image dated after it.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The raw strings are kept for URL and pattern building;
            # the integers are used for date arithmetic.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months whose first day plus 31 days is before last_date.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).group(1))
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1422
1423
1424
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # The site URL is escaped so that its dots only match literal dots.
    comic_url_re = re.compile('^%s/([0-9]*)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's links matching comic_url_re, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The comic number comes from the archive URL, the title from the
        archive link text and the image is the first one whose source
        matches comic_img_re.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1447
1448
1449
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the 'first' button image, or None when the
        image is not on the page.
        """
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the matching button image, or None when the
        image is not on the page.
        """
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the keys 'img' and 'title', both read from the
        page's ``<meta>`` elements.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1478
1479
1480
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the parent of the 'First' button image)."""
        first_button_re = re.compile('.*/First.png')
        home = get_soup_at_url(cls.url)
        return home.find('img', src=first_button_re).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the button image, or None when that parent
        has no 'href' attribute.
        """
        button_re = re.compile('.*/Next.png' if next_ else '.*/Back.png')
        parent = last_soup.find('img', src=button_re).parent
        if parent.get('href') is None:
            return None
        return parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are taken from the page's ``<link rel="image_src">`` elements.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_header = soup.find('h2', class_='date-header')
        posted = string_to_date(date_header.string, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }
1512
1513
1514
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        link_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the ``single-article`` div is listed, with its
        source joined to the class URL.
        """
        title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in article_imgs],
        }
1538
1539
1540
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor whose href ends with 'comic=1')."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its title)."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the trailing 'comic=<digits>' part of
        the comic url; the image is the first one served from /oc/comics/.
        """
        img_src_re = re.compile('^/oc/comics/.*')
        comic_num_re = re.compile('.*comic=([0-9]*)$')
        comic_url = cls.get_url_from_link(link)
        num = int(comic_num_re.match(comic_url).group(1))
        comic_img = soup.find('img', src=img_src_re)
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, comic_img['src'])],
            'title': comic_img.get('title')
        }
1570
1571
1572
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the div with id 'st')."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the 'nx' (next) or 'pvs' (previous) div;
        None is returned when that div is not found.
        """
        nav_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image (inside div 'tt') and one strip image (id
        'strip') are expected; the description joins their 'title' attributes.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = [*title_imgs, *strip_imgs]
        description = ' '.join(img['title'] for img in all_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in all_imgs],
            'description': description,
        }
1606
1607
1608
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its accesskey)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description are read from meta tags; images are the <img>
        elements carrying itemprop="image".
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        comic_imgs = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
1632
1633
1634
class SomethingOfThatIlk(GenericEmptyComic):
    """Class to retrieve the Something Of That Ilk comics."""
    # Does not exist anymore, hence the GenericEmptyComic base.
    name = "somethingofthatilk"
    long_name = "Something Of That Ilk"
    url = "http://www.somethingofthatilk.com"
1639
1640
1641
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the og:title meta content; images are all <img> elements
        of the div with id 'comic'.
        """
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
        }
1659
1660
1661
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive 'bookmark' links, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has no div with id 'comic', image list, alt text and
        title are left empty; date and tags are always extracted.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Defaults used for posts without a comic div.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_img = comic_div.find('img')
            img_src = [comic_img['src']]
            alt = comic_img['alt']
            assert alt == comic_img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(t.string for t in tag_links),
        }
1698
1699
1700
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns image urls, post title and the publication date parsed from
        the 'post-date' span ("%B %d, %Y").
        """
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        info = {'img': [img['src'] for img in comic_imgs]}
        info['title'] = title
        info['day'] = day.day
        info['month'] = day.month
        info['year'] = day.year
        return info
1722
1723
1724
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every comic image must have identical 'alt' and 'title' attributes;
        the 'alt' of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        comic_imgs = comic_div.find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        first_img = comic_imgs[0]
        alt = first_img['alt']
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'alt': alt,
        }
1745
1746
1747
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Each comic image is expected to have 'alt' and 'title' both equal to
        the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
        }
1773
1774
1775
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is built by joining the 'title' attributes of the comic
        images (each of which must equal the image's 'alt').
        """
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        assert all(img['title'] == img['alt'] for img in comic_imgs)
        joined_titles = ' '.join(img['title'] for img in comic_imgs)
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': joined_titles,
        }
1794
1795
1796
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # Fixed: the url used to end with a stray space ('http://respawncomic.com ').
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication time are read from meta tags; images
        are the og:image meta contents minus a fixed set of site images.

        Returns a dict with keys 'title', 'author', 'day', 'month', 'year'
        and 'img' (list of image urls).
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries excluded from the returned image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1828
1829
1830
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The 'alt' of the first comic image is reported; all images must have
        matching 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_img = comic_imgs[0]
        alt = first_img['alt']
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'alt': alt,
        }
1857
1858
1859
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is required in the 'comicpane' div and each image
        must have 'title' and 'alt' equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        assert all(img['title'] == img['alt'] == title for img in pane_imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'author': author,
        }
1887
1888
1889
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Extracts the page title, the shortlink, the images of the
        'entry-content' div, the rel='tag' link texts and the date taken from
        the first 10 characters of the <time> 'datetime' attribute.
        """
        title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(tag.string for tag in tag_links)
        timestamp = soup.find('time')['datetime']
        day = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [img['src'] for img in content_imgs],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
1916
1917
1918
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor with id 'firstlink')."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its id)."""
        wanted_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the last '/'-separated component of the comic
        url; the image 'src' is stripped of surrounding whitespace before
        being joined to that url.
        """
        comic_img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = comic_img.get('title')
        img_url = urljoin_wrapper(comic_url, comic_img['src'].strip())
        last_component = comic_url.split('/')[-1]
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': int(last_component),
        }
1947
1948
1949
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of the cell's link)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell, the month and day from the
        cell's previous sibling and the year from the first numeric path
        component of the comic url.

        Returns a dict with keys 'month', 'year', 'day', 'img' and 'title'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is meant to be matched literally: escape it so that '.'
        # (and any other regex metacharacter) is not interpreted as a pattern.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is used. The former
        # `assert len(imgs) == 1` was always true for this one-element list.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1985
1986
1987
class DiscoBleach(GenericEmptyComic):
    """Class to retrieve Disco Bleach Comics."""
    # Does not work anymore, hence the GenericEmptyComic base.
    name = "discobleach"
    long_name = "Disco Bleach"
    url = "http://discobleach.com"
1992
1993
1994
class TubeyToons(GenericEmptyComic):
    """Class to retrieve TubeyToons comics."""
    # Does not work anymore, hence the GenericEmptyComic base.
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = "tubeytoons"
    long_name = "Tubey Toons"
    url = "http://tubeytoons.com"
    # NOTE(review): spelled 'TUNEYTOONS' (not 'TUBEYTOONS'); kept unchanged in
    # case other classes or callers rely on this exact value - confirm.
    _categories = ("TUNEYTOONS", )
2002
2003
2004
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The author is the second child of the 'post-author' span. At least
        one image is required, and all images must share the same 'title'
        and 'alt' value, which is reported as 'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt = pane_imgs[0]['title']
        assert all(img['title'] == img['alt'] == alt for img in pane_imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
2032
2033
2034
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div. The title is that
        image's 'title' attribute, or an empty string when there is no image
        or no such attribute.

        Returns a dict with keys 'img' (list of urls) and 'title'.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links pointing under '<url>/comic/', in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is meant to be matched literally: escape it so that '.'
        # is not treated as a regex wildcard.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2058
2059
2060
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor titled "First")."""
        latest_page = get_soup_at_url(cls.url)
        return latest_page.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its title)."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only images of the 'comic' div with empty 'alt' and 'title'
        attributes are kept; the date comes from the stripped 'date' span.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        day = string_to_date(raw_date.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2090
2091
2092
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Pages without a div with id 'comic' yield an empty image list and an
        empty title; otherwise the title is the first image's 'title', which
        every image must match in both 'title' and 'alt'.
        """
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        assert all(img['title'] == img['alt'] == title for img in comic_imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
        }
2118
2119
2120
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the image named 'beginArrow')."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous). Returns None when that image is not on the
        page, instead of failing with AttributeError on `.parent`.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        if arrow is None:
            return None
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the 'title' (twitter:title meta content) and
        'img' (contents of every og:image meta tag).
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2146
2147
2148 View Code Duplication
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Get the 'post-link' anchors of the 'post-list' list, in reverse page order."""
        all_posts_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        post_list = all_posts_soup.find('ul', class_='post-list')
        post_links = post_list.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Title and image urls are both read from Open Graph meta tags.
        """
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in og_images],
        }
2170
2171
2172
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        The archive row must hold exactly three cells: the second provides
        the link (and title), the third the date ("%m.%d.%y"). The
        description is empty when the page has no og:description meta tag.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [img['src'] for img in content_imgs],
            'alt': ' '.join(img['alt'] for img in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element (link of the second cell)."""
        _, link_cell, _unused = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the archive table body, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2214
2215
2216
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date are looked up inside the 'post-content' div;
        'alt' is the concatenation (no separator) of the images' 'alt'
        attributes, each of which must equal the image's 'title'.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        date_span = post.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
            'alt': ''.join(img['alt'] for img in comic_imgs),
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
        }
2243
2244
2245
class RockPaperScissors(GenericNavigableComic):
    """Class to retrieve Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Collects the page title, the og:image urls, the shortlink and the
        string of the 'transcript-content' div.
        """
        title = soup.find('title').string
        og_images = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript = transcript_div.string
        return {
            'title': title,
            'transcript': transcript,
            'short_url': short_url,
            'img': [meta['content'] for meta in og_images],
        }
2266
2267
2268
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one image with data-recalc-dims="1" is expected; whatever
        follows the last '?' of its 'src' is dropped. Tags default to an
        empty string when no article:tag meta tag is present.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(img['alt'] for img in comic_imgs),
            'img': [img['src'].rsplit('?', 1)[0] for img in comic_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2299
2300
2301 View Code Duplication
class JuliasDrawings(GenericListableComic):
    """Retriever for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each post of the main page, in reversed page order."""
        main_page = get_soup_at_url(cls.url)
        posts = main_page.find_all('article', class_='li post')
        return [post.find('a') for post in reversed(posts)]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the information dictionary for the drawing on page `soup`."""
        published = soup.find('meta', property='og:article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        pictures = soup.find('section', class_='post-content').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2327
2328
2329
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements: the rows of the chapter table of the archive page."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        # The first 2 <tr>'s do not correspond to comics
        return get_soup_at_url(archive_url).find('table', id='chapter_table').find_all('tr')[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # A row must hold exactly 4 cells; only the second one (the link) is used here.
        _, td_comic, _, _ = tr.find_all('td')
        link = td_comic.find('a')
        return urljoin_wrapper(cls.url, link['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comic.

        The number, title and date come from the archive row `tr`;
        the image comes from the comic page `soup`.
        """
        td_num, td_comic, td_date, _ = tr.find_all('td')
        num = int(td_num.string)
        link = td_comic.find('a')
        title = link.string
        imgs = soup.find_all('img', id='comic_image')
        date_str = td_date.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y, %I:%M %p")
        assert len(imgs) == 1
        assert all(i.get('alt') == i.get('title') for i in imgs)
        return {
            'num': num,
            'title': title,
            'alt': imgs[0].get('alt', ''),
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2370
2371
2372
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2398
2399
2400
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary from the meta tags of page `soup`."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2426
2427
2428
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) link, skipping links to '/comic'."""
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic')
        # First acceptable link, or None when there is none.
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Title and alternative text are expected to be identical on every image.
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2471
2472
2473
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the images, title, alternative text,
        author and publication date found on page `soup`.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image gets an empty alternative text
        # instead of failing with an IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2500
2501
2502
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # The date is parsed with the German locale.
        day = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Alternative text and image title are both expected to be the post title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2528
2529
2530
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Alternative text and title are expected to be identical on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected, so taking the first one below is safe.
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2558
2559
2560
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2587
2588
2589
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary (title and images) for page `soup`."""
        # Title and images are both looked up inside the post content.
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
2607
2608
2609
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Alternative text and title are expected to be identical on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2639
2640
2641
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Alternative text and title are expected to be identical on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        # Pages without image get an empty alternative text.
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2669
2670
2671
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Alternative text and title are expected to be identical on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2701
2702
2703
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Alternative text and title are expected to be identical on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Pages without image get an empty alternative text.
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2730 View Code Duplication
2731
2732
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary (title, images, date) for page `soup`."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2754
2755
2756
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary (image and title) for page `soup`."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Alternative text and title are expected to be identical on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected: its title is used as the comic title.
        assert len(pictures) == 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
2774
2775
2776
class GenericCommitStrip(GenericNavigableComic):
    """Common base for the Commit Strip retrievers (one subclass per language)."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # To be provided by each subclass.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        content = soup.find('div', class_='entry-content')
        pictures = content.find_all('img')
        # Secondary title: the image titles joined with spaces (empty when missing).
        title2 = ' '.join(pic.get('title', '') for pic in pictures)
        # Image addresses are converted to plain ASCII and resolved against the site url.
        img_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures]
        return {
            'title': title,
            'title2': title2,
            'description': description,
            'img': img_urls,
        }
2795
2796
2797
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2804
2805
2806
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2812
2813
2814
class GenericBoumerie(GenericNavigableComic):
    """Common base for the Boumeries retrievers (one subclass per language)."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Date format and locale, to be provided by each subclass.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        # The date is parsed with the format and locale of the subclass.
        day = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        # Alternative text and title are expected to be identical on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2840
2841
2842
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2849
2850
2851
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2859
2860
2861
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the title, description, short url,
        images and publication date found on page `soup`.
        The description is None when the page has no og:description.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is in the first <h1> or, failing that, the first <h2>.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> tag object itself.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2893
2894
2895
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        # Pages without a comic section are handled as pages without image.
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # Every image must carry that same text as both alt and title.
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2923
2924
2925
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the short url, number, images, date,
        alternative text and title found on page `soup`.
        The number is the value of the 'p' parameter of the short url.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is literal text, not a pattern: escape it so that
        # its dots only match dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image gets an empty alternative text
        # instead of failing with an IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2955
2956
2957
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the short url, number, images, date,
        title, tags, alternative text and author found on page `soup`.
        The number is the value of the 'p' parameter of the short url.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is literal text, not a pattern: escape it so that
        # its dots only match dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image gets an empty alternative text
        # instead of failing with an IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2993
2994
2995
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # No navigation item: report "no link" instead of failing on None.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the images, title, author and
        publication date found on page `soup`.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3026
3027
3028
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None when missing."""
        # prev is next / next is prev
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic on page `soup`."""
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        day = string_to_date(raw_date, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        content = soup.find('div', class_="body entry-content")
        # Images without 'src' attribute are ignored.
        pictures = [pic for pic in content.find_all('img') if pic.get('src') is not None]
        alt = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': description,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
3067
3068
3069
class GenericWordPressInkblot(GenericNavigableComic):
    """Generic class to retrieve comics using WordPress with Inkblot.

    Subclasses only need to provide `name`, `long_name` and `url`.
    """
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is looked up on the page at `cls.url` by its full class list.
        """
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, publication date and image urls read
        from the page `soup`. `link` is not used.
        """
        og_title = soup.find('meta', property='og:title')['content']
        image_tags = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': og_title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in image_tags],
        }
3092
3093
3094
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve Everything's Stupid comics (WordPress/Inkblot site)."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3102
3103
3104
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics (WordPress/Inkblot site)."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = 'The Ism'
    url = 'http://www.theism-comics.com'
3110
3111
3112
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve Wooden Plank Studios comics (WordPress/Inkblot site)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3117
3118
3119
class ElectricBunnyComic(GenericNavigableComic):
    """Class to retrieve Electric Bunny Comics.

    Navigation links are the parent elements of images identified by their
    alt text ('First', 'Next', 'Back').
    """
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the corresponding navigation image is absent.
        """
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the og:title and every og:image url of the page.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
3147
3148
3149
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first matching navigation anchor that does not point
        back to the site root, or None when there is none.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=nav_id)
        # Anchors pointing at the bare home page are not real comic links.
        return next(
            (a for a in candidates if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Expects exactly one image in the 'comic-foot' div, whose alt and
        title attributes are equal; that text is used as the title.
        """
        foot = soup.find("div", id="comic-foot")
        imgs = foot.find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        return {
            'title': imgs[0]['title'],
            'img': [i['src'] for i in imgs],
        }
3180
3181
3182
class Ubertool(GenericNavigableComic):
    """Class to retrieve Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, the post title and the post
        date (parsed from text such as "January 02, 2015").
        """
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        return {
            'img': [img['src'] for img in comic_div.find_all('img')],
            'title': post_title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3207
3208
3209
class EarthExplodes(GenericNavigableComic):
    """Class to retrieve The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The alt text is the title attribute of the first image (empty
        string when that attribute is missing); image urls are made
        absolute against `cls.url`.
        """
        page_title = soup.find('title').string
        image_tags = soup.find('div', id='image').find_all('img')
        first_image = image_tags[0]
        hover_text = first_image.get('title', '')
        return {
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags],
            'title': page_title,
            'alt': hover_text,
        }
3234
3235
3236
class PomComics(GenericNavigableComic):
    """Class to retrieve PomComics."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the comic name, the og:description, the
        keywords meta content and the image urls made absolute against
        `cls.url`.
        """
        comic_name = soup.find('h1', id="comic-name").string
        description = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        image_tags = soup.find('div', class_='comic').find_all('img')
        return {
            'title': comic_name,
            'desc': description,
            'tags': keywords,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags],
        }
3266
3267
3268
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    Navigation links are the parent elements of glyphicon spans.
    """
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the chevron span, or None when the page has
        no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() gives None when the chevron is missing: report "no link"
        # instead of failing on `.parent`.
        return span.parent if span else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the twitter:url and twitter:title meta
        contents, the title and alt attributes of the first comic image,
        and the image urls. `link` is not used.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # Date extraction is currently disabled:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3303
3304
3305
class MakeItStoopid(GenericNavigableComic):
    """Class to retrieve Make It Stoopid Comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Get the navigation elements from soup object.

        The 'cnav' elements are expected to come as two identical groups;
        the first five are returned, with elements lacking an href
        replaced by None.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Order is: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the title attribute of `link`.
        """
        link_title = link['title']
        comic_images = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [img['src'] for img in comic_images],
        }
3339
3340
3341
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the entry title and the urls of the images
        found in the entry content. `link` is not used.
        """
        # The stray debugging `print(link)` was removed: it wrote to
        # stdout for every comic retrieved.
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3363
3364
3365
class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the og:image urls, the publication date and
        the og:title of the page.
        """
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': og_title,
        }
3388
3389
3390
class ConsoliaComics(GenericNavigableComic):
    """Class to retrieve Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        link_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the og:title, the og:image urls and the date
        read from the datetime attribute of the first `time` element.
        """
        og_title = soup.find('meta', property='og:title')['content']
        published = soup.find('time')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3421
3422
3423
class TuMourrasMoinsBete(GenericNavigableComic):
    """Class to retrieve Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Uses the blog pager: the 'newer' link is the next comic, the
        'older' link the previous one.
        """
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls of the article body, the
        author and the page title.
        """
        page_title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        image_tags = article.find_all('img')
        writer = soup.find('span', itemprop='author').string
        return {
            'img': [tag['src'] for tag in image_tags],
            'author': writer,
            'title': page_title,
        }
3448
3449
3450
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics.

    Browsing starts from the hard-coded `first_url` and follows the pager
    links found on each comic page.
    """
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The pager classes are swapped: 'prev-item' is used for the next comic.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads title, description, publication date, author and images from
        the page `soup` and returns them as a dict. The alt text is the alt
        attribute of the first image, or an empty string when there is no
        image or no alt attribute. `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Two page layouts exist: regular posts and "special" content.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Only keep images that actually carry a source url.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): in Beautiful Soup `'title' not in i` presumably tests
        # the tag's children rather than its attributes, which would make
        # this check always pass - confirm whether `i.has_attr('title')` was
        # intended before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (it used to be a list when the
        # post had no image).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3488
3489
3490
class GloryOwlComix(GenericNavigableComic):
    """Class to retrieve Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Uses the blog pager: the 'newer' link is the next comic, the
        'older' link the previous one.
        """
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the hrefs of the 'image_src' link elements,
        the author and the page title.
        """
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        writer = soup.find('a', rel='author').string
        return {
            'img': [elt['href'] for elt in image_links],
            'author': writer,
            'title': page_title,
        }
3515
3516
3517
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Subclasses only need to provide `name`, `long_name` and `url`.
    """
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the comic info of every post for which get_comic_info does
        not return None.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post, printing a message if it is not under cls.url."""
        post_url = post['url']
        if not post_url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (post_url, cls.url))
        return post_url

    @classmethod
    def get_api_url(cls):
        """Get the url of the API read endpoint for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Get the API url for the post with the given (integer) id."""
        base_url = cls.get_api_url()
        return base_url + '?id=%d' % tumblr_id

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo', a dict
        describing the comic otherwise.
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [elt.string for elt in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When `last_comic` is provided, only posts newer than it are
        returned. Nothing is returned if that post cannot be fetched or is
        not found in the listing; a message is printed in both cases.
        """
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        newest_first = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                # Tell apart "post is gone" from "whole site is unreachable".
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(newest_first)
        api_url = cls.get_api_url()
        listing = get_soup_at_url(api_url).find('posts')
        start, total = int(listing['start']), int(listing['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                tumblr_id = int(post['id'])
                if waiting_for_id and waiting_for_id == tumblr_id:
                    # Reached the last known post: everything newer is collected.
                    return reversed(newest_first)
                newest_first.append(post)
        if waiting_for_id is not None:
            print("Did not find %s : there might be a problem" % waiting_for_id)
            return []
        return reversed(newest_first)
3618
3619
3620
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal comics from Tumblr."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3628
3629
3630
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics from Tumblr."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3635
3636
3637
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics from Tumblr."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3642
3643
3644
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics from Tumblr."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ('TIE', )
3652
3653
3654
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics from Tumblr."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3660
3661
3662
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics from Tumblr."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'https://picturesinboxescomic.tumblr.com'
3668
3669
3670
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve TubeyToons comics from Tumblr."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'https://tubeytoons.tumblr.com'
    # NOTE(review): the category is spelled 'TUNEYTOONS' while the comic is
    # "Tubey Toons" - possibly a typo; check the other classes sharing this
    # category before changing it.
    _categories = ('TUNEYTOONS', )
3678
3679
3680
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed comics from Tumblr."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3688
3689
3690
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics from Tumblr."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
3695
3696
3697
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics from Tumblr."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3702
3703
3704
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics from Tumblr."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3709
3710
3711
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics from Tumblr."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3717
3718
3719
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics from Tumblr."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3724
3725
3726
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics from Tumblr."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3733
3734
3735
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear Shaped Comics through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3740
3741
3742
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics from Tumblr."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3747
3748
3749
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics from Tumblr."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3755
3756
3757
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3764
3765
3766
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics from Tumblr."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3772
3773
3774
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics through the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
3781
3782
3783
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics from Tumblr."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3789
3790
3791
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know Comics through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3797
3798
3799
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics from Tumblr."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3804
3805
3806
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve CheerUpEmoKid comics from Tumblr."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
3813
3814
3815
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic comics from Tumblr."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3821
3822
3823
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve ZenPencils comics from Tumblr."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3831
3832
3833
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics from Tumblr."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
3839
3840
3841
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics from Tumblr."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3847
3848
3849
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Tumblr edition of the Safely Endangered comics."""

    # Other source for this comic: http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3855
3856
3857
class MouseBearComedyTumblr(GenericTumblrV1):
    """Tumblr edition of the Mouse Bear Comedy comics."""

    # Other source for this comic: http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3863
3864
3865
class BouletCorpTumblr(GenericTumblrV1):
    """Tumblr edition of the Boulet Corp comics."""

    # Other source for this comic: http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET',)
3872
3873
3874
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Tumblr edition of The Awkward Yeti comics."""

    # Other sources for this comic:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI',)
3883
3884
3885
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics (nellucnhoj.com), handled as a GenericTumblrV1 comic."""

    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3890
3891
3892
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Tumblr edition of the Down The Upward Spiral comics."""

    # Other sources for this comic:
    #   http://www.downtheupwardspiral.com
    #   https://tapastic.com/series/Down-the-Upward-Spiral
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3899
3900
3901
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics (Tumblr edition)."""

    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # The category tag is stored in `_categories` (leading underscore), the
    # attribute name the sibling comic classes use; it was previously declared
    # as `categories`, which did not follow that convention.
    _categories = ('DAMILEE', )
3908
3909
3910
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People (Tumblr edition)."""

    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # The category tag is stored in `_categories` (leading underscore), the
    # attribute name the sibling comic classes use; it was previously declared
    # as `categories`, which did not follow that convention.
    _categories = ('DAMILEE', )
3919
3920
3921
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Tumblr edition of the 1111 Comics."""

    # Other sources for this comic:
    #   http://www.1111comics.me
    #   https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE',)
3929
3930
3931
class JhallComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Jhall Comics."""

    # Other source for this comic: http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3937
3938
3939
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Tumblr edition of the Berkeley Mews comics."""

    # Other sources for this comic:
    #   http://www.gocomics.com/berkeley-mews
    #   http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY',)
3947
3948
3949
class JoanCornellaTumblr(GenericTumblrV1):
    """Tumblr edition of the Joan Cornella comics."""

    # Other source for this comic: http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3955
3956
3957
class RespawnComicTumblr(GenericTumblrV1):
    """Tumblr edition of the Respawn Comic."""

    # Other source for this comic: http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
3963
3964
3965
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics (Tumblr edition)."""

    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name spelled "Hallbeck", matching the class name and the URL
    # (it previously read "Hallback").
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category tag keeps its existing 'HALLBACK' spelling on
    # purpose - other classes may share this tag; confirm before renaming it.
    _categories = ('HALLBACK', )
3975
3976
3977
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets (comicnuggets.com), handled as a GenericTumblrV1 comic."""

    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3982
3983
3984
class PigeonGazetteTumblr(GenericTumblrV1):
    """Tumblr edition of The Pigeon Gazette comics."""

    # Other source for this comic: https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3990
3991
3992
class CancerOwl(GenericTumblrV1):
    """Tumblr edition of the Cancer Owl comics."""

    # Other source for this comic: http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3998
3999
4000
class FowlLanguageTumblr(GenericTumblrV1):
    """Tumblr edition of the Fowl Language comics."""

    # Other sources for this comic:
    #   http://www.fowllanguagecomics.com
    #   http://tapastic.com/series/Fowl-Language-Comics
    #   http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE',)
4009
4010
4011
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Tumblr edition of The Odd 1s Out comics."""

    # Other sources for this comic:
    #   http://theodd1sout.com
    #   https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
4018
4019
4020
class TheUnderfoldTumblr(GenericTumblrV1):
    """Tumblr edition of The Underfold comics."""

    # Other source for this comic: http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
4026
4027
4028
class LolNeinTumblr(GenericTumblrV1):
    """Tumblr edition of the Lol Nein comics."""

    # Other source for this comic: http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
4034
4035
4036
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Fat Awesome comics."""

    # Other source for this comic: http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
4042
4043
4044
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Tumblr edition of The World Is Flat comics."""

    # Other source for this comic: https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
4050
4051
4052
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics (dorrismccomics.com), handled as a GenericTumblrV1 comic."""

    # Other source for this comic: http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4058
4059
4060
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Tumblr edition of the Leleoz comics."""

    # Other source for this comic: https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
4066
4067
4068
class MoonBeardTumblr(GenericTumblrV1):
    """Tumblr edition of the Moon Beard comics."""

    # Other sources for this comic:
    #   http://moonbeard.com
    #   http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es'
4075
4076
4077
class AComik(GenericTumblrV1):
    """A Comik (acomik.com), handled as a GenericTumblrV1 comic."""

    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4082
4083
4084
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, read from classicrandy.tumblr.com."""

    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4089
4090
4091
class DagssonTumblr(GenericTumblrV1):
    """Tumblr edition of the Dagsson comics."""

    # Other source for this comic: http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4097
4098
4099
class LinsEditionsTumblr(GenericTumblrV1):
    """Tumblr edition of the L.I.N.S. Editions comics."""

    # Other source for this comic: https://linsedition.com
    # The comic is now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('LINS',)
4107
4108
4109
class WarAndPeasTumblr(GenericTumblrV1):
    """Tumblr edition of the War And Peas comics."""

    # The comic was previously on https://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS',)
4116
4117
4118
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics (origamihotdish.com), handled as a GenericTumblrV1 comic."""

    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4123
4124
4125
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, read from hitandmisscomics.tumblr.com."""

    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4130
4131
4132
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, read from hmblanc.tumblr.com."""

    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4137
4138
4139
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tumblr edition of the Tales Of Absurdity comics."""

    # Other sources for this comic:
    #   http://talesofabsurdity.com
    #   http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY',)
4147
4148
4149
class RobbieAndBobby(GenericTumblrV1):
    """Tumblr edition of the Robbie And Bobby comics."""

    # Other source for this comic: http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4155
4156
4157
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Tumblr edition of the Electric Bunny Comics."""

    # Other source for this comic:
    #   http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4163
4164
4165
class Hoomph(GenericTumblrV1):
    """Hoomph comics (hoom.ph), handled as a GenericTumblrV1 comic."""

    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4170
4171
4172
class BFGFSTumblr(GenericTumblrV1):
    """Tumblr edition of the BFGFS comics."""

    # Other sources for this comic:
    #   https://tapastic.com/series/BFGFS
    #   http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4179
4180
4181
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics (doodleforfood.com), handled as a GenericTumblrV1 comic."""

    # Other source for this comic: https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4187
4188
4189
class CassandraCalinTumblr(GenericTumblrV1):
    """Tumblr edition of the C. Cassandra comics."""

    # Other sources for this comic:
    #   http://cassandracalin.com
    #   https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4196
4197
4198
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, read from dougwastaken.tumblr.com."""

    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4203
4204
4205
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, handled as a GenericTumblrV1 comic."""

    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4210
4211
4212
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, read from marcoandco.tumblr.com."""

    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://marcoandco.tumblr.com'
4217
4218
4219
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics (thegrohltroll.com), handled as a GenericTumblrV1 comic."""

    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4224
4225
4226
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics (webcomicname.com), handled as a GenericTumblrV1 comic."""

    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4231
4232
4233
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, read from booksofadam.tumblr.com."""

    # Other source for this comic: http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4239
4240
4241
class HarkAVagrant(GenericTumblrV1):
    """Tumblr edition of the Hark A Vagrant comics."""

    # Other source for this comic: http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4247
4248
4249
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Tumblr edition of the Our Super Adventure comics."""

    # Other sources for this comic:
    #   https://tapastic.com/series/Our-Super-Adventure
    #   http://www.oursuperadventure.com
    # Related link: http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4257
4258
4259
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics (jakelikesonions.com), handled as a GenericTumblrV1 comic."""

    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4264
4265
4266
class InYourFaceCake(GenericTumblrV1):
    """Tumblr edition of the In Your Face Cake comics."""

    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
4271
4272
4273
class Robospunk(GenericTumblrV1):
    """Robospunk comics (robospunk.com), handled as a GenericTumblrV1 comic."""

    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4278
4279
4280
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, read from bananatwinky.tumblr.com."""

    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4285
4286
4287
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Tumblr edition of the Yesterday's Popcorn comics."""

    # Other sources for this comic:
    #   http://www.yesterdayspopcorn.com
    #   https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
4294
4295
4296
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics (twisteddoodles.com), handled as a GenericTumblrV1 comic."""

    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4301
4302
4303
class UbertoolTumblr(GenericTumblrV1):
    """Tumblr edition of the Ubertool comics."""

    # Other sources for this comic:
    #   http://ubertoolcomic.com
    #   https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL',)
4311
4312
# Code-review annotation ("View Code Duplication"): this code seems to be duplicated in your project.
4313
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Tumblr edition of the Little Life Lines comics."""

    # Other source for this comic: http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4319
4320
4321
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics (theycantalk.com), handled as a GenericTumblrV1 comic."""

    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4326
4327
4328
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, handled as a GenericTumblrV1 comic."""

    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4333
4334
4335
class Sephko(GenericTumblrV1):
    """Sephko comics, read from sephko.tumblr.com."""

    # Other source for this comic: http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4341
4342
4343
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn comics, read from blazersatdawn.tumblr.com."""

    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4348
4349
4350
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Art By Moga comics, read from artbymoga.tumblr.com."""

    # Deactivated because it downloads too many things
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4355
4356
4357
class VerbalVomitTumblr(GenericTumblrV1):
    """Tumblr edition of the Verbal Vomit comics."""

    # Other source for this comic: http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4363
4364
4365
class LibraryComic(GenericTumblrV1):
    """Tumblr edition of LibraryComic."""

    # Other source for this comic: http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4371
4372
4373
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tumblr edition of the Tizzy Stitch Bird comics."""

    # Other sources for this comic:
    #   http://tizzystitchbird.com
    #   https://tapastic.com/series/TizzyStitchbird
    #   http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4381
4382
4383
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Tumblr edition of the Victims Of Circumsolar comics."""

    # Other source for this comic: http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4389
4390
4391
class RockPaperCynicTumblr(GenericTumblrV1):
    """Tumblr edition of the Rock Paper Cynic comics."""

    # Other sources for this comic:
    #   http://www.rockpapercynic.com
    #   https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4398
4399
4400
class DeadlyPanelTumblr(GenericTumblrV1):
    """Tumblr edition of the Deadly Panel comics."""

    # Other sources for this comic:
    #   http://www.deadlypanel.com
    #   https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4407
4408
4409
class CatanaComics(GenericTumblrV1):
    """Catana comics (catanacomics.com), handled as a GenericTumblrV1 comic."""

    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4414
4415
4416
class AngryAtNothingTumblr(GenericTumblrV1):
    """Tumblr edition of the Angry At Nothing comics."""

    # Other sources for this comic:
    #   http://www.angryatnothing.net
    #   http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4423
4424
4425
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comic, read from tango2010weibo.tumblr.com."""

    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4430
4431
4432
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Tumblr edition of the Off The Leash Dog comics."""

    # Other sources for this comic:
    #   http://offtheleashdogcartoons.com
    #   http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT',)
4440
4441
4442
class ImogenQuestTumblr(GenericTumblrV1):
    """Tumblr edition of the Imogen Quest comics."""

    # Other source for this comic: http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4448
4449
4450
class Shitfest(GenericTumblrV1):
    """Shitfest comics (shitfestcomic.com), handled as a GenericTumblrV1 comic."""

    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4455
4456
4457
class IceCreamSandwichComics(GenericTumblrV1):
    """Ice Cream Sandwich Comics, handled as a GenericTumblrV1 comic."""

    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4462
4463
4464
class Dustinteractive(GenericTumblrV1):
    """Dustinteractive comics (dustinteractive.com), handled as a GenericTumblrV1 comic."""

    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4469
4470
4471
class StickyCinemaFloor(GenericTumblrV1):
    """Sticky Cinema Floor comics, read from stickycinemafloor.tumblr.com."""

    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4476
4477
4478
class IncidentalComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Incidental Comics."""

    # Other source for this comic: http://www.incidentalcomics.com
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
    url = 'http://incidentalcomics.tumblr.com'
4484
4485
4486
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """Tumblr edition of the A Pleasant Waste Of Time comics."""

    # Other source for this comic: https://tapas.io/series/A-Pleasant-
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    url = 'https://artjcf.tumblr.com'
    _categories = ('WASTE',)
4493
4494
4495
class HorovitzComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Horovitz comics."""

    # Other source for this comic: http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ',)
4502
4503
4504
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Tumblr edition of the Deep Dark Fears comics."""

    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4509
4510
4511
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """Tumblr edition of the James Of No Trades comics."""

    # Other sources for this comic:
    #   http://jamesofnotrades.com
    #   http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    #   https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES',)
4520
4521
4522
class HorovitzComics(GenericEmptyComic, GenericListableComic):
    """Shared base for the comics hosted on horovitzcomics.com.

    Concrete subclasses supply `name`, `long_name` and a compiled `link_re`
    whose first group captures the comic number found in an archive link.
    """

    # Other source for these comics: https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ',)
    # The three groups are read as (year, month, day) by get_comic_info.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: each concrete subclass overrides this with a real pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        `link` is the archive anchor (its href yields the number, its string
        the title); `soup` is the comic page, expected to contain exactly one
        <img id="comic"> whose src carries the publication date.
        """
        comic_number = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = (int(part) for part in date_parts)
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_number,
        }
4554
4555
4556
class HorovitzNew(HorovitzComics):
    """Horovitz comics from the 'new' series (links under /comics/new/)."""

    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
4561
4562
4563
class HorovitzClassic(HorovitzComics):
    """Horovitz comics from the 'classic' series (links under /comics/classic/)."""

    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4568
4569
4570
class GenericGoComic(GenericNavigableComic):
    """Base class gathering the logic shared by the comics from gocomics.com."""

    _categories = ('GOCOMIC',)

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor pointing to the first comic, looked up on the comic's main page."""
        first_button_class = 'fa btn btn-outline-default btn-circle fa-backward sm '
        return get_soup_at_url(cls.url).find('a', class_=first_button_class)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next comic when `next_` is true, else to the previous one."""
        if next_:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined onto the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, author, tags) from a comic page."""

        def meta_content(prop):
            """Return the content of the <meta> tag having the given property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
            'author': author,
            'tags': tags,
        }
4607
4608
4609
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, as hosted on gocomics.com."""

    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4614
4615
4616
class Peanuts(GenericGoComic):
    """Peanuts comics, as hosted on gocomics.com."""

    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4621
4622
4623
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, as hosted on gocomics.com."""

    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4628
4629
4630
class TomToles(GenericGoComic):
    """Tom Toles comics, as hosted on gocomics.com."""

    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4635
4636
4637
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, as hosted on gocomics.com."""

    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4642
4643
4644
class Brevity(GenericGoComic):
    """Brevity comics, as hosted on gocomics.com."""

    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4649
4650
4651
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, as hosted on gocomics.com."""

    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4656
4657
4658
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, as hosted on gocomics.com."""

    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4663
4664
4665
class JimBenton(GenericGoComic):
    """Jim Benton comics, as hosted on gocomics.com."""

    # Other source for this comic: http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4671
4672
4673
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, as hosted on gocomics.com."""

    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4678
4679
4680
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, as hosted on gocomics.com."""

    # Other source for this comic: http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4686
4687
4688
class OffTheMark(GenericGoComic):
    """Off The Mark comics, as hosted on gocomics.com."""

    # Other source for this comic: https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4694
4695
4696
class WuMo(GenericGoComic):
    """WuMo comics, as hosted on gocomics.com."""

    # Other source for this comic: http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4702
4703
4704
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, as hosted on gocomics.com."""

    # Other sources for this comic:
    #   http://www.lunarbaboon.com
    #   https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4711
4712
4713
class SandersenGocomic(GenericGoComic):
    """GoComics edition of the Sarah Andersen comics."""

    # Other sources for this comic:
    #   http://sarahcandersen.com
    #   http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4720
4721
4722
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics, as published on GoComics."""
    # Also available at:
    #   http://smbc-comics.tumblr.com
    #   http://www.smbc-comics.com
    name = 'smbc-goc'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    _categories = ('SMBC', )
4730
4731
4732
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, as published on GoComics."""
    # The source here is GoComics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    url = 'http://www.gocomics.com/calvinandhobbes'
    long_name = 'Calvin and Hobbes (from GoComics)'
4738
4739
4740
class RallGoComic(GenericGoComic):
    """Ted Rall comics, as published on GoComics."""
    # Also available at http://rall.com/comic
    name = 'rall-goc'
    url = "http://www.gocomics.com/ted-rall"
    long_name = "Ted Rall (from GoComics)"
    _categories = ('RALL', )
4747
4748
4749
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, as published on GoComics."""
    # Also available at:
    #   http://larstheyeti.tumblr.com
    #   http://theawkwardyeti.com
    #   https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    long_name = 'The Awkward Yeti (from GoComics)'
    _categories = ('YETI', )
4758
4759
4760
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, as published on GoComics."""
    # Also available at:
    #   http://mews.tumblr.com
    #   http://www.berkeleymews.com
    name = 'berkeley-goc'
    url = 'http://www.gocomics.com/berkeley-mews'
    long_name = 'Berkeley Mews (from GoComics)'
    _categories = ('BERKELEY', )
4768
4769
4770
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, as published on GoComics."""
    # Also available at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    url = 'http://www.gocomics.com/sheldon'
    long_name = 'Sheldon Comics (from GoComics)'
4776
4777
4778
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, as published on GoComics."""
    # Also available at:
    #   http://www.fowllanguagecomics.com
    #   http://tapastic.com/series/Fowl-Language-Comics
    #   http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    url = 'http://www.gocomics.com/fowl-language'
    long_name = 'Fowl Language Comics (from GoComics)'
    _categories = ('FOWLLANGUAGE', )
4787
4788
4789
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, as published on GoComics."""
    name = 'nickanderson'
    url = 'http://www.gocomics.com/nickanderson'
    long_name = 'Nick Anderson'
4794
4795
4796
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, as published on GoComics."""
    # Also available at http://garfield.com
    name = 'garfield-goc'
    url = 'http://www.gocomics.com/garfield'
    long_name = 'Garfield (from GoComics)'
    _categories = ('GARFIELD', )
4803
4804
4805
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, as published on GoComics."""
    # Also available at http://dorrismccomics.com
    name = 'dorrismc-goc'
    url = 'http://www.gocomics.com/dorris-mccomics'
    long_name = 'Dorris Mc (from GoComics)'
4811
4812
4813
class FoxTrot(GenericGoComic):
    """FoxTrot comics, as published on GoComics."""
    name = 'foxtrot'
    url = 'http://www.gocomics.com/foxtrot'
    long_name = 'FoxTrot'
4818
4819
4820
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics comics, as published on GoComics."""
    name = 'foxtrot-classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
    long_name = 'FoxTrot Classics'
4825
4826
4827
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Mister & Me comics, as published on GoComics."""
    # Also available at:
    #   http://www.mister-and-me.com
    #   https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    url = 'http://www.gocomics.com/mister-and-me'
    long_name = 'Mister & Me (from GoComics)'
4834
4835
4836
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) comics, as published on GoComics."""
    name = 'nonsequitur'
    url = 'http://www.gocomics.com/nonsequitur'
    long_name = 'Non Sequitur'
4841
4842
4843
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    The episode list is read from the javascript embedded in the series page
    (`cls.url`); each episode page is then searched for its images.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed episode page and `archive_elt` the corresponding
        entry of the episode list (providing 'publishDate', 'title' and 'id').
        Return a dict with the date, the image urls and the title, or None
        when the page does not hold any image yet.
        """
        # 'publishDate' is divided by 1000 before the conversion, i.e. it is
        # handled as a timestamp expressed in milliseconds.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the url of the episode page described by an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list (parsed from JSON) embedded in the series page.

        Raise IndexError when no line of the page holds the episode list.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Only the first matching line is used.
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4876
4877
4878
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, as published on Tapastic."""
    # Also available at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    url = 'http://tapastic.com/series/vegetablesfordessert'
    long_name = 'Vegetables For Dessert'
4884
4885
4886
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, as published on Tapastic."""
    # Also available at:
    #   http://www.fowllanguagecomics.com
    #   http://fowllanguagecomics.tumblr.com
    #   http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    long_name = 'Fowl Language Comics (from Tapastic)'
    _categories = ('FOWLLANGUAGE', )
4895
4896
4897
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, as published on Tapastic."""
    name = 'oscillating'
    url = 'http://tapastic.com/series/oscillatingprofundities'
    long_name = 'Oscillating Profundities'
4902
4903
4904
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, as published on Tapastic."""
    name = 'znoflats'
    url = 'http://tapastic.com/series/Znoflats-Comics'
    long_name = 'Znoflats Comics'
4909
4910
4911
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, as published on Tapastic."""
    # Also available at:
    #   http://sarahcandersen.com
    #   http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    url = 'http://tapastic.com/series/Doodle-Time'
    long_name = 'Sarah Andersen (from Tapastic)'
4918
4919
4920
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, as published on Tapastic."""
    # Also available at:
    #   http://tubeytoons.com
    #   https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    url = 'http://tapastic.com/series/Tubey-Toons'
    long_name = 'Tubey Toons (from Tapastic)'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept as is
    # because the key may be shared with other classes - confirm before renaming.
    _categories = ('TUNEYTOONS', )
4928
4929
4930
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, as published on Tapastic."""
    # Also available at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    url = 'http://tapastic.com/series/anything'
    long_name = 'Anything Comic (from Tapastic)'
4936
4937
4938
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, as published on Tapastic."""
    # Also available at:
    #   http://unearthedcomics.com
    #   https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    url = 'http://tapastic.com/series/UnearthedComics'
    long_name = 'Unearthed Comics (from Tapastic)'
    _categories = ('UNEARTHED', )
4946
4947
4948
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, as published on Tapastic."""
    # Also available at:
    #   http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #   http://everythingsstupid.net
    name = 'stupid-tapa'
    url = 'http://tapastic.com/series/EverythingsStupid'
    long_name = "Everything's Stupid (from Tapastic)"
4955
4956
4957
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, as published on Tapastic."""
    # Also available at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    url = 'http://tapastic.com/series/Just-Say-Eh'
    long_name = 'Just Say Eh (from Tapastic)'
4963
4964
4965
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, as published on Tapastic."""
    # Also available at http://www.thorsthundershack.com
    name = 'thor-tapa'
    url = 'http://tapastic.com/series/Thors-Thundershac'
    long_name = "Thor's Thundershack (from Tapastic)"
    _categories = ('THOR', )
4972
4973
4974
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, as published on Tapastic."""
    # Also available at http://owlturd.com
    name = 'owlturd-tapa'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    long_name = 'Owl Turd (from Tapastic)'
    _categories = ('OWLTURD', )
4981
4982
4983
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, as published on Tapastic."""
    # Also available at:
    #   http://goneintorapture.tumblr.com
    #   http://goneintorapture.com
    name = 'rapture-tapa'
    url = 'http://tapastic.com/series/Goneintorapture'
    long_name = 'Gone Into Rapture (from Tapastic)'
4990
4991
4992
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know comics, as published on Tapastic."""
    # Also available at http://heckifiknowcomics.com
    name = 'heck-tapa'
    url = 'http://tapastic.com/series/Regular'
    long_name = 'Heck if I Know comics (from Tapastic)'
4998
4999
5000
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid comics, as published on Tapastic."""
    # Also available at:
    #   http://www.cheerupemokid.com
    #   https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    url = 'http://tapastic.com/series/CUEK'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
5007
5008
5009
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, as published on Tapastic."""
    # Also available at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    url = 'http://tapastic.com/series/bigfoot-justice'
    long_name = 'Big Foot Justice (from Tapastic)'
5015
5016
5017
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, as published on Tapastic."""
    # Also available at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    url = 'http://tapastic.com/series/UP-and-OUT'
    long_name = 'Up And Out (from Tapastic)'
5023
5024
5025
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, as published on Tapastic."""
    # Also available at http://www.toonhole.com
    name = 'toonhole-tapa'
    url = 'http://tapastic.com/series/TOONHOLE'
    long_name = 'Toon Hole (from Tapastic)'
5031
5032
5033
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry At Nothing comics, as published on Tapastic."""
    # Also available at:
    #   http://www.angryatnothing.net
    #   http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
    long_name = 'Angry At Nothing (from Tapastic)'
5040
5041
5042
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, as published on Tapastic."""
    # Also available at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    url = 'https://tapastic.com/series/Leleoz'
    long_name = 'Leleoz (from Tapastic)'
5048
5049
5050
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, as published on Tapastic."""
    # Also available at:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    long_name = 'The Awkward Yeti (from Tapastic)'
    _categories = ('YETI', )
5059
5060
5061
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Stored in `_categories` (leading underscore), the attribute name used by
    # the other comic classes of this module to declare their categories.
    _categories = ('DAMILEE', )
5068
5069
5070
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Stored in `_categories` (leading underscore), the attribute name used by
    # the other comic classes of this module to declare their categories.
    _categories = ('DAMILEE', )
5079
5080
5081
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, as published on Tapastic."""
    # Also available at:
    #   http://www.1111comics.me
    #   http://comics1111.tumblr.com
    name = '1111-tapa'
    url = 'https://tapastic.com/series/1111-Comics'
    long_name = '1111 Comics (from Tapastic)'
    _categories = ('ONEONEONEONE', )
5089
5090
5091
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is called "Tumble Dry" (see the urls), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5097
5098
5099
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, as published on Tapastic."""
    # Also available at:
    #   http://www.deadlypanel.com
    #   https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    url = 'https://tapastic.com/series/deadlypanel'
    long_name = 'Deadly Panel (from Tapastic)'
5106
5107
5108
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck comics (Maximumble)."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author's name is spelled "Hallbeck" (see the class name and the urls).
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): category key keeps its historical 'HALLBACK' spelling as it
    # may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
5116
5117
5118
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
    """Class to retrieve Chris Hallbeck comics (Minimumble)."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author's name is spelled "Hallbeck" (see the class name and the urls).
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): category key keeps its historical 'HALLBACK' spelling as it
    # may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
5126
5127
5128
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
    """Class to retrieve Chris Hallbeck comics (The Book of Biff)."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author's name is spelled "Hallbeck" (see the class name and the urls).
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): category key keeps its historical 'HALLBACK' spelling as it
    # may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
5136
5137
5138
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, as published on Tapastic."""
    # Also available at https://randowis.com
    name = 'randowis-tapa'
    url = 'https://tapastic.com/series/RandoWis'
    long_name = 'RandoWis (from Tapastic)'
5144
5145
5146
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, as published on Tapastic."""
    # Also available at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
    long_name = 'The Pigeon Gazette (from Tapastic)'
5152
5153
5154
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, as published on Tapastic."""
    # Also available at:
    #   http://theodd1sout.com
    #   http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    url = 'https://tapastic.com/series/Theodd1sout'
    long_name = 'The Odd 1s Out (from Tapastic)'
5161
5162
5163
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, as published on Tapastic."""
    # Also available at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    url = 'https://tapastic.com/series/The-World-is-Flat'
    long_name = 'The World Is Flat (from Tapastic)'
5169
5170
5171
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, as published on Tapastic."""
    # Also available at:
    #   http://www.mister-and-me.com
    #   http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    url = 'https://tapastic.com/series/Mister-and-Me'
    long_name = 'Mister & Me (from Tapastic)'
5178
5179
5180
class TalesOfAbsurdityTapa(GenericEmptyComic, GenericTapasticComic):
    """Tales Of Absurdity comics, as published on Tapastic."""
    # Also available at:
    #   http://talesofabsurdity.com
    #   http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    long_name = 'Tales of Absurdity (from Tapastic)'
    _categories = ('ABSURDITY', )
5188
5189
5190
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, as published on Tapastic."""
    # Also available at:
    #   http://bfgfs.com
    #   https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    url = 'https://tapastic.com/series/BFGFS'
    long_name = 'BFGFS (from Tapastic)'
5197
5198
5199
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, as published on Tapastic."""
    # Also available at http://www.doodleforfood.com
    name = 'doodle-tapa'
    url = 'https://tapastic.com/series/Doodle-for-Food'
    long_name = 'Doodle For Food (from Tapastic)'
5205
5206
5207
class MrLovensteinTapa(GenericTapasticComic):
    """Mr. Lovenstein comics, as published on Tapastic."""
    # Also on https://tapastic.com/series/MrLovenstein
    # NOTE(review): the "Also on" link above is this class's own url; it was
    # presumably meant to point at another site - confirm and update.
    name = 'mrlovenstein-tapa'
    url = 'https://tapastic.com/series/MrLovenstein'
    long_name = 'Mr. Lovenstein (from Tapastic)'
5213
5214
5215
class CassandraCalinTapa(GenericTapasticComic):
    """C. Cassandra comics, as published on Tapastic."""
    # Also available at:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
    long_name = 'Cassandra Calin (from Tapastic)'
5222
5223
5224
class WafflesAndPancakes(GenericTapasticComic):
    """Waffles And Pancakes comics, as published on Tapastic."""
    # Also available at http://wandpcomic.com
    name = 'waffles'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
    long_name = 'Waffles And Pancakes'
5230
5231
5232
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Yesterday's Popcorn comics, as published on Tapastic."""
    # Also available at:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
    long_name = "Yesterday's Popcorn (from Tapastic)"
5239
5240
5241
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
    """Our Super Adventure comics, as published on Tapastic."""
    # Also available at http://www.oursuperadventure.com
    # Related links listed by the original author:
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
    long_name = 'Our Super Adventure (from Tapastic)'
5249
5250
5251
class NamelessPCs(GenericTapasticComic):
    """Nameless PCs comics, as published on Tapastic."""
    # Also available at http://namelesspcs.com
    name = 'namelesspcs-tapa'
    url = 'https://tapastic.com/series/NamelessPC'
    long_name = 'NamelessPCs (from Tapastic)'
5257
5258
5259
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Down The Upward Spiral comics, as published on Tapastic."""
    # Also available at:
    #   http://www.downtheupwardspiral.com
    #   http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
    long_name = 'Down the Upward Spiral (from Tapastic)'
5266
5267
5268
class UbertoolTapa(GenericTapasticComic):
    """Ubertool comics, as published on Tapastic."""
    # Also available at:
    #   http://ubertoolcomic.com
    #   https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    url = 'https://tapastic.com/series/ubertool'
    long_name = 'Ubertool (from Tapastic)'
    _categories = ('UBERTOOL', )
5276
5277
5278
class BarteNerdsTapa(GenericTapasticComic):
    """BarteNerds comics, as published on Tapastic."""
    # Also available at http://www.bartenerds.com
    name = 'bartenerds-tapa'
    url = 'https://tapastic.com/series/BarteNERDS'
    long_name = 'BarteNerds (from Tapastic)'
5284
5285
5286
class SmallBlueYonderTapa(GenericTapasticComic):
    """Small Blue Yonder comics, as published on Tapastic."""
    # Also available at http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
    long_name = 'Small Blue Yonder (from Tapastic)'
5292
5293
5294
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Tizzy Stitch Bird comics, as published on Tapastic."""
    # Also available at:
    #   http://tizzystitchbird.com
    #   http://tizzystitchbird.tumblr.com
    #   http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    url = 'https://tapastic.com/series/TizzyStitchbird'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
5302
5303
5304
class RockPaperCynicTapa(GenericTapasticComic):
    """Rock Paper Cynic comics, as published on Tapastic."""
    # Also available at:
    #   http://www.rockpapercynic.com
    #   http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    url = 'https://tapastic.com/series/rockpapercynic'
    long_name = 'Rock Paper Cynic (from Tapastic)'
5311
5312
5313
class IsItCanonTapa(GenericTapasticComic):
    """Is It Canon comics, as published on Tapastic."""
    # Also available at http://www.isitcanon.com
    name = 'canon-tapa'
    url = 'http://tapastic.com/series/isitcanon'
    long_name = 'Is It Canon (from Tapastic)'
5319
5320
5321
class ItsTheTieTapa(GenericTapasticComic):
    """It's the tie comics, as published on Tapastic."""
    # Also available at:
    #   http://itsthetie.com
    #   http://itsthetie.tumblr.com
    name = 'tie-tapa'
    url = "https://tapastic.com/series/itsthetie"
    long_name = "It's the tie (from Tapastic)"
    _categories = ('TIE', )
5329
5330
5331
class JamesOfNoTradesTapa(GenericTapasticComic):
    """James Of No Trades comics, as published on Tapastic."""
    # Also available at:
    #   http://jamesofnotrades.com
    #   http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    #   http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    url = 'https://tapas.io/series/James-of-No-Trades'
    long_name = 'James Of No Trades (from Tapastic)'
    _categories = ('JAMESOFNOTRADES', )
5340
5341
5342
class MomentumTapa(GenericTapasticComic):
    """Momentum comics, as published on Tapastic."""
    # Also available at http://www.momentumcomic.com
    name = 'momentum-tapa'
    url = 'https://tapastic.com/series/momentum'
    long_name = 'Momentum (from Tapastic)'
5348
5349
5350
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """A Pleasant Waste Of Time comics, as published on Tapastic."""
    # Also available at https://artjcf.tumblr.com
    name = 'pleasant-waste-tapa'
    url = 'https://tapas.io/series/A-Pleasant-'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    _categories = ('WASTE', )
5357
5358
5359
def get_subclasses(klass):
    """Return every class deriving from `klass`, directly or not.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several paths is listed once
    per path.
    """
    children = klass.__subclasses__()
    found = list(children)
    for child in children:
        found += get_subclasses(child)
    return found
5365
5366
5367
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix directly following a digit is removed, so words
    which happen to contain 'st', 'nd', 'rd' or 'th' ("August", "Monday",
    "Saturday", ...) are left untouched.

    >>> remove_st_nd_rd_th_from_date('Monday 1st August 2016')
    'Monday 1 August 2016'
    """
    # The digit is captured and put back; the lookahead prevents cutting into
    # a longer word that would start right after a digit.
    return re.sub(r'(\d)(?:st|nd|rd|th)(?![A-Za-z])', r'\1', string)
5375
5376
5377
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: `string` is parsed according
    to `date_format` while the `local` locale is active. The locale in place
    before the call is restored afterwards, even when the parsing fails.

    Return a datetime.date.
    Raise ValueError when `string` does not match `date_format` (from
    strptime) and locale.Error when `local` cannot be set.
    """
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    # Called without a second argument, setlocale only returns the current setting.
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Do not leave the process-wide locale changed because of a parsing error.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
5388
5389
5390
# All the classes deriving (directly or not) from GenericComic, without duplicates.
COMICS = set(get_subclasses(GenericComic))
# Only the classes having a name are kept as actual comics.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup from comic name to comic class; the assert checks names are unique.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Same uniqueness check on the names of the classes themselves.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
5396