Completed
Push — master ( 2a65c1...0d51a1 )
by De
32s
created

comics.py (11 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics numbered after `last_comic['num']`.

        Implementation of GenericComic's abstract method. When `last_comic`
        is falsy, iteration starts at comic number 1. Each yielded dict is
        the JSON description of the comic, with 'img' wrapped in a list,
        'day'/'month'/'year' converted to int and the 'prefix', 'json_url'
        and 'url' entries added.
        """
        previous_num = 0
        if last_comic:
            previous_num = last_comic['num']
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        for num in range(previous_num + 1, latest_info['num'] + 1):
            if num == 404:
                # Number 404 is never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for date_part in ('day', 'month', 'year'):
                comic[date_part] = int(comic[date_part])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Implementation of get_url_from_link/get_url_from_archive_element,
    meant to be assigned as such in a class body.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to `cls.url`.

    Implementation of get_url_from_link/get_url_from_archive_element,
    meant to be assigned as such in a class body. The joining itself is
    delegated to `urljoin_wrapper`.
    """
    base_url = cls.url
    return urljoin_wrapper(base_url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: comics with first/next arrows.

    This class applies to comics where the previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new, more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `last_soup` is the soup of the current comic page; a true `next_`
        asks for the next comic, a false one for the previous comic.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the soup of the comic page and `link` the link that led
        to it. `get_next_comic` expects a dict without a 'url' key, or None
        when the page is to be ignored.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get (and log) link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get (and log) link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the page following `last_comic['url']` (or from the
        first comic when `last_comic` is falsy) and yields the dicts
        returned by `get_comic_info`, completed with their 'url', until
        there is no next link or the next link leads to the same url.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would make us loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its url can be retrieved,
        False (after printing the reason) otherwise.
        """
        # The file-level `import urllib` alone does not guarantee that the
        # `urllib.error` submodule is loaded: import it explicitly.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when the checks
        pass, False (after printing the reason) otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both checks are always performed; the result is true only when
        both of them pass.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: comics exposing a list of comics (aka 'archive').

    The method `get_next_comic` is implemented in terms of new, more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element corresponds to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about the comic found in `soup`."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` is met;
        the elements after it are retrieved and yielded with their 'url'
        set. When `last_comic` is falsy, every element is retrieved.
        """
        pending_url = None
        if last_comic:
            pending_url = last_comic['url']
        elements = list(cls.get_archive_elements())
        for elt in elements:
            elt_url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % elt_url)
            if pending_url is not None:
                # Still looking for the last comic already retrieved.
                if pending_url == elt_url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (elt_url, str(elt)))
            info = cls.get_comic_info(get_soup_at_url(elt_url), elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = elt_url
            yield info
        if pending_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (pending_url, len(elements)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Find the 'link' tag with rel 'next' (or 'prev' when `next_` is false).

    Implementation of get_navi_link.
    """
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('link', rel=rel)
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Find the 'a' tag with rel 'next' (or 'prev' when `next_` is false).

    Implementation of get_navi_link.
    """
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('a', rel=rel)
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Find the 'a' tag with class 'navi navi-next' (or 'navi navi-prev').

    Implementation of get_navi_link.
    """
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Find the 'a' tag with the 'navi comic-nav-next navi-next' class (or its previous counterpart).

    Implementation of get_navi_link.
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Find the 'a' tag with the 'comic-nav-base comic-nav-next' class (or its previous counterpart).

    Implementation of get_navi_link.
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Find the 'a' tag with class 'navi navi-first' on the page at `cls.url`.

    Implementation of get_first_comic_link.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Find the 'a' tag inside the 'div' of class 'nav-first' on the page at `cls.url`.

    Implementation of get_first_comic_link. Returns None when the page has
    no such 'div', so that callers can handle a missing first link the same
    way as with the other implementations.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if div is None:
        # No navigation block on the page: report "no link" instead of
        # failing with an AttributeError on None.
        return None
    return div.find('a')
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Find the 'a' tag with class 'comic-nav-base comic-nav-first' on the page at `cls.url`.

    Implementation of get_first_comic_link.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Build a link-like object from the `first_url` attribute of the class.

    Implementation of get_first_comic_link.

    Note: the first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    link = {}
    link['href'] = cls.first_url
    return link
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Find the first comic by following the "previous" links.

    Implementation of get_first_comic_link. The navigation starts from
    `cls.first_url` when the class has one, from a URL typed by the user
    otherwise, and goes to the previous comic until there is no previous
    comic. Every URL visited is printed.

    Sometimes, the first comic cannot be reached directly. Once its URL
    is known, it is better to hardcode it but for development purposes,
    it is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.
    """
    try:
        current_url = cls.first_url
    except AttributeError:
        current_url = input("Get starting URL: ")
    while True:
        print(current_url)
        prev_link = cls.get_prev_link(get_soup_at_url(current_url))
        if not prev_link:
            # Nothing before this page: it is the first comic.
            return {'href': current_url}
        current_url = cls.get_url_from_link(prev_link)
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be used to temporarily deactivate a comic that does not work
    properly, by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY',)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Log that the comic is empty and return no comic at all."""
        cls.log("comic is considered as empty - returning no comic")
        return list()
358
359
360
class GenericComicNotWorking(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic is not working.

    More explicit than GenericEmptyComic: it highlights that only the
    implementation is broken and that it can be fixed.
    """
    _categories = ('NOTWORKING',)
366
367
368
class GenericUnavailableComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic is not available.

    More explicit than GenericEmptyComic: it highlights that the source
    of the comic is not available but is expected to be back soonish.
    See also GenericDeletedComic.
    """
    _categories = ('UNAVAILABLE',)
375
376
377
class GenericDeletedComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic does not exist anymore.

    More explicit than GenericEmptyComic: it highlights that the source
    of the comic does not exist anymore and probably cannot be fixed.
    The corresponding classes are kept as the downloaded data can still
    be used. See also GenericUnavailableComic.
    """
    _categories = ('DELETED',)
385
386
387 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The images kept are the 'img' tags whose source is in the
        wp-content/uploads directory of the site; title and date come from
        the 'og:title' and 'article:published_time' meta tags.
        """
        # Escape the base URL so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
413
414
415 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Base class for the comics hosted on Le Monde blogs.

    `first_url` is a placeholder here: `simulate_first_link` reads it, so
    concrete classes have to provide a real value.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found in `soup`."""
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        date_str = soup.find("span", class_="entry-date").string
        # The date is parsed with the French locale.
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        img_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            img_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
        return {
            'title': title,
            'url2': url2,
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
438
439
440
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics, retrieved from the corresponding Le Monde blog."""
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
    name = 'zep'
    long_name = 'Zep World'
446
447
448
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics, retrieved from the corresponding Le Monde blog."""
    url = 'http://vidberg.blog.lemonde.fr'
    # Not the actual first comic: no efficient way to retrieve it was found
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
455
456
457
class Plantu(GenericLeMondeBlog):
    """Plantu comics, retrieved from the corresponding Le Monde blog."""
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
    name = 'plantu'
    long_name = 'Plantu'
463
464
465
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics, retrieved from the corresponding Le Monde blog."""
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
    name = 'gorce'
    long_name = 'Xavier Gorce'
471
472
473
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics, retrieved from the corresponding Le Monde blog."""
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
    name = 'forpeace'
    long_name = 'Cartooning For Peace'
479
480
481
class Aurel(GenericLeMondeBlog):
    """Aurel comics, retrieved from the corresponding Le Monde blog."""
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
    name = 'aurel'
    long_name = 'Aurel'
487
488
489
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics, retrieved from the corresponding Le Monde blog."""
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
    name = 'culottees'
    long_name = 'Les Culottees'
495
496
497
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics, retrieved from the corresponding Le Monde blog."""
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
503
504
505 View Code Duplication
class Rall(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for Ted Rall comics (currently flagged as not working)."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    # Not the actual first comic: no efficient way to retrieve it was found
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        entry = soup.find('div', class_='entry-content')
        # The last 7 images are dropped: social media buttons
        comic_imgs = entry.find_all('img')[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
536
537
538
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if there is none."""
        # The 'prev' item is used for the next comic and the 'next' item
        # for the previous one.
        if next_:
            li_class = 'prev'
        else:
            li_class = 'next'
        li = last_soup.find('li', class_=li_class)
        if not li:
            return None
        return li.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found in `soup`."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        img_metas = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (YYYY-MM-DD) of the date are used.
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in img_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
572
573
574
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like object for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found in `soup`.

        The title comes from the link, the date from the year/month/day
        components of the url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = map(int, url_date_re.match(url).groups())
        entry = soup.find("div", class_="entry")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry.find_all("img")],
        }
602
603
604
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found in `soup`.

        Image sources are joined to the url of the site.
        """
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = soup.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the images: at least one, with alt and title
        # attributes equal to each other and either empty or the post title.
        assert imgs
        assert all(img['alt'] == img['title'] for img in imgs)
        assert all(img['alt'] in (title, "") for img in imgs)
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in imgs]
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': img_urls,
        }
637
638
639
class ItsTheTie(GenericDeletedComic, GenericNavigableComic):
    """Retriever for It's the tie comics (flagged as deleted)."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found in `soup`."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs_src = [m['content'] for m in soup.find_all('meta', property='og:image')]
        bonus_src = [b['data-oversrc']
                     for b in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(imgs_src)
        for src in bonus_src:
            # Bonus images already listed in og:image are not repeated.
            if src not in imgs_src:
                candidates.append(src)
        all_imgs_src = []
        for src in candidates:
            if not src.endswith("/2016/01/bonus-panel.png"):
                all_imgs_src.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
673
674
675 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found in `soup`."""
        date_header = soup.find('h2', class_='date-header')
        # The date is parsed with the French locale.
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        img_srcs = [img['src'] for img in body.find_all('img')]
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': img_srcs,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
699
700
701 View Code Duplication
class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for 1111 Comics (currently flagged as not working)."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found in `soup`."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        img_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in img_metas],
        }
726
727
728 View Code Duplication
class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
    """Retriever for Angry at Nothing comics (flagged as deleted)."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found in `soup`."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        img_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in img_metas],
        }
752
753
754
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the `p` parameter of the short url of the page;
        exactly one image is expected in the 'comic' div, its 'alt' and
        'title' attributes being used as titles.
        """
        # Escape the base URL so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
780
781
782 View Code Duplication
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic url ("<url>/comic/<year>/<month>/<day>").
        """
        comic_url = cls.get_url_from_link(link)
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, date_re.match(comic_url).groups())
        display = soup.find('div', class_='comic-display')
        imgs = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [img['src'] for img in imgs],
        }
810
811
812
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (None if there is no nav div)."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the <meta> tags of the page.
        """
        def meta_content(prop):
            """Return the content of the first meta tag with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        imgs = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        day = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in imgs],
            'author': author,
            'tags': tags,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
848
849
850
class VictimsOfCircumsolar(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # Date is on the archive page
        # When several og:title / og:description tags exist, the last one is used.
        og_titles = soup.find_all('meta', property='og:title')
        title = og_titles[-1]['content']
        og_descs = soup.find_all('meta', property='og:description')
        desc = og_descs[-1]['content']
        imgs = soup.find('div', id='comic').find_all('img')
        for img in imgs:
            assert img['title'] == img['alt'] == title
        return {
            'title': title,
            'description': desc,
            'img': [img['src'] for img in imgs],
        }
873
874
875
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the parent of the "first" button image)."""
        first_button = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the navigation button image; None is
        returned when that parent has no href.
        """
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        link = last_soup.find('img', src=button_src).parent
        if link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('title')
        # Images whose src ends with one of these are not part of the comic.
        skipped_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(skipped_suffixes)]
        alts = [img.get('alt') for img in imgs]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(alt for alt in alts if alt),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
907
908
909
class DeadlyPanel(GenericComicNotWorking, GenericNavigableComic):  # Not working on my machine
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics (image urls only)."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # alt and title are expected to be the same on every image.
        for img in imgs:
            assert img['alt'] == img['title']
        return {
            'img': [img['src'] for img in imgs],
        }
927
928
929 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date come from the post header, the images from
        the comic div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        return {
            'img': [img['src'] for img in comic_div.find_all('img')],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
953
954
955 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        'title2' is the title attribute of the first image of the comic pane.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        comic_pane = soup.find('div', class_='comicpane')
        imgs = comic_pane.find_all('img')
        for img in imgs:
            assert img['alt'] == img['title']
        title2 = imgs[0]['title']
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in imgs],
            'title': title,
            'title2': title2,
            'author': author,
        }
983
984
985 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find("h1", class_="comic_title").string
        comic_date = soup.find("span", class_="comic_date").string
        day = string_to_date(comic_date, "%B %d, %Y")
        imgs = soup.find_all("img", class_="comic")
        # alt and title of every image are expected to equal the comic title.
        for img in imgs:
            assert img['alt'] == img['title'] == title
        return {
            'title': title,
            # images with an empty src are left out
            'img': [img['src'] for img in imgs if img["src"]],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1012
1013
1014
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is always returned; the image of the 'aftercomic' div
        is appended when that div exists and its image has a non-empty src.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        aftercomic = soup.find('div', id='aftercomic')
        if aftercomic:
            bonus_url = aftercomic.find('img')['src']
            if bonus_url:
                img_urls.append(bonus_url)
        publish_div = soup.find('div', class_='cc-publishtime')
        day = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, u)) for u in img_urls],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1046
1047
1048 View Code Duplication
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Get the thumbnail links of the home page, in reverse page order."""
        home_soup = get_soup_at_url(cls.url)
        thumbnail_div = home_soup.find('div', id='all_thumbnails')
        links = thumbnail_div.find_all('a')
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        og_title = soup.find('meta', property='og:title')
        name = og_title['content']
        og_images = soup.find_all('meta', property='og:image')
        # Exactly one og:image tag is expected.
        assert len(og_images) == 1
        return {
            'name': name,
            'img': [meta['content'] for meta in og_images],
        }
1071
1072
1073 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The description is optional: an empty string is used when the
        og:description tag is missing.
        """
        title = soup.find('meta', property='og:title')['content']
        metadesc = soup.find('meta', property='og:description')
        if metadesc:
            desc = metadesc['content']
        else:
            desc = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        day = string_to_date(published_time[:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1099
1100
1101
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the comic number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic url, the date from the image url
        (".../<year>-<month>-<day>-...") and the title from the archive link.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = map(int, comic_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1137
1138
1139
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (first link of the centered nav div)."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic url ("<url>/<year>/<month>/<day>/").
        """
        comic_url = cls.get_url_from_link(link)
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_re.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        imgs = story.find_all('img')
        img_titles = [img.get('title') for img in imgs]
        texts = '  '.join(t for t in img_titles if t)
        title = soup.find('title').string
        return {
            # images without a src attribute are ignored
            'img': [convert_iri_to_plain_ascii_uri(img['src'])
                    for img in imgs if img.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1167
1168
1169
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics."""
    # Only site-specific data is defined here; the methods come from
    # GenericBouletCorp.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1175
1176
1177
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics."""
    # Only site-specific data is defined here; the methods come from
    # GenericBouletCorp.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1182
1183
1184 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-joined title attributes of the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join([img['title'] for img in imgs])
        for img in imgs:
            assert img['alt'] == img['title']
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1209
1210
1211
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the alt text of the first comic image, or an empty
        string when the comic div holds no image.
        """
        entry_meta = soup.find('div', class_='entry-meta')
        day = string_to_date(entry_meta.contents[0].strip(), "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if imgs:
            first_img = imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in imgs],
        }
1239
1240
1241
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has an 'extrapanelbutton' div, the linked page is
        fetched and its 'extrapanelimage' images are added after the
        regular comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img') if comic_div else []
        extra_url = None
        extra_div = soup.find('div', id='extrapanelbutton')
        if extra_div:
            extra_url = extra_div.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            imgs.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
1275
1276
1277
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        when it has no href attribute.
        """
        # NOTE(review): 'previous-comic ' has a trailing space; kept as is
        # because it may mirror the markup of the site - confirm.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches: check it before calling
        # .get() so that a missing anchor means "no link" instead of raising
        # AttributeError.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second to last '/'-separated part of the
        og:url meta tag; the author is the credit text without its leading
        'by '.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1316
1317
1318
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the "/comic/<num>" links
        of the home page; when last_comic is given, iteration starts right
        after its number.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(a['href']).group(1)) for a in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        img_src_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # images are kept in reverse page order
            imgs = soup.find_all('img', src=img_src_re)[::-1]
            description = soup.find('meta', attrs={'name': 'description'})['content']
            img_titles = [img.get('title') for img in imgs]
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in img_titles if t),
                'img': [urljoin_wrapper(url, img['src']) for img in imgs],
                'description': description,
            }
1348
1349
1350
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        comic_links = get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Date and text come from the archive link (its string and the string
        of its next sibling), image and title from the comic page.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        cleaned_date = remove_st_nd_rd_th_from_date(date_str)
        day = string_to_date(cleaned_date, "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % cls.url)
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1383
1384
1385
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures year, month and day.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the archive link, the image from the page.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        img = soup.find('div', id='comic').find('img')
        # The alt text of the image is expected to be the comic title.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1413
1414
1415
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ("<year>/<month>/");
        each month page lists images whose src starts with
        "<year><month><day>". Only comics dated strictly after the last
        retrieved one (or after 1985-11-01 when there is none) are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the raw strings: they are reused as is to build the image
            # regexp and the image url.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months that ended before the last retrieved comic
            # (first day of the month + 31 days is past the end of any month).
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1449
1450
1451
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the urls of the comic strip images.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Number and title come from the archive link, the image from the page.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1474
1475
1476
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the "first" button image)."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the matching button image; None is
        returned when the image is not on the page.
        """
        direction = 'next' if next_ else 'prev'
        button_url = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_url)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})
        title = twitter_title['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
        }
1505
1506
1507
class Octopuns(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the "First" button image)."""
        first_button = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the navigation button image; None is
        returned when that parent has no href.
        """
        button_re = re.compile('.*/Next.png' if next_ else '.*/Back.png')
        link = last_soup.find('img', src=button_re).parent
        if link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h3', class_='post-title entry-title').string
        date_header = soup.find('h2', class_='date-header').string
        day = string_to_date(date_header, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [tag['href'] for tag in image_links],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1539
1540
1541
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor pointing to the next (or previous) article."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and the image urls of an article page."""
        title = soup.find('meta', property='og:title')['content']
        article = soup.find('div', class_='single-article')
        # Image sources are resolved against the class url.
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in article.find_all('img')]
        return {
            'title': title,
            'img': img_urls,
        }
1565
1566
1567
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose href ends with 'comic=1'."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its title attribute."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract comic number, image url and title from a comic page."""
        comic_url = cls.get_url_from_link(link)
        # The comic number is the trailing 'comic=<digits>' part of the url.
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, img['src'])],
            'title': img.get('title')
        }
1597
1598
1599
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the div with id 'st'."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous div, or None if absent."""
        div_id = "nx" if next_ else "pvs"
        div = last_soup.find("div", id=div_id)
        if not div:
            return None
        return div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image urls and description from a comic page."""
        title = soup.find('title').string
        # Exactly one title image and one strip image are expected.
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        imgs = [*title_imgs, *strip_imgs]
        desc = ' '.join([img['title'] for img in imgs])
        img_urls = [img['src'] for img in imgs]
        return {
            'title': title,
            'img': img_urls,
            'description': desc,
        }
1633
1634
1635
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its accesskey."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description and image urls from a comic page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        img_urls = [img['src'] for img in soup.find_all('img', itemprop="image")]
        return {
            'title': title,
            'description': desc,
            'img': img_urls,
        }
1659
1660
1661
class SomethingOfThatIlk(GenericDeletedComic):
    """Entry for the Something Of That Ilk comics.

    Inherits from GenericDeletedComic and only declares identification data.
    """
    url = 'http://www.somethingofthatilk.com'
    long_name = 'Something Of That Ilk'
    name = 'somethingofthatilk'
1666
1667
1668
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and the image urls from a comic page."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_urls = []
        for img in comic_div.find_all('img'):
            img_urls.append(img['src'])
        return {
            'title': title,
            'img': img_urls,
        }
1686
1687
1688
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's 'bookmark' anchors in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, image, title, alt text and tags from a comic page."""
        date_str = soup.find('div', class_='postdate').find('em').string
        comic_date = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # Defaults apply to pages that have no 'comic' div.
        img_src, alt, title = [], '', ''
        div = soup.find('div', id='comic')
        if div:
            img = div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        return {
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1725
1726
1727
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image urls from a comic page."""
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find('span', class_='post-date')
        comic_date = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_urls = [img['src'] for img in comic_div.find_all('img')]
        return {
            'img': img_urls,
            'title': title,
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year,
        }
1749
1750
1751
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls ('img'), the post title ('title')
        and the alt text of the first image ('alt'). A page without a comic
        div or without images yields an empty image list and an empty alt
        text instead of raising.
        """
        title = soup.find('h2', class_='post-title').string
        # Some pages may carry no comic at all: tolerate a missing div/images.
        div = soup.find("div", id="comic")
        imgs = div.find_all("img") if div else []
        assert all(i['alt'] == i['title'] for i in imgs)
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1772
1773
1774
class MouseBearComedy(GenericComicNotWorking):  # Website has changed
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, image urls, title and author from a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        comic_date = string_to_date(date_span.string, '%B %d, %Y')
        imgs = soup.find("div", id="comic").find_all("img")
        # Both alt and title of every image must repeat the post title.
        assert all(img['alt'] == img['title'] == title for img in imgs)
        img_urls = [img['src'] for img in imgs]
        return {
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year,
            'img': img_urls,
            'title': title,
            'author': author,
        }
1800
1801
1802
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the image urls and a title built from the image titles."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        assert all(img['title'] == img['alt'] for img in imgs)
        # The title is the space-joined list of the image titles.
        title = ' '.join([img['title'] for img in imgs])
        img_urls = [img['src'] for img in imgs]
        return {
            'img': img_urls,
            'title': title,
        }
1821
1822
1823
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # NOTE: the url used to end with a stray space, which made it invalid.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the author, the publication date
        ('day', 'month', 'year') and the image urls ('img').
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # These 'og:image' urls are excluded from the comic images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1855
1856
1857
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the publication date ('day', 'month', 'year'),
        the image urls ('img'), the post title ('title') and the alt text
        of the first image ('alt'). A page without a comic div or without
        images yields an empty image list and an empty alt text instead of
        raising.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        # Some pages may carry no comic at all: tolerate a missing div/images.
        div = soup.find('div', id='comic')
        imgs = div.find_all('img') if div else []
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1884
1885
1886
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, image urls, title and author from a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        comic_date = string_to_date(date_span.string, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        # At least one image is required, each one labelled with the title.
        assert imgs
        assert all(img['title'] == img['alt'] == title for img in imgs)
        img_urls = [img['src'] for img in imgs]
        return {
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year,
            'img': img_urls,
            'title': title,
            'author': author,
        }
1914
1915
1916
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short url, image urls, tags and date from a page."""
        title = soup.find('title').string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        # Only the leading 'YYYY-MM-DD' part of the datetime attribute is used.
        timestamp = soup.find('time')['datetime']
        comic_date = string_to_date(timestamp[:10], "%Y-%m-%d")
        img_urls = [img['src'] for img in imgs]
        return {
            'title': title,
            'short_url': short_url,
            'img': img_urls,
            'tags': tags,
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
        }
1943
1944
1945
class TheDoghouseDiaries(GenericDeletedComic, GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract titles, alt text, image url and number from a comic page."""
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        title2 = soup.find('div', id='subtext').string
        alt = img.get('title')
        img_url = urljoin_wrapper(comic_url, img['src'].strip())
        # The comic number is the last path component of the comic url.
        num = int(comic_url.split('/')[-1])
        return {
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [img_url],
            'num': num,
        }
1974
1975
1976
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title and the month/day come from the archive cell `td`, the year
        from the comic url and the image from the comic page `soup`.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # The site url is escaped so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert len(imgs) == 1
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
2012
2013
2014
class DiscoBleach(GenericDeletedComic):
    """Entry for the Disco Bleach comics.

    Inherits from GenericDeletedComic and only declares identification data.
    """
    url = 'http://discobleach.com'
    long_name = 'Disco Bleach'
    name = 'discobleach'
2019
2020
2021
class TubeyToons(GenericDeletedComic):
    """Entry for the TubeyToons comics.

    Inherits from GenericDeletedComic and only declares identification data.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    url = 'http://tubeytoons.com'
    long_name = 'Tubey Toons'
    name = 'tubeytoons'
    _categories = ('TUNEYTOONS', )
2029
2030
2031
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, image urls, title, alt text and author from a page."""
        title = soup.find('h2', class_='post-title').string
        # The author is the second child of the 'post-author' span.
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        date_span = soup.find('span', class_='post-date')
        comic_date = string_to_date(date_span.string, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        assert imgs
        # All images must share the same title/alt text as the first one.
        alt = imgs[0]['title']
        assert all(img['title'] == img['alt'] == alt for img in imgs)
        img_urls = [img['src'] for img in imgs]
        return {
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
        }
2059
2060
2061
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls ('img', at most one) and the
        title attribute of the image ('title', empty when missing).
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links pointing to comic pages, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # The site url is escaped so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2085
2086
2087
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled 'First' from the class url page."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image urls and publication date from a comic page."""
        title = soup.find('h1').string
        date_span = soup.find('span', class_='date')
        comic_date = string_to_date(date_span.string.strip(), "%B %d, %Y")
        # Only images with empty alt and title attributes are kept.
        comic_div = soup.find('div', class_='comic')
        img_urls = [img['src'] for img in comic_div.find_all('img', alt='', title='')]
        return {
            'title': title,
            'img': img_urls,
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
        }
2117
2118
2119
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, image urls, title and author from a comic page."""
        raw_date = soup.find('span', class_='post-date').string
        comic_date = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        # Pages without a 'comic' div simply have no images.
        div = soup.find('div', id='comic')
        if div:
            imgs = div.find_all('img')
        else:
            imgs = []
        # The title is taken from the first image, when there is one.
        if imgs:
            title = imgs[0]['title']
        else:
            title = ""
        assert all(img['title'] == img['alt'] == title for img in imgs)
        img_urls = [img['src'] for img in imgs]
        return {
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'img': img_urls,
            'title': title,
            'author': author,
        }
2145
2146
2147
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no corresponding arrow image, instead
        of raising AttributeError on the missing element.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return None if arrow is None else arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (from the 'twitter:title' meta tag) and
        the image urls (from the 'og:image' meta tags).
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2173
2174
2175
class TurnOffUs(GenericListableComic):
    """Retriever for the TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the post list, in reverse page order."""
        all_posts_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        post_list = all_posts_soup.find('ul', class_='post-list')
        post_links = post_list.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Extract the title and the image urls from a comic page."""
        title = soup.find('meta', property='og:title')['content']
        img_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            img_urls.append(meta['content'])
        return {
            'title': title,
            'img': img_urls,
        }
2197
2198
2199
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held by the second cell of the row."""
        _first, title_cell, _last = tr.find_all('td')
        return title_cell.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Extract date, titles, description, tags and images for a comic.

        The date and the first title come from the archive row `tr`, the
        remaining fields from the comic page `soup`.
        """
        # A row is expected to hold exactly three cells.
        _, title_cell, date_cell = tr.find_all('td')
        anchor = title_cell.find('a')
        comic_date = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(t['content'] for t in tag_metas)
        imgs = soup.find('div', class_='entry-content').find_all('img')
        img_urls = [img['src'] for img in imgs]
        alt = ' '.join(img['alt'] for img in imgs)
        return {
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': img_urls,
            'alt': alt,
        }
2241
2242
2243
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images, alt text, date and author from a comic page."""
        imgs = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all read from the 'post-content' div.
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        date_span = post.find('span', class_='post-date')
        comic_date = string_to_date(date_span.string, "%B %d, %Y")
        assert all(img['alt'] == img['title'] for img in imgs)
        img_urls = [img['src'] for img in imgs]
        alt = ''.join(img['alt'] for img in imgs)
        return {
            'title': title,
            'img': img_urls,
            'alt': alt,
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'author': author,
        }
2270
2271
2272
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, transcript, short url and image urls from a page."""
        title = soup.find('title').string
        img_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript = transcript_div.string
        img_urls = [meta['content'] for meta in img_metas]
        return {
            'title': title,
            'transcript': transcript,
            'short_url': short_url,
            'img': img_urls,
        }
2293
2294
2295
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    first_url = 'http://fatawesome.com/shortbus/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, tags, alt, image url and date."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        # The tag meta is optional.
        tags_prop = soup.find('meta', property='article:tag')
        if tags_prop:
            tags = tags_prop['content']
        else:
            tags = ""
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        published = soup.find('meta', property='article:published_time')['content']
        comic_date = string_to_date(published[:10], "%Y-%m-%d")
        imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(imgs) == 1
        alt = "".join(img['alt'] for img in imgs)
        # Anything after the last '?' of the image source is dropped.
        img_urls = [img['src'].rsplit('?', 1)[0] for img in imgs]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt,
            'img': img_urls,
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
        }
2326
2327
2328
class JuliasDrawings(GenericListableComic):
    """Retriever for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first <a> of each 'li post' article of the home page, in reversed page order."""
        posts = get_soup_at_url(cls.url).find_all('article', class_='li post')
        links = [post.find('a') for post in posts]
        links.reverse()
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (title, img, date) of a drawing page."""
        published = soup.find('meta', property='og:article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        pictures = content.find_all('img')
        return {
            'title': title,
            # Image sources are resolved against the site url.
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2354
2355
2356
class AnythingComic(GenericListableComic):
    """Retriever for Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the comic rows (<tr>) of the chapter table found on the archive page."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        table = get_soup_at_url(archive_url).find('table', id='chapter_table')
        rows = table.find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute comic url for an archive row (which must hold 4 <td>)."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict from the comic page and its archive row.

        Number, title and date come from the row cells; image and alt text
        come from the page, which must hold exactly one <img id="comic_image">.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        raw_date = date_cell.string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        day = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        for pic in pictures:
            assert pic.get('alt') == pic.get('title')
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2397
2398
2399
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, img, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2425
2426
2427
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, img, date) from the <meta> tags of a comic page."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2453
2454
2455
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic anchor, or None if there is none.

        Anchors whose href is exactly '/comic' are ignored.
        """
        if next_:
            rel = 'next'
        else:
            rel = 'prev'
        candidates = last_soup.find_all('a', rel=rel)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page from its itemprop-annotated elements."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        for pic in pictures:
            assert pic['title'] == pic['alt']
        # The alt text of the first image (if any) is used for the comic.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        timestamp = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(timestamp, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2498
2499
2500
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (img, title, alt, author, date) of a comic page.

        At least one image is required in <div id="comic">; every image must
        carry the same text in both its 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        alt = pictures[0]['alt']
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2527
2528
2529
class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (img, title, author, date) of a comic page.

        At most one image is accepted, and its 'alt' and 'title' attributes
        must both equal the post title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # Dates are parsed with the German locale.
        day = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title'] == title
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2555
2556
2557
class BiterComics(GenericNavigableComic):
    """Retriever for Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (img, title, alt, author, date) of a comic page.

        Exactly one image is expected in <div id="comic">, with identical
        'alt' and 'title' attributes.
        """
        title = soup.find("h1", class_="entry-title").string
        vcard = soup.find("span", class_="author vcard")
        author = vcard.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        only_picture = pictures[0]
        alt = only_picture['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2585
2586
2587
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (img, title, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Only the first image is required to have matching alt/title.
        for first in pictures[:1]:
            assert first['alt'] == first['title']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2614
2615
2616
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, img) from the post content of a comic page."""
        post = soup.find('div', class_='post-content')
        heading = post.find('h2', class_='post-title')
        title = heading.string
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
2634
2635
2636
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (img, title, alt, author, date) of a comic page.

        At most one image is accepted; its 'alt' and 'title' must match.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        # A page without image gets an empty alt text.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2666
2667
2668
class LastPlaceComics(GenericNavigableComic):
    """Retriever for Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (img, title, alt, author, date) of a comic page.

        Zero or one image may be present; 'alt' and 'title' must agree on it.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        alt = "" if not pictures else pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2696
2697
2698
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (img, title, alt, author, date) of a comic page.

        Every image must have identical 'alt' and 'title' attributes; the alt
        text of the first image (or "" without image) is reported.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2728
2729
2730
class EndlessOrigami(GenericComicNotWorking, GenericNavigableComic):  # Nav not working
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (img, title, alt, author, date) of a comic page.

        'alt' and 'title' must agree on every image; the reported alt text is
        the one of the first image, or "" when the page has no image.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        alt = "" if not pictures else pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2757
2758
2759
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, img, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2781
2782
2783
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (img, title) of a comic page.

        Exactly one image is expected in <div id="comic">; its 'title'
        attribute (identical to its 'alt') is used as the comic title.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
2801
2802
2803
class GenericCommitStrip(GenericNavigableComic):
    """Base retriever shared by the Commit Strip sites (one subclass per language)."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each language subclass sets its own first comic url.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, title2, description, img) of a comic page.

        'title2' is the space-joined 'title' attributes of the images (an
        image without that attribute contributes an empty string).
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        pictures = entry.find_all('img')
        picture_titles = [pic.get('title', '') for pic in pictures]
        return {
            'title': title,
            'title2': ' '.join(picture_titles),
            'description': desc,
            # Sources are converted to plain ASCII URIs, then resolved against the site url.
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
                for pic in pictures
            ],
        }
2822
2823
2824
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    # Url of the first comic of the French edition.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2831
2832
2833
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Url of the first comic of the English edition.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2839
2840
2841
class GenericBoumerie(GenericNavigableComic):
    """Base retriever shared by the Boumeries sites (one subclass per language)."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: each language subclass sets its own date format and locale.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (short_url, img, title, author, date) of a comic page.

        The date is parsed with the subclass' date_format and lang; every
        image must have identical 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2867
2868
2869
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Format and locale used to parse the post dates.
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2876
2877
2878
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Format and locale used to parse the post dates.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2886
2887
2888
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict holding:
          - 'title': string of the first <h1> (else first <h2>), "" if neither exists,
          - 'description': content of the og:description <meta>, None if absent,
          - 'url2': href of the shortlink <link>,
          - 'img': sources of the images found in the portfolio entry,
          - 'month', 'year', 'day': publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the <meta> rather than the tag object itself, so
        # that 'description' is a plain string as in the other comic classes.
        # A page without that <meta> still yields None, as before.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content') if desc_elt else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2920
2921
2922
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, alt, author, img, date) of a comic page.

        A page without <div id="comic"> is handled as a page without image.
        Every image must carry the same text in its 'alt' and 'title'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2950
2951
2952
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the shortlink, the comic number taken from the
        shortlink ('<url>/?p=<num>'), the image urls, the publication date,
        the alt text and the title. At least one image is required and every
        image must carry the same text in its 'alt' and 'title'.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its regex metacharacters (the '.')
        # only match themselves.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2982
2983
2984
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the shortlink, the comic number taken from the
        shortlink ('<url>/?p=<num>'), the image urls, the publication date,
        the title, the space-joined article tags, the alt text and the author.
        At least one image is required and every image must carry the same
        text in its 'alt' and 'title'.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its regex metacharacters (the '.')
        # only match themselves.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
3021
3022
3023
class AHammADay(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching <li> (no further comic in
        that direction) instead of failing on the missing element.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls (from the itemprop='image' <meta>
        tags), the og:title, the blog author and the publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3054
3055
3056
class SystemComic(GenericNavigableComic):
    """Retriever for System Comic."""
    name = 'system'
    long_name = 'System Comic'
    url = 'http://www.systemcomic.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor held by the <li class="first"> of the home page."""
        first_item = get_soup_at_url(cls.url).find('li', class_='first')
        return first_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, date, img) of a comic page."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        # The date comes from the first <time> element of the page.
        timestamp = soup.find('time')["datetime"]
        day = string_to_date(timestamp, "%Y-%m-%d")
        # Images are taken from the first <figure> of the page.
        figure = soup.find('figure')
        pictures = figure.find_all('img')
        return {
            'title': title,
            'description': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
3084
3085
3086
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor found in the relevant <li>, or None when the page
        has no such list item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, alt text of the first image,
        description, author, publication date and image URLs of the page.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute cannot be downloaded: drop them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # A post may contain no usable image, or an image without alt text:
        # fall back to an empty string instead of raising.
        alt = imgs[0].get('alt', '') if imgs else ''
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
3125
3126
3127
class GenericWordPressInkblot(GenericNavigableComic):
    """Base retriever for WordPress sites using the Inkblot theme."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        return get_soup_at_url(cls.url).find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, publication date, images) from a page."""
        title = soup.find('meta', property='og:title')['content']
        image_tags = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the date part) of the value are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        info = {'title': title}
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        info['img'] = [tag['src'] for tag in image_tags]
        return info
3150
3151
3152
class EverythingsStupid(GenericWordPressInkblot):
    """Class to retrieve Everything's stupid Comics.

    Only identifying attributes are set here; navigation and page parsing
    are inherited from GenericWordPressInkblot.
    """
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3160
3161
3162
class TheIsmComics(GenericDeletedComic, GenericWordPressInkblot):
    """Class to retrieve The Ism Comics.

    Only identifying attributes are set here. GenericDeletedComic comes first
    in the bases, so its attributes take precedence over those of
    GenericWordPressInkblot.
    NOTE(review): presumably this marks the site as no longer available -
    confirm against GenericDeletedComic.
    """
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
3168
3169
3170
class WoodenPlankStudios(GenericWordPressInkblot):
    """Class to retrieve Wooden Plank Studios comics.

    Only identifying attributes are set here; navigation and page parsing
    are inherited from GenericWordPressInkblot.
    """
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3175
3176
3177
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from the Open Graph title and image tags."""
        info = {'title': soup.find('meta', property='og:title')['content']}
        og_images = soup.find_all('meta', property='og:image')
        info['img'] = [meta['content'] for meta in og_images]
        return info
3205
3206
3207
class SheldonComics(GenericNavigableComic):
    """Retriever for Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose id is 'nav-first' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first navigation anchor not pointing at the home page.

        None is returned when every candidate anchor links to the home page
        (or when there is no candidate at all).
        """
        anchor_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=anchor_id)
        return next(
            (a for a in candidates if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict; exactly one image is expected in the footer."""
        footer_imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(img['alt'] == img['title'] for img in footer_imgs)
        assert len(footer_imgs) == 1
        only_img = footer_imgs[0]
        info = {'title': only_img['title']}
        info['img'] = [img['src'] for img in footer_imgs]
        return info
3238
3239
3240
class Ubertool(GenericNavigableComic):
    """Retriever for Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (images, title, date) from a comic page."""
        title = soup.find('h2', class_='post-title').string
        # The date is written in full, e.g. parsed with "%B %d, %Y".
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        info = {'img': [img['src'] for img in comic_imgs]}
        info['title'] = title
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
3265
3266
3267
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'next' or 'prev' depending on direction."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (images, title, alt text) from a comic page."""
        page_title = soup.find('title').string
        image_tags = soup.find('div', id='image').find_all('img')
        # The 'title' attribute of the first image is used as alt text.
        first_image = image_tags[0]
        alt_text = first_image.get('title', '')
        absolute_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags]
        return {
            'img': absolute_urls,
            'title': page_title,
            'alt': alt_text,
        }
3292
3293
3294 View Code Duplication
class PomComics(GenericNavigableComic):
    """Retriever for Pom Comics / Piece of Me."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' or 'btn-prev' anchor depending on direction."""
        button_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, description, tags, images)."""
        info = {'title': soup.find('h1').string}
        info['desc'] = soup.find('meta', property='og:description')['content']
        info['tags'] = soup.find('meta', attrs={'name': 'keywords'})['content']
        comic_imgs = soup.find('div', class_='comic').find_all('img')
        # Image sources are resolved against the site URL.
        info['img'] = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return info
3324
3325
3326
class CubeDrone(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the 'backward' glyphicon span.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the chevron glyphicon span, or None
        when the page has no such span (instead of failing on None.parent).
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the Twitter-card URL and title, the title and alt
        attributes of the first comic image, and the image URLs.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3361
3362
3363
class MakeItStoopid(GenericDeletedComic, GenericNavigableComic):
    """Retriever for Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, None replacing those without href.

        The page is expected to hold the same navigation bar twice; this is
        checked with an assertion.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Expected order: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the home page."""
        navigation = cls.get_nav(get_soup_at_url(cls.url))
        return navigation[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict; the title comes from the link that led here."""
        info = {'title': link['title']}
        comic_imgs = soup.find_all('img', id='comicimg')
        info['img'] = [img['src'] for img in comic_imgs]
        return info
3397
3398
3399
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the entry title and the URLs of the images found
        in the entry content. `link` is not used.
        """
        # A leftover debugging print of `link` was removed from here.
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3421
3422
3423
class MarketoonistComics(GenericNavigableComic):
    """Retriever for Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (images, publication date, title)."""
        og_images = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the date part) of the value are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        info = {'img': [meta['content'] for meta in og_images]}
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        info['title'] = og_title
        return info
3446
3447
3448 View Code Duplication
class ConsoliaComics(GenericNavigableComic):
    """Retriever for Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor of class 'first' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor of class 'next' or 'prev' depending on direction."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, images, publication date)."""
        og_title = soup.find('meta', property='og:title')['content']
        published = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        info = {'title': og_title}
        info['img'] = [meta['content'] for meta in og_images]
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
3479
3480
3481
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retriever for Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (images, author, title) from a blog post."""
        page_title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        body_imgs = body.find_all('img')
        author_name = soup.find('span', itemprop='author').string
        info = {'img': [img['src'] for img in body_imgs]}
        info['author'] = author_name
        info['title'] = page_title
        return info
3506
3507
3508
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The site's classes are inverted: 'prev-item' leads to the next comic
        and 'next-item' to the previous one.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, alt text of the first image (empty
        string when the post has no usable image), description, author,
        publication date and absolute image URLs.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Content lives in one of two possible containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute cannot be downloaded: drop them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `'title' not in i` on a BeautifulSoup Tag appears to
        # test the tag's children rather than its attributes, so the alt/title
        # comparison may never run - confirm before relying on this check.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: previously an empty list was used when no image
        # was found, which made the type of 'alt' inconsistent.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3546
3547
3548
class GloryOwlComix(GenericNavigableComic):
    """Retriever for Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (images, author, title) from a blog post."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author_name = soup.find('a', rel='author').string
        info = {'img': [tag['href'] for tag in image_links]}
        info['author'] = author_name
        info['title'] = page_title
        return info
3573
3574
3575
class AtRandomComics(GenericNavigableComic):
    """Retriever for At Random Comics."""
    name = 'atrandom'
    long_name = 'At Random Comics'
    url = 'http://www.atrandomcomics.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.atrandomcomics.com/at-random-comics-home/2015/5/5/can-of-worms'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor for the requested direction.

        The ids are inverted on this site: 'prevLink' is used to move to the
        next comic and 'nextLink' to the previous one.
        """
        anchor_id = 'prevLink' if next_ else 'nextLink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, images, date, author, description)."""
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        time_tag = soup.find('time', itemprop='datePublished')
        published = string_to_date(time_tag["datetime"], "%Y-%m-%d")
        author_name = soup.find('a', rel='author').string
        og_images = soup.find_all('meta', property='og:image')
        info = {'title': og_title}
        info['img'] = [meta['content'] for meta in og_images]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        info['author'] = author_name
        info['description'] = og_desc
        return info
3607
3608
3609
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Subclasses only need to provide the identifying attributes (`url` in
    particular): the API endpoint is derived from `cls.url` and posts are
    converted to comic dicts by `get_comic_info`.
    """
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields one comic dict per post returned by `get_posts`, skipping the
        posts for which `get_comic_info` returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post.

        A message is printed (but the URL is still returned) when it does not
        start with `cls.url`.
        """
        url = post['url']
        if not url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_api_url(cls):
        """Return the API URL: `cls.url` joined with '/api/read/'."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API URL for a single post; `tumblr_id` must be an integer."""
        return cls.get_api_url() + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        `post` is a 'post' element from the API response. Returns None for
        posts whose type is not 'photo', otherwise a dict with the URLs, date,
        title (photo caption), tags, image URLs and Tumblr id of the post.
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        # fromtimestamp() without a tz argument converts to local time, so the
        # resulting date depends on the timezone of the machine.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        `last_comic` is the last comic already retrieved (a dict holding a
        'tumblr-id' entry) or a false value to retrieve everything. The
        result is an iterable of 'post' elements newer than `last_comic`;
        it is empty when the previous post cannot be fetched or is not found
        in the listing.
        """
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                # Distinguish "whole site unreachable" from "post missing".
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                # posts_acc is still empty here: nothing is returned.
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        # First call only reads the total number of posts.
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                tumblr_id = int(p['id'])
                # Stop as soon as the already-known post is reached.
                if waiting_for_id and waiting_for_id == tumblr_id:
                    return reversed(posts_acc)
                posts_acc.append(p)
        # Whole listing walked: fine only if no particular post was expected.
        if waiting_for_id is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_id)
        return []
3710
3711
3712
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Class to retrieve Saturday Morning Breakfast Cereal comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    # Overrides the ('TUMBLR', ) value set on GenericTumblrV1.
    _categories = ('SMBC', )
3720
3721
3722
class IrwinCardozo(GenericTumblrV1):
    """Class to retrieve Irwin Cardozo Comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3727
3728
3729
class AccordingToDevin(GenericTumblrV1):
    """Class to retrieve According To Devin comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3734
3735
3736
class ItsTheTieTumblr(GenericTumblrV1):
    """Class to retrieve It's the tie comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    # Overrides the ('TUMBLR', ) value set on GenericTumblrV1.
    _categories = ('TIE', )
3744
3745
3746
class OctopunsTumblr(GenericTumblrV1):
    """Class to retrieve Octopuns comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3752
3753
3754
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Class to retrieve Pictures In Boxes comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'https://picturesinboxescomic.tumblr.com'
3760
3761
3762
class TubeyToonsTumblr(GenericTumblrV1):
    """Class to retrieve TubeyToons comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'https://tubeytoons.tumblr.com'
    # Overrides the ('TUMBLR', ) value set on GenericTumblrV1.
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of 'TUBEYTOONS';
    # check the value used by the other Tubey Toons class before changing it.
    _categories = ('TUNEYTOONS', )
3770
3771
3772
class UnearthedComicsTumblr(GenericTumblrV1):
    """Class to retrieve Unearthed comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    # Overrides the ('TUMBLR', ) value set on GenericTumblrV1.
    _categories = ('UNEARTHED', )
3780
3781
3782
class PieComic(GenericTumblrV1):
    """Class to retrieve Pie Comic comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3787
3788
3789
class MrEthanDiamond(GenericTumblrV1):
    """Class to retrieve Mr Ethan Diamond comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3794
3795
3796
class Flocci(GenericTumblrV1):
    """Class to retrieve floccinaucinihilipilification comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3801
3802
3803
class UpAndOut(GenericTumblrV1):
    """Class to retrieve Up & Out comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3809
3810
3811
class Pundemonium(GenericTumblrV1):
    """Class to retrieve Pundemonium comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3816
3817
3818
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Class to retrieve Poorly Drawn Lines comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    # Overrides the ('TUMBLR', ) value set on GenericTumblrV1.
    _categories = ('POORLYDRAWN', )
3825
3826
3827
class PearShapedComics(GenericTumblrV1):
    """Class to retrieve Pear Shaped Comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    # Custom domain (not *.tumblr.com); the API URL is still derived from it.
    url = 'http://pearshapedcomics.com'
3832
3833
3834
class PondScumComics(GenericTumblrV1):
    """Class to retrieve Pond Scum Comics.

    Only identifying attributes are set here; retrieval is inherited from
    GenericTumblrV1.
    """
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3839
3840
3841
class MercworksTumblr(GenericTumblrV1):
    """Tumblr edition of the Mercworks webcomic."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3847
3848
3849
class OwlTurdTumblr(GenericTumblrV1):
    """Tumblr edition of the Owl Turd webcomic."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3856
3857
3858
class VectorBelly(GenericTumblrV1):
    """Vector Belly webcomic, retrieved through GenericTumblrV1."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3864
3865
3866
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture webcomic, retrieved through GenericTumblrV1."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
3873
3874
3875
class TheOatmealTumblr(GenericTumblrV1):
    """Tumblr edition of The Oatmeal webcomic."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3881
3882
3883
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Heck If I Know Comics webcomic."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3889
3890
3891
class MyJetPack(GenericTumblrV1):
    """My Jet Pack webcomic, retrieved through GenericTumblrV1."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3896
3897
3898
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Tumblr edition of the Cheer Up Emo Kid webcomic."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
3905
3906
3907
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic webcomic, retrieved through GenericTumblrV1."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3913
3914
3915
class ZenPencilsTumblr(GenericTumblrV1):
    """Tumblr edition of the Zen Pencils webcomic."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3923
3924
3925
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Tumblr edition of the Three Word Phrase webcomic."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
3931
3932
3933
class TimeTrabbleTumblr(GenericTumblrV1):
    """Tumblr edition of the Time Trabble webcomic."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3939
3940
3941
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Tumblr edition of the Safely Endangered webcomic."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3947
3948
3949
class MouseBearComedyTumblr(GenericTumblrV1):
    """Tumblr edition of the Mouse Bear Comedy webcomic."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3955
3956
3957
class BouletCorpTumblr(GenericTumblrV1):
    """Tumblr edition of the BouletCorp webcomic."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3964
3965
3966
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Tumblr edition of The Awkward Yeti webcomic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3975
3976
3977
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj webcomic, retrieved through GenericTumblrV1."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3982
3983
3984
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Tumblr edition of the Down The Upward Spiral webcomic."""
    # Also on http://www.downtheupwardspiral.com
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3991
3992
3993
class AsPerUsualTumblr(GenericTumblrV1):
    """Tumblr edition of the As Per Usual webcomic."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # The attribute was spelled `categories`; the other comic classes in this
    # module declare `_categories`, so the leading underscore is restored here.
    # NOTE(review): presumably `_categories` is the name the base classes
    # read - confirm against GenericComic.
    _categories = ('DAMILEE', )
4000
4001
4002
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Tumblr edition of the Hot Comics For Cool People webcomic."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # The attribute was spelled `categories`; the other comic classes in this
    # module declare `_categories`, so the leading underscore is restored here.
    # NOTE(review): presumably `_categories` is the name the base classes
    # read - confirm against GenericComic.
    _categories = ('DAMILEE', )
4011
4012
4013
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Tumblr edition of the 1111 Comics webcomic."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
4021
4022
4023
class JhallComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Jhall Comics webcomic."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
4029
4030
4031
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Tumblr edition of the Berkeley Mews webcomic."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
4039
4040
4041
class JoanCornellaTumblr(GenericTumblrV1):
    """Tumblr edition of the Joan Cornella webcomic."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
4047
4048
4049
class RespawnComicTumblr(GenericTumblrV1):
    """Tumblr edition of the Respawn Comic webcomic."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
4055
4056
4057
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Tumblr edition of the Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name previously read 'Chris Hallback'; the author is spelled
    # "Hallbeck" in the class name, the short name and the URL.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its historical 'HALLBACK' spelling
    # because other classes may share this exact key - rename them together.
    _categories = ('HALLBACK', )
4067
4068
4069
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets webcomic, retrieved through GenericTumblrV1."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
4074
4075
4076
class PigeonGazetteTumblr(GenericTumblrV1):
    """Tumblr edition of The Pigeon Gazette webcomic."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
4082
4083
4084
class CancerOwl(GenericTumblrV1):
    """Tumblr edition of the Cancer Owl webcomic."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
4090
4091
4092
class FowlLanguageTumblr(GenericTumblrV1):
    """Tumblr edition of the Fowl Language webcomic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
4101
4102
4103
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Tumblr edition of The Odd 1s Out webcomic."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
4110
4111
4112
class TheUnderfoldTumblr(GenericTumblrV1):
    """Tumblr edition of The Underfold webcomic."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
4118
4119
4120
class LolNeinTumblr(GenericTumblrV1):
    """Tumblr edition of the Lol Nein webcomic."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
4126
4127
4128
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Fat Awesome Comics webcomic."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
4134
4135
4136
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Tumblr edition of The World Is Flat webcomic."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
4142
4143
4144
class DorrisMc(GenericTumblrV1):
    """Dorris Mc webcomic, retrieved through GenericTumblrV1."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4150
4151
4152
class LeleozTumblr(GenericDeletedComic, GenericTumblrV1):
    """Tumblr edition of the Leleoz webcomic (flagged via GenericDeletedComic)."""
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
4158
4159
4160
class MoonBeardTumblr(GenericTumblrV1):
    """Tumblr edition of the Moon Beard webcomic."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://squireseses.tumblr.com'
    _categories = ('MOONBEARD', )
4168
4169
4170
class AComik(GenericTumblrV1):
    """A Comik webcomic, retrieved through GenericTumblrV1."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4175
4176
4177
class ClassicRandy(GenericTumblrV1):
    """Classic Randy webcomic, retrieved through GenericTumblrV1."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4182
4183
4184
class DagssonTumblr(GenericTumblrV1):
    """Tumblr edition of the Dagsson webcomic."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4190
4191
4192
class LinsEditionsTumblr(GenericTumblrV1):
    """Tumblr edition of the L.I.N.S. Editions webcomic."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('LINS', )
4200
4201
4202
class WarAndPeasTumblr(GenericTumblrV1):
    """Tumblr edition of the War And Peas webcomic."""
    # Was on https://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
4209
4210
4211
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish webcomic, retrieved through GenericTumblrV1."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4216
4217
4218
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics webcomic, retrieved through GenericTumblrV1."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4223
4224
4225
class HMBlanc(GenericTumblrV1):
    """HM Blanc webcomic, retrieved through GenericTumblrV1."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4230
4231
4232
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tumblr edition of the Tales Of Absurdity webcomic."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4240
4241
4242
class RobbieAndBobby(GenericTumblrV1):
    """Tumblr edition of the Robbie And Bobby webcomic."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4248
4249
4250
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Tumblr edition of the Electric Bunny Comics webcomic."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4256
4257
4258
class Hoomph(GenericTumblrV1):
    """Hoomph webcomic, retrieved through GenericTumblrV1."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4263
4264
4265
class BFGFSTumblr(GenericTumblrV1):
    """Tumblr edition of the BFGFS webcomic."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4272
4273
4274
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food webcomic, retrieved through GenericTumblrV1."""
    # Also on https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4280
4281
4282
class CassandraCalinTumblr(GenericTumblrV1):
    """Tumblr edition of the C. Cassandra webcomic."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4289
4290
4291
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken webcomic, retrieved through GenericTumblrV1."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4296
4297
4298
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster webcomic, retrieved through GenericTumblrV1."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4303
4304
4305
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) webcomic, retrieved through GenericTumblrV1."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://marcoandco.tumblr.com'
4310
4311
4312
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll webcomic, retrieved through GenericTumblrV1."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4317
4318
4319
class WebcomicName(GenericTumblrV1):
    """Webcomic Name webcomic, retrieved through GenericTumblrV1."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4324
4325
4326
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam webcomic, retrieved through GenericTumblrV1."""
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4332
4333
4334
class HarkAVagrant(GenericTumblrV1):
    """Tumblr edition of the Hark A Vagrant webcomic."""
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4340
4341
4342
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Tumblr edition of the Our Super Adventure webcomic."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4350
4351
4352
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions webcomic, retrieved through GenericTumblrV1."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4357
4358
4359
class InYourFaceCakeTumblr(GenericTumblrV1):
    """Tumblr edition of the In Your Face Cake webcomic."""
    # Also on https://tapas.io/series/In-Your-Face-Cake
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
    _categories = ('INYOURFACECAKE', )
4366
4367
4368
class Robospunk(GenericTumblrV1):
    """Robospunk webcomic, retrieved through GenericTumblrV1."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4373
4374
4375
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky webcomic, retrieved through GenericTumblrV1."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4380
4381
4382
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Tumblr edition of the Yesterday's Popcorn webcomic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
4389
4390
4391
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles webcomic, retrieved through GenericTumblrV1."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4396
4397
4398
class UbertoolTumblr(GenericTumblrV1):
    """Tumblr edition of the Ubertool webcomic."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4406
4407
4408
class LittleLifeLinesTumblr(GenericDeletedComic, GenericTumblrV1):
    """Tumblr edition of the Little Life Lines webcomic (flagged via GenericDeletedComic)."""
    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4414
4415
4416
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk webcomic, retrieved through GenericTumblrV1."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4421
4422
4423
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come webcomic, retrieved through GenericTumblrV1."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4428
4429
4430
class Sephko(GenericTumblrV1):
    """Sephko webcomic, retrieved through GenericTumblrV1."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4436
4437
4438
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn webcomic, retrieved through GenericTumblrV1."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4443
4444
4445
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Art By Moga webcomic (deactivated through GenericEmptyComic)."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4450
4451
4452
class VerbalVomitTumblr(GenericTumblrV1):
    """Tumblr edition of the Verbal Vomit webcomic."""
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4458
4459
4460
class LibraryComic(GenericTumblrV1):
    """Tumblr edition of the LibraryComic webcomic."""
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4466
4467
4468
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tumblr edition of the Tizzy Stitch Bird webcomic."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4476
4477
4478
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Tumblr edition of the Victims Of Circumsolar webcomic."""
    # Also on http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4484
4485
4486
class RockPaperCynicTumblr(GenericTumblrV1):
    """Tumblr edition of the Rock Paper Cynic webcomic."""
    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4493
4494
4495
class DeadlyPanelTumblr(GenericTumblrV1):
    """Tumblr edition of the Deadly Panel webcomic."""
    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4502
4503
4504
class CatanaComics(GenericComicNotWorking):  # Not a Tumblr anymore ?
    """Catana webcomic (currently derived from GenericComicNotWorking)."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4509
4510
4511
class AngryAtNothingTumblr(GenericTumblrV1):
    """Tumblr edition of the Angry At Nothing webcomic."""
    # Also on http://www.angryatnothing.net
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4518
4519
4520
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango webcomic, retrieved through GenericTumblrV1."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4525
4526
4527
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Tumblr edition of the Off The Leash Dog webcomic."""
    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4535
4536
4537
class ImogenQuestTumblr(GenericTumblrV1):
    """Tumblr edition of the Imogen Quest webcomic."""
    # Also on http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4543
4544
4545
class Shitfest(GenericTumblrV1):
    """Shitfest webcomic, retrieved through GenericTumblrV1."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4550
4551
4552
class IceCreamSandwichComics(GenericTumblrV1):
    """Ice Cream Sandwich Comics webcomic, retrieved through GenericTumblrV1."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4557
4558
4559
class Dustinteractive(GenericTumblrV1):
    """Dustinteractive webcomic, retrieved through GenericTumblrV1."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4564
4565
4566
class StickyCinemaFloor(GenericTumblrV1):
    """Sticky Cinema Floor webcomic, retrieved through GenericTumblrV1."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4571
4572
4573
class IncidentalComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Incidental Comics webcomic."""
    # Also on http://www.incidentalcomics.com
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
    url = 'http://incidentalcomics.tumblr.com'
4579
4580
4581
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """Tumblr edition of the A Pleasant Waste Of Time webcomic."""
    # Also on https://tapas.io/series/A-Pleasant-
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    url = 'https://artjcf.tumblr.com'
    _categories = ('WASTE', )
4588
4589
4590
class HorovitzComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Horovitz webcomic."""
    # Also on http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ', )
4597
4598
4599
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Tumblr edition of the Deep Dark Fears webcomic."""
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4604
4605
4606
class DakotaMcDadzean(GenericTumblrV1):
    """Dakota McFadzean webcomic, retrieved through GenericTumblrV1."""
    # The class name keeps its historical "McDadzean" misspelling so existing
    # references still resolve; the displayed name now matches the spelling
    # used in the blog URL below.
    name = 'dakota'
    long_name = 'Dakota McFadzean'
    url = 'http://dakotamcfadzean.tumblr.com'
4611
4612
4613
class ExtraFabulousComicsTumblr(GenericTumblrV1):
    """Tumblr edition of the Extra Fabulous Comics webcomic."""
    # Also on http://extrafabulouscomics.com
    name = 'efc-tumblr'
    long_name = 'Extra Fabulous Comics (from Tumblr)'
    url = 'https://extrafabulouscomics.tumblr.com'
    _categories = ('EFC', )
4620
4621
4622
class AlexLevesque(GenericTumblrV1):
    """Alex Levesque webcomic, retrieved through GenericTumblrV1."""
    name = 'alevesque'
    long_name = 'Alex Levesque'
    url = 'http://alexlevesque.com'
    _categories = ('FRANCAIS', )
4628
4629
4630
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """Tumblr edition of the James Of No Trades webcomic."""
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES', )
4639
4640
4641
class InfiniteGuff(GenericTumblrV1):
    """Infinite Guff webcomic, retrieved through GenericTumblrV1."""
    name = 'infiniteguff'
    long_name = 'Infinite Guff'
    url = 'http://infiniteguff.com'
4646
4647
4648
class SkeletonClaw(GenericTumblrV1):
    """Skeleton Claw webcomic, retrieved through GenericTumblrV1."""
    name = 'skeletonclaw'
    long_name = 'Skeleton Claw'
    url = 'http://skeletonclaw.com'
4653
4654
4655
class HorovitzComics(GenericDeletedComic, GenericListableComic):
    """Shared behaviour for the comics published on horovitzcomics.com.

    ``link_re`` is left as ``NotImplemented`` here; the concrete subclasses
    set it to the pattern selecting their own archive links.
    """
    # Also on https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe one comic from its page (soup) and its archive link.

        The comic number is the first group of ``link_re`` applied to the
        link's href; year, month and day are the three groups of ``img_re``
        applied to the ``src`` of the single ``<img id="comic">`` element.
        """
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = [int(part) for part in date_parts]
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page links matching ``link_re``, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4687
4688
4689
class HorovitzNew(HorovitzComics):
    """Horovitz comics whose archive links live under /comics/new/."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
4694
4695
4696
class HorovitzClassic(HorovitzComics):
    """Horovitz comics whose archive links live under /comics/classic/."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4701
4702
4703
class GenericGoComic(GenericNavigableComic):
    """Shared navigation and parsing logic for comics hosted on gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'backward' anchor found on the comic's main page."""
        # The class string (trailing space included) is matched as written.
        first_css = 'fa btn btn-outline-default btn-circle fa-backward sm '
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_=first_css)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic."""
        if next_:
            css = 'fa btn btn-outline-default btn-circle fa-caret-right js-next-comic hidden-sm-up sm '
        else:
            css = 'fa btn btn-outline-default btn-circle fa-caret-left js-previous-comic sm '
        return last_soup.find('a', class_=css)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the link's href against the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the page's meta tags and picture."""
        def meta_content(prop):
            # Value of the `content` attribute of <meta property=prop>.
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        imgs = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
            'author': author,
            'tags': tags,
        }
4740
4741
4742
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, fetched from gocomics.com."""
    name = 'pearls'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
    long_name = 'Pearls Before Swine'
4747
4748
4749
class Peanuts(GenericGoComic):
    """Peanuts comics, fetched from gocomics.com."""
    name = 'peanuts'
    url = 'http://www.gocomics.com/peanuts'
    long_name = 'Peanuts'
4754
4755
4756
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, fetched from gocomics.com."""
    name = 'wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
    long_name = 'Matt Wuerker'
4761
4762
4763
class TomToles(GenericGoComic):
    """Tom Toles comics, fetched from gocomics.com."""
    name = 'toles'
    url = 'http://www.gocomics.com/tomtoles'
    long_name = 'Tom Toles'
4768
4769
4770
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, fetched from gocomics.com."""
    name = 'breakofday'
    url = 'http://www.gocomics.com/break-of-day'
    long_name = 'Break Of Day'
4775
4776
4777
class Brevity(GenericGoComic):
    """Brevity comics, fetched from gocomics.com."""
    name = 'brevity'
    url = 'http://www.gocomics.com/brevity'
    long_name = 'Brevity'
4782
4783
4784
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, fetched from gocomics.com."""
    name = 'ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
    long_name = 'Michael Ramirez'
4789
4790
4791
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, fetched from gocomics.com."""
    name = 'luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
    long_name = 'Mike Luckovich'
4796
4797
4798
class JimBenton(GenericGoComic):
    """Jim Benton comics, fetched from gocomics.com."""
    # Also published at http://jimbenton.tumblr.com
    name = 'benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
    long_name = 'Jim Benton'
4804
4805
4806
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, fetched from gocomics.com."""
    name = 'argyle'
    url = 'http://www.gocomics.com/theargylesweater'
    long_name = 'Argyle Sweater'
4811
4812
4813
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, fetched from gocomics.com."""
    # Also published at http://www.sunnystreetcomics.com
    name = 'sunny'
    url = 'http://www.gocomics.com/sunny-street'
    long_name = 'Sunny Street'
4819
4820
4821
class OffTheMark(GenericGoComic):
    """Off The Mark comics, fetched from gocomics.com."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    url = 'http://www.gocomics.com/offthemark'
    long_name = 'Off The Mark'
4827
4828
4829
class WuMo(GenericGoComic):
    """WuMo comics, fetched from gocomics.com."""
    # Also published at http://wumo.com
    name = 'wumo'
    url = 'http://www.gocomics.com/wumo'
    long_name = 'WuMo'
4835
4836
4837
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, fetched from gocomics.com."""
    # Also published at:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    url = 'http://www.gocomics.com/lunarbaboon'
    long_name = 'Lunar Baboon'
4844
4845
4846
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, fetched from gocomics.com."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    url = 'http://www.gocomics.com/sarahs-scribbles'
    long_name = 'Sarah Andersen (from GoComics)'
4853
4854
4855
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics, fetched from gocomics.com."""
    # Also published at:
    #  - http://smbc-comics.tumblr.com
    #  - http://www.smbc-comics.com
    name = 'smbc-goc'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    _categories = ('SMBC', )
4863
4864
4865
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, fetched from gocomics.com."""
    # Source is gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    url = 'http://www.gocomics.com/calvinandhobbes'
    long_name = 'Calvin and Hobbes (from GoComics)'
4871
4872
4873
class RallGoComic(GenericGoComic):
    """Ted Rall comics, fetched from gocomics.com."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    url = "http://www.gocomics.com/ted-rall"
    long_name = "Ted Rall (from GoComics)"
    _categories = ('RALL', )
4880
4881
4882
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, fetched from gocomics.com."""
    # Also published at:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    long_name = 'The Awkward Yeti (from GoComics)'
    _categories = ('YETI', )
4891
4892
4893
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, fetched from gocomics.com."""
    # Also published at:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = 'berkeley-goc'
    url = 'http://www.gocomics.com/berkeley-mews'
    long_name = 'Berkeley Mews (from GoComics)'
    _categories = ('BERKELEY', )
4901
4902
4903
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, fetched from gocomics.com."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    url = 'http://www.gocomics.com/sheldon'
    long_name = 'Sheldon Comics (from GoComics)'
4909
4910
4911
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, fetched from gocomics.com."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    url = 'http://www.gocomics.com/fowl-language'
    long_name = 'Fowl Language Comics (from GoComics)'
    _categories = ('FOWLLANGUAGE', )
4920
4921
4922
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, fetched from gocomics.com."""
    name = 'nickanderson'
    url = 'http://www.gocomics.com/nickanderson'
    long_name = 'Nick Anderson'
4927
4928
4929
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, fetched from gocomics.com."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    url = 'http://www.gocomics.com/garfield'
    long_name = 'Garfield (from GoComics)'
    _categories = ('GARFIELD', )
4936
4937
4938
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, fetched from gocomics.com."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    url = 'http://www.gocomics.com/dorris-mccomics'
    long_name = 'Dorris Mc (from GoComics)'
4944
4945
4946
class FoxTrot(GenericGoComic):
    """FoxTrot comics, fetched from gocomics.com."""
    name = 'foxtrot'
    url = 'http://www.gocomics.com/foxtrot'
    long_name = 'FoxTrot'
4951
4952
4953
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics comics, fetched from gocomics.com."""
    name = 'foxtrot-classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
    long_name = 'FoxTrot Classics'
4958
4959
4960
class MisterAndMeGoComics(GenericDeletedComic, GenericGoComic):
    """Mister & Me comics, fetched from gocomics.com."""
    # Also published at:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    url = 'http://www.gocomics.com/mister-and-me'
    long_name = 'Mister & Me (from GoComics)'
4967
4968
4969
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) comics, fetched from gocomics.com."""
    name = 'nonsequitur'
    url = 'http://www.gocomics.com/nonsequitur'
    long_name = 'Non Sequitur'
4974
4975
4976
class GenericTapasticComic(GenericListableComic):
    """Shared scraping logic for comics hosted on tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dictionary for one episode.

        Returns None (after printing a notice) when the episode page has no
        `img.art-image` tag yet.
        """
        # publishDate is divided by 1000 before being read as a timestamp.
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        art_imgs = soup.find_all('img', class_='art-image')
        if not art_imgs:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        assert len(art_imgs) > 0
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [img['src'] for img in art_imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the element's id."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page's javascript.

        The first stripped line starting with 'episodeList : ' and ending
        with ',' is unwrapped and parsed as JSON; IndexError is raised when
        no such line exists.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part; there is no cleaner
        # known way to reach it than scanning the raw lines.
        raw_lines = urlopen_wrapper(cls.url).readlines()
        stripped = (raw.decode('utf-8').strip() for raw in raw_lines)
        payloads = [
            line[len(pref):-len(suff)]
            for line in stripped
            if line.startswith(pref) and line.endswith(suff)
        ]
        return json.loads(payloads[0])
5009
5010
5011
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, fetched from Tapastic."""
    # Also published at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    url = 'http://tapastic.com/series/vegetablesfordessert'
    long_name = 'Vegetables For Dessert'
5017
5018
5019
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    long_name = 'Fowl Language Comics (from Tapastic)'
    _categories = ('FOWLLANGUAGE', )
5028
5029
5030
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, fetched from Tapastic."""
    name = 'oscillating'
    url = 'http://tapastic.com/series/oscillatingprofundities'
    long_name = 'Oscillating Profundities'
5035
5036
5037
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, fetched from Tapastic."""
    name = 'znoflats'
    url = 'http://tapastic.com/series/Znoflats-Comics'
    long_name = 'Znoflats Comics'
5042
5043
5044
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, fetched from Tapastic."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    url = 'http://tapastic.com/series/Doodle-Time'
    long_name = 'Sarah Andersen (from Tapastic)'
5051
5052
5053
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, fetched from Tapastic."""
    # Also published at:
    #  - http://tubeytoons.com
    #  - https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    url = 'http://tapastic.com/series/Tubey-Toons'
    long_name = 'Tubey Toons (from Tapastic)'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for TUBEYTOONS; left as is
    # because other classes may share this category key - confirm.
    _categories = ('TUNEYTOONS', )
5061
5062
5063
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, fetched from Tapastic."""
    # Also published at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    url = 'http://tapastic.com/series/anything'
    long_name = 'Anything Comic (from Tapastic)'
5069
5070
5071
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed comics, fetched from Tapastic."""
    # Also published at:
    #  - http://unearthedcomics.com
    #  - https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    url = 'http://tapastic.com/series/UnearthedComics'
    long_name = 'Unearthed Comics (from Tapastic)'
    _categories = ('UNEARTHED', )
5079
5080
5081
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = 'stupid-tapa'
    url = 'http://tapastic.com/series/EverythingsStupid'
    long_name = "Everything's Stupid (from Tapastic)"
5088
5089
5090
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, fetched from Tapastic."""
    # Also published at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    url = 'http://tapastic.com/series/Just-Say-Eh'
    long_name = 'Just Say Eh (from Tapastic)'
5096
5097
5098
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, fetched from Tapastic."""
    # Also published at http://www.thorsthundershack.com
    name = 'thor-tapa'
    url = 'http://tapastic.com/series/Thors-Thundershac'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    _categories = ('THOR', )
5105
5106
5107
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, fetched from Tapastic."""
    # Also published at http://owlturd.com
    name = 'owlturd-tapa'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    long_name = 'Owl Turd (from Tapastic)'
    _categories = ('OWLTURD', )
5114
5115
5116
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, fetched from Tapastic."""
    # Also published at:
    #  - http://goneintorapture.tumblr.com
    #  - http://goneintorapture.com
    name = 'rapture-tapa'
    url = 'http://tapastic.com/series/Goneintorapture'
    long_name = 'Gone Into Rapture (from Tapastic)'
5123
5124
5125
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know comics, fetched from Tapastic."""
    # Also published at http://heckifiknowcomics.com
    name = 'heck-tapa'
    url = 'http://tapastic.com/series/Regular'
    long_name = 'Heck if I Know comics (from Tapastic)'
5131
5132
5133
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.cheerupemokid.com
    #  - https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    url = 'http://tapastic.com/series/CUEK'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
5140
5141
5142
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, fetched from Tapastic."""
    # Also published at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    url = 'http://tapastic.com/series/bigfoot-justice'
    long_name = 'Big Foot Justice (from Tapastic)'
5148
5149
5150
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, fetched from Tapastic."""
    # Also published at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    url = 'http://tapastic.com/series/UP-and-OUT'
    long_name = 'Up And Out (from Tapastic)'
5156
5157
5158
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, fetched from Tapastic."""
    # Also published at http://www.toonhole.com
    name = 'toonhole-tapa'
    url = 'http://tapastic.com/series/TOONHOLE'
    long_name = 'Toon Hole (from Tapastic)'
5164
5165
5166
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry at Nothing comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.angryatnothing.net
    #  - http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
    long_name = 'Angry At Nothing (from Tapastic)'
5173
5174
5175
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, fetched from Tapastic."""
    # Also published at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    url = 'https://tapastic.com/series/Leleoz'
    long_name = 'Leleoz (from Tapastic)'
5181
5182
5183
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    long_name = 'The Awkward Yeti (from Tapastic)'
    _categories = ('YETI', )
5192
5193
5194
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Every other comic class declares its categories as `_categories`;
    # this one used `categories`, so the tuple is now declared under the
    # conventional name.
    _categories = ('DAMILEE', )
    # Former attribute name, kept so existing readers of it keep working.
    categories = _categories
5201
5202
5203
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Every other comic class declares its categories as `_categories`;
    # this one used `categories`, so the tuple is now declared under the
    # conventional name.
    _categories = ('DAMILEE', )
    # Former attribute name, kept so existing readers of it keep working.
    categories = _categories
5212
5213
5214
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = '1111-tapa'
    url = 'https://tapastic.com/series/1111-Comics'
    long_name = '1111 Comics (from Tapastic)'
    _categories = ('ONEONEONEONE', )
5222
5223
5224
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it previously read 'Tumblr Dry'.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5230
5231
5232
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.deadlypanel.com
    #  - https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    url = 'https://tapastic.com/series/deadlypanel'
    long_name = 'Deadly Panel (from Tapastic)'
5239
5240
5241
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Display name fixed: the author's name was spelled 'Hallback'.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # The category key keeps its historical spelling: it is an identifier
    # that other classes may share.
    _categories = ('HALLBACK', )
5249
5250
5251
class ChrisHallbeckMiniTapa(GenericDeletedComic, GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name fixed: the author's name was spelled 'Hallback'.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # The category key keeps its historical spelling: it is an identifier
    # that other classes may share.
    _categories = ('HALLBACK', )
5259
5260
5261
class ChrisHallbeckBiffTapa(GenericDeletedComic, GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name fixed: the author's name was spelled 'Hallback'.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # The category key keeps its historical spelling: it is an identifier
    # that other classes may share.
    _categories = ('HALLBACK', )
5269
5270
5271
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, fetched from Tapastic."""
    # Also published at https://randowis.com
    name = 'randowis-tapa'
    url = 'https://tapastic.com/series/RandoWis'
    long_name = 'RandoWis (from Tapastic)'
5277
5278
5279
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, fetched from Tapastic."""
    # Also published at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
    long_name = 'The Pigeon Gazette (from Tapastic)'
5285
5286
5287
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, fetched from Tapastic."""
    # Also published at:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    url = 'https://tapastic.com/series/Theodd1sout'
    long_name = 'The Odd 1s Out (from Tapastic)'
5294
5295
5296
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, fetched from Tapastic."""
    # Also published at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    url = 'https://tapastic.com/series/The-World-is-Flat'
    long_name = 'The World Is Flat (from Tapastic)'
5302
5303
5304
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    url = 'https://tapastic.com/series/Mister-and-Me'
    long_name = 'Mister & Me (from Tapastic)'
5311
5312
5313
class TalesOfAbsurdityTapa(GenericDeletedComic, GenericTapasticComic):
    """Tales Of Absurdity comics, fetched from Tapastic."""
    # Also published at:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    long_name = 'Tales of Absurdity (from Tapastic)'
    _categories = ('ABSURDITY', )
5321
5322
5323
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, fetched from Tapastic."""
    # Also published at:
    #  - http://bfgfs.com
    #  - https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    url = 'https://tapastic.com/series/BFGFS'
    long_name = 'BFGFS (from Tapastic)'
5330
5331
5332
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, fetched from Tapastic."""
    # Also published at http://www.doodleforfood.com
    name = 'doodle-tapa'
    url = 'https://tapastic.com/series/Doodle-for-Food'
    long_name = 'Doodle For Food (from Tapastic)'
5338
5339
5340
class MrLovensteinTapa(GenericTapasticComic):
    """Mr Lovenstein comics, fetched from Tapastic."""
    # NOTE(review): the previous "Also on" comment pointed at
    # https://tapastic.com/series/MrLovenstein, i.e. this class's own url;
    # presumably another site was meant - confirm and update.
    name = 'mrlovenstein-tapa'
    url = 'https://tapastic.com/series/MrLovenstein'
    long_name = 'Mr. Lovenstein (from Tapastic)'
5346
5347
5348
class CassandraCalinTapa(GenericTapasticComic):
    """C. Cassandra comics, fetched from Tapastic."""
    # Also published at:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
    long_name = 'Cassandra Calin (from Tapastic)'
5355
5356
5357
class WafflesAndPancakes(GenericTapasticComic):
    """Waffles And Pancakes comics, fetched from Tapastic."""
    # Also published at http://wandpcomic.com
    name = 'waffles'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
    long_name = 'Waffles And Pancakes'
5363
5364
5365
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Yesterday's Popcorn comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.yesterdayspopcorn.com
    #  - http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
5372
5373
5374
class OurSuperAdventureTapastic(GenericDeletedComic, GenericTapasticComic):
    """Our Super Adventure comics, fetched from Tapastic."""
    # Also published at http://www.oursuperadventure.com
    # Related sites listed by the original author:
    #  - http://sarahssketchbook.tumblr.com
    #  - http://sarahgraley.com
    name = 'superadventure-tapastic'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
    long_name = 'Our Super Adventure (from Tapastic)'
5382
5383
5384
class NamelessPCs(GenericTapasticComic):
    """Nameless PCs comics, fetched from Tapastic."""
    # Also published at http://namelesspcs.com
    name = 'namelesspcs-tapa'
    url = 'https://tapastic.com/series/NamelessPC'
    long_name = 'NamelessPCs (from Tapastic)'
5390
5391
5392
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Down The Upward Spiral comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.downtheupwardspiral.com
    #  - http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
    long_name = 'Down the Upward Spiral (from Tapastic)'
5399
5400
5401
class UbertoolTapa(GenericTapasticComic):
    """Ubertool comics, fetched from Tapastic."""
    # Also published at:
    #  - http://ubertoolcomic.com
    #  - https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    url = 'https://tapastic.com/series/ubertool'
    long_name = 'Ubertool (from Tapastic)'
    _categories = ('UBERTOOL', )
5409
5410
5411
class BarteNerdsTapa(GenericDeletedComic, GenericTapasticComic):
    """BarteNerds comics, fetched from Tapastic."""
    # Also published at http://www.bartenerds.com
    name = 'bartenerds-tapa'
    url = 'https://tapastic.com/series/BarteNERDS'
    long_name = 'BarteNerds (from Tapastic)'
5417
5418
5419
class SmallBlueYonderTapa(GenericTapasticComic):
    """Small Blue Yonder comics, fetched from Tapastic."""
    # Also published at http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
    long_name = 'Small Blue Yonder (from Tapastic)'
5425
5426
5427
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Tizzy Stitch Bird comics, fetched from Tapastic."""
    # Also published at:
    #  - http://tizzystitchbird.com
    #  - http://tizzystitchbird.tumblr.com
    #  - http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    url = 'https://tapastic.com/series/TizzyStitchbird'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
5435
5436
5437
class RockPaperCynicTapa(GenericTapasticComic):
    """Rock Paper Cynic comics, fetched from Tapastic."""
    # Also published at:
    #  - http://www.rockpapercynic.com
    #  - http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    url = 'https://tapastic.com/series/rockpapercynic'
    long_name = 'Rock Paper Cynic (from Tapastic)'
5444
5445
5446
class IsItCanonTapa(GenericTapasticComic):
    """Is It Canon comics, fetched from Tapastic."""
    # Also published at http://www.isitcanon.com
    name = 'canon-tapa'
    url = 'http://tapastic.com/series/isitcanon'
    long_name = 'Is It Canon (from Tapastic)'
5452
5453
5454
class ItsTheTieTapa(GenericTapasticComic):
    """It's the tie comics, fetched from Tapastic."""
    # Also published at:
    #  - http://itsthetie.com
    #  - http://itsthetie.tumblr.com
    name = 'tie-tapa'
    url = "https://tapastic.com/series/itsthetie"
    long_name = "It's the tie (from Tapastic)"
    _categories = ('TIE', )
5462
5463
5464
class JamesOfNoTradesTapa(GenericTapasticComic):
    """James Of No Trades comics, fetched from Tapastic."""
    # Also published at:
    #  - http://jamesofnotrades.com
    #  - http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    #  - http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    url = 'https://tapas.io/series/James-of-No-Trades'
    long_name = 'James Of No Trades (from Tapastic)'
    _categories = ('JAMESOFNOTRADES', )
5473
5474
5475
class MomentumTapa(GenericTapasticComic):
    """Momentum comics, fetched from Tapastic."""
    # Also published at http://www.momentumcomic.com
    name = 'momentum-tapa'
    url = 'https://tapastic.com/series/momentum'
    long_name = 'Momentum (from Tapastic)'
5481
5482
5483
class InYourFaceCakeTapa(GenericTapasticComic):
    """In Your Face Cake comics, fetched from Tapastic."""
    # Also published at https://in-your-face-cake.tumblr.com
    name = 'inyourfacecake-tapa'
    url = 'https://tapas.io/series/In-Your-Face-Cake'
    long_name = 'In Your Face Cake (from Tapastic)'
    _categories = ('INYOURFACECAKE', )
5490
5491
5492
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """A Pleasant Waste Of Time comics, fetched from Tapastic."""
    # Also published at https://artjcf.tumblr.com
    name = 'pleasant-waste-tapa'
    url = 'https://tapas.io/series/A-Pleasant-'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    _categories = ('WASTE', )
5499
5500
5501
def get_subclasses(klass):
    """Return the list of classes deriving from klass, directly or not.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several parents appears once
    per path.
    """
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
5507
5508
5509
def remove_st_nd_rd_th_from_date(string):
    """Strip ordinal suffixes so that 1st/2nd/3rd/4th become 1/2/3/4.

    Only a lowercase 'st', 'nd', 'rd' or 'th' that directly follows a digit
    is removed. The previous implementation removed those letter pairs
    anywhere in the string, which mangled words such as 'Monday', 'Sunday'
    or 'Saturday' (and needed a special case to rebuild 'August').

    Returns the cleaned string, ready to be parsed as a date.
    """
    # The lookbehind keeps the digit and drops only the suffix after it.
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)', '', string)
5517
5518
5519
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime which parses with the `local`
    locale active for LC_ALL.

    string: the text to parse.
    date_format: a strptime format, as described in
        https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    local: locale name to use while parsing.

    Returns a datetime.date. Exceptions from strptime (ValueError on a
    mismatch) and from locale.setlocale propagate to the caller.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the previous locale even when parsing fails; before, a
        # failing strptime left the process in the temporary locale.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
5530
5531
5532
# All the classes deriving (directly or not) from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Only the classes whose `name` is not None are kept.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup by comic name; the assert fails if two classes share a name,
# since the dictionary would then hold fewer entries than the list.
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Same uniqueness check on the Python class names.
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(CLASS_NAMES)
5538