Completed
Push — master ( 6005aa...0e2df8 )
by De
29s
created

comics.py (15 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Iteration starts right after `last_comic['num']` (or at 1 when
        `last_comic` is falsy) and stops at the 'num' value found in the
        `info.0.json` document at the root of the site. Each yielded
        dictionary is the JSON document of one comic, completed with the
        'prefix', 'json_url' and 'url' keys, with 'img' wrapped in a list
        and 'day'/'month'/'year' converted to integers.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            # Number 404 is never requested.
            # NOTE(review): presumably because no such comic exists - confirm.
            if num == 404:
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            comic['day'] = int(comic['day'])
            comic['month'] = int(comic['month'])
            comic['year'] = int(comic['year'])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` unchanged. Meant to be assigned as a
    class attribute in comic classes.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` joined to `cls.url` with
    `urljoin_wrapper`. Meant to be assigned as a class attribute in comic
    classes.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comic where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True to get the next link, False to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The value returned is either None (the comic is then skipped by
        `get_next_comic`) or a dictionary without any 'url' key.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (and log it)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (and log it)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Navigation starts from the link following `last_comic['url']` or,
        when there is no last comic, from the first comic link. It stops
        when there is no next link or when the next link leads to the URL
        that has just been handled.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            # A "next" link leading to the current page would loop forever.
            if prev_url == url:
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return True when a first link is found and its URL can be
        retrieved without HTTP error, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded: import it explicitly where it is used.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Return True when the checks
        pass, False otherwise (including when `url` is None or when no
        previous nor next link is found).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both `check_first_link` and `check_prev_next_links` are always
        run; the value returned is True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The value returned is either None (the comic is then skipped by
        `get_next_comic`) or a dictionary without any 'url' key.
        """
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to (and including) the one whose
        URL is `last_comic['url']`; the following ones are retrieved and
        yielded. Without last comic, all elements are retrieved. A message
        is printed if the URL of the last comic is never found.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        # Stored in a list as its length is needed for the final message.
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last comic found: retrieval starts with the next element.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for a 'link' tag with rel 'next' (if `next_` is
    true) or 'prev' (otherwise) and return the result of the search.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag with rel 'next' (if `next_` is
    true) or 'prev' (otherwise) and return the result of the search.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag with class 'navi navi-next' (if
    `next_` is true) or 'navi navi-prev' (otherwise) and return the result
    of the search.
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag with class
    'navi comic-nav-next navi-next' (if `next_` is true) or
    'navi comic-nav-previous navi-prev' (otherwise) and return the result
    of the search.
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag with class
    'comic-nav-base comic-nav-next' (if `next_` is true) or
    'comic-nav-base comic-nav-previous' (otherwise) and return the result
    of the search.
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at `cls.url` and return the result of the search
    for an 'a' tag with class 'navi navi-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at `cls.url`, look for the 'div' tag with class
    'nav-first' and return the result of the search for an 'a' tag in it.
    Return None when there is no such 'div' instead of failing with an
    AttributeError: callers of get_first_comic_link handle a missing link.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at `cls.url` and return the result of the search
    for an 'a' tag with class 'comic-nav-base comic-nav-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The object returned is a dictionary whose 'href' item is `cls.first_url`.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    The starting point is `cls.first_url` if the class defines it;
    otherwise the URL is asked to the user. Each URL visited is printed.
    The value returned is a link-like dictionary whose 'href' item is the
    last URL reached.

    Then, the URL found can easily be used via `simulate_first_link`.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    # Go backward until a page has no previous link.
    while comic:
        url = cls.get_url_from_link(comic)
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics.

        `last_comic` is ignored; an empty list is always returned.
        """
        cls.log("comic is considered as empty - returning no comic")
        return []
358
359
360
class GenericComicNotWorking(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when comic is not working.

    This is more explicit than GenericEmptyComic as it highlights that
    only the implementation is not working and it can be fixed."""
    _categories = ('NOTWORKING', )
366
367
368
class GenericUnavailableComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic is not available.

    This is more explicit than GenericEmptyComic as it highlights that
    the source of the comic is not available but we expect it to be back
    soonish. See also GenericDeletedComic."""
    _categories = ('UNAVAILABLE', )
375
376
377
class GenericDeletedComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic does not exist anymore.

    This is more explicit than GenericEmptyComic as it highlights that
    the source of the comic does not exist anymore and it probably cannot
    be fixed. Corresponding classes are kept as we can still use the
    downloaded data. See also GenericUnavailableComic."""
    _categories = ('DELETED', )
385
386
387
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose source is in the 'wp-content/uploads'
        directory of the site; title and date come from 'meta' tags.
        """
        # The URL is escaped so that its dots are matched literally.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the date part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
414
415
416
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: the actual starting URL is provided by each blog class.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, short link and images come from 'meta'/'link' tags; the
        date is parsed from the 'entry-date' span with the "fr_FR.utf8"
        locale argument.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
439
440
441
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
447
448
449
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
456
457
458
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
464
465
466
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
472
473
474
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
480
481
482
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
488
489
490
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
496
497
498
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
504
505
506
class Rall(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags of the 'entry-content' div, without the
        last 7 ones.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
537
538
539
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the 'a' tag found in the relevant 'li' tag or None when
        there is no such 'li' tag.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the date part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
573
574
575
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is a hardcoded link-like dictionary; it contains the
        'title' item read by `get_comic_info`.
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the link leading to the comic and the date
        from the 3 numeric components of its URL (year, month, day).
        """
        url_date_re = re.compile(r'.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
603
604
605
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Assertions check that at least one image is found and that the
        'alt' of each image is equal to its 'title' and is either empty or
        the title of the post.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
638
639
640
class ItsTheTie(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'og:image' ones followed by the bonus images
        ('data-oversrc' attribute) not already listed, without the ones
        ending with "/2016/01/bonus-panel.png".
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
674
675
676
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the 'date-header' title with the
        "fr_FR.utf8" locale argument.
        """
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
700
701
702
class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the comic header, images from the
        'og:image' meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
727
728
729
class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the comic header, images from the
        'og:image' meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
753
754
755
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the value of the 'p' parameter of the short
        link. Exactly one image is expected in the 'comic' div: its 'alt'
        and 'title' attributes are used as titles.
        """
        # The URL is escaped so that its dots are matched literally.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1, imgs
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
781
782 View Code Duplication
783
class Garfield(GenericNavigableComic):
    """Retriever for the Garfield strips published on garfield.com."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the arrow link towards the next (or previous) strip."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a strip.

        The publication date is parsed from the strip URL
        ('<url>/comic/<year>/<month>/<day>'); the images are the
        'img-responsive' ones found in the 'comic-display' div.
        """
        strip_url = cls.get_url_from_link(link)
        url_date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = (int(part) for part in url_date_re.match(strip_url).groups())
        display = soup.find('div', class_='comic-display')
        strip_imgs = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in strip_imgs],
        }
811
812
813
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips published on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor towards the next (or previous) strip, or None.

        The anchor is searched inside the navigation div of the requested
        direction; None is returned when that div is absent.
        """
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the strip information from the meta tags of the page."""
        def meta_content(prop):
            """Return the content of the first meta tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
849
850
851
class VictimsOfCircumsolar(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic page.

        Title and description come from the last 'og:title' and
        'og:description' meta tags; every image of the 'comic' div is
        expected to carry the title as both 'title' and 'alt'.
        """
        # Date is on the archive page
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['title'] == img['alt'] == title for img in comic_imgs)
        return {
            'title': title,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
874
875
876
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the 'first' button image, or None
        when the home page does not contain that image.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        # find() returns None when nothing matches: do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the navigation button image, or None
        when the image is missing or its parent carries no 'href' (which is
        how the page is handled when there is nowhere to navigate to).
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except the ones whose source ends
        with a known site-decoration file name (navigation buttons, ads,
        header...). 'title2' concatenates the non-empty 'alt' texts.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
908
909
910
class DeadlyPanel(GenericComicNotWorking, GenericNavigableComic):  # Not working on my machine
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image sources found in the 'comic' div of the page.

        Each image is expected to have identical 'alt' and 'title' values.
        """
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'img': [img['src'] for img in comic_imgs],
        }
928
929
930
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the post header and 'comic' div.

        The date is parsed from the 'post-date' span using the
        "%B %d, %Y" format.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
954
955
956
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the post header and 'comicpane' div.

        Every image is expected to have identical 'alt' and 'title' values;
        the 'title' of the first image is exposed as 'title2'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert all(img['alt'] == img['title'] for img in pane_imgs)
        hover_text = pane_imgs[0]['title']
        return {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'title2': hover_text,
            'author': author,
        }
984
985
986
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic page.

        Images of class 'comic' must carry the comic title as both 'alt'
        and 'title'; images with an empty 'src' are left out.
        """
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(soup.find("span", class_="comic_date").string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs if img["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1013
1014
1015
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the 'cc-comic' one; an optional second image is
        taken from the 'aftercomic' div. The second image is skipped when
        the div is absent, contains no image, or the image has no source.
        """
        image1 = soup.find('img', id='cc-comic')
        image_url1 = image1['src']
        aftercomic = soup.find('div', id='aftercomic')
        # The div may exist without an image inside: guard both lookups
        # instead of subscripting a possible None.
        image2 = aftercomic.find('img') if aftercomic else None
        image_url2 = image2.get('src') if image2 else None
        imgs = [image_url1] + ([image_url2] if image_url2 else [])
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i)) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1047
1048
1049 View Code Duplication
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the thumbnail links of the home page, in reversed order."""
        home = get_soup_at_url(cls.url)
        thumbnail_links = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(thumbnail_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the comic name and its single image from the meta tags."""
        name = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        assert len(image_metas) == 1, image_metas
        return {
            'name': name,
            'img': [meta['content'] for meta in image_metas],
        }
1072
1073
1074
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the meta tags of the page.

        The description defaults to an empty string when the page has no
        'og:description' tag; the date is the first 10 characters
        (YYYY-MM-DD) of 'article:published_time'.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1100
1101
1102
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        comic_links = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic.

        The number comes from the comic URL and the date from the image
        URL ('.../<year>-<month>-<day>-...'); the image of the 'comic' div
        is expected to have identical 'alt' and 'title' values.
        """
        img_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == comic_img['title']
        hover_text = comic_img['title']
        img_src = comic_img['src']
        year, month, day = (int(part) for part in img_date_re.match(img_src).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [img_src],
            'year': year,
            'month': month,
            'day': day,
        }
1138
1139
1140
class GenericBouletCorp(GenericNavigableComic):
    """Shared retrieval logic for the BouletCorp sites (one subclass per URL)."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic.

        The date is parsed from the comic URL ('<url>/<year>/<month>/<day>/').
        Images come from the 'storycontent' div inside the 'notes' div;
        images without a 'src' are ignored and the non-empty image titles
        are joined into 'texts'.
        """
        comic_url = cls.get_url_from_link(link)
        url_date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = (int(part) for part in url_date_re.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        story_imgs = story.find_all('img')
        img_titles = [img.get('title') for img in story_imgs]
        texts = '  '.join(t for t in img_titles if t)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(img['src'])
                    for img in story_imgs if img.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1168
1169
1170
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1176
1177
1178
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1183
1184
1185
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the post header and 'comic' div.

        The title is the space-joined 'title' of every comic image; each
        image is expected to have identical 'alt' and 'title' values.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join(img['title'] for img in comic_imgs)
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in comic_imgs],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1210
1211
1212
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic page.

        The title is the 'alt' of the first image of the 'comic' div (which
        must equal its 'title'), or an empty string when there is no image.
        """
        meta_text = soup.find('div', class_='entry-meta').contents[0].strip()
        posted = string_to_date(meta_text, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if comic_imgs:
            first_img = comic_imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in comic_imgs],
        }
1240
1241
1242
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic page.

        When the page has an 'extrapanelbutton' div, the linked page is
        fetched as well and its 'extrapanelimage' images are appended to
        the images of the 'comic' div.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            all_imgs = comic_div.find_all('img')
        else:
            all_imgs = []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            all_imgs.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in all_imgs],
        }
1276
1277
1278
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing or has no
        'href' attribute.
        """
        # The class name must not carry surrounding whitespace: a class
        # attribute is split on whitespace before being compared, so a
        # value such as 'previous-comic ' could never match.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        # find() returns None when nothing matches: do not dereference it.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second to last '/'-separated component of
        the 'og:url' meta tag; the author credit is expected to start with
        'by ', which is stripped.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1317
1318
1319
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic, in increasing number order.

        The range of comic numbers is derived from the '/comic/<num>' links
        of the home page; iteration starts right after last_comic when it
        is provided, at the smallest number found otherwise.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(a['href']).group(1)) for a in home_links]
        oldest, newest = min(nums), max(nums)
        start = last_comic['num'] + 1 if last_comic else oldest
        for num in range(start, newest + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            comic_imgs = page.find_all('img', src=re.compile('^/images/comics/'))
            comic_imgs = list(reversed(comic_imgs))
            description = page.find('meta', attrs={'name': 'description'})['content']
            img_titles = [img.get('title') for img in comic_imgs]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(t for t in img_titles if t),
                'img': [urljoin_wrapper(comic_url, img['src']) for img in comic_imgs],
                'description': description,
            }
1349
1350
1351
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic.

        Number, date and text come from the archive link (its URL, its
        string and its next sibling); the image is the first one whose
        source starts with '<url>/comics/'.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        text = link.next_sibling.string
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [comic_img.get('src')],
            'title': comic_img.get('title'),
            'text': text,
            'num': num,
        }
1384
1385
1386 View Code Duplication
class ButterSafe(GenericListableComic):
    """Retriever for the ButterSafe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic.

        The date is parsed from the comic URL ('<url>/<year>/<month>/<day>/...')
        and the title is the archive link string, which must equal the
        'alt' of the image found in the 'comic' div.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        url_match = cls.comic_link_re.match(comic_url)
        year, month, day = (int(part) for part in url_match.groups())
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [comic_img['src']],
        }
1414
1415
1416
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page lists one '<year>/<month>/' link per month; each
        month page holds images named '<year><month><day>...'. Only comics
        dated strictly after the last retrieved one (or after 1985-11-01
        when last_comic is empty) are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the textual forms: they are reused verbatim in the image
            # pattern and in the image URL.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months that cannot contain anything newer than last_date
            # (first day of the month + 31 days is past the end of any month).
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1450
1451
1452 View Code Duplication
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links found on the archive page."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dictionary for a comic.

        The number is parsed from the comic URL, the title is the archive
        link string and the image is the first one located under
        '<url>/strips/'.
        """
        url_match = cls.comic_url_re.match(cls.get_url_from_archive_element(archive_elt))
        num = int(url_match.group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1475
1476
1477
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'first' button image, or None if absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, or None if absent."""
        direction = 'next' if next_ else 'prev'
        button = last_soup.find(
            'img', src='http://phdcomics.com/comics/images/%s_button.gif' % direction)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the images from the meta tags of the page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1506
1507
1508
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title from 'og:title' and the images from the article div."""
        title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in article_imgs],
        }
1532
1533
1534
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        first_href_re = re.compile('comic=1$')
        return home.find('a', href=first_href_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its title attribute."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, image url and title of a comic.

        The number is the value of the trailing 'comic=' parameter of the
        comic url; the image is the first <img> whose src starts with
        '/oc/comics/' and its title attribute (if any) is used as title.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        info = {'num': num}
        info['img'] = [urljoin_wrapper(comic_url, img['src'])]
        info['title'] = img.get('title')
        return info
1564
1565
1566
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' on the home page."""
        home = get_soup_at_url(cls.url)
        start_div = home.find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (next) or 'pvs' (previous) div.

        None is returned when the page has no such div.
        """
        div_id = "nx" if next_ else "pvs"
        nav_div = last_soup.find("div", id=div_id)
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and description of a comic.

        Exactly one image is expected in the 'tt' div and exactly one image
        with id 'strip'. Both are returned (in that order) and their title
        attributes are joined to form the description.
        """
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1, header_imgs
        comic_imgs = soup.find_all('img', id='strip')
        assert len(comic_imgs) == 1, comic_imgs
        all_imgs = header_imgs + comic_imgs
        hover_texts = [img['title'] for img in all_imgs]
        description = ' '.join(hover_texts)
        return {
            'title': page_title,
            'img': [img['src'] for img in all_imgs],
            'description': description,
        }
1600
1601
1602
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls of a comic.

        Title and description are read from the 'twitter:label1' and
        'og:description' meta tags; images are the <img> tags carrying
        itemprop="image".
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        img_tags = soup.find_all('img', itemprop="image")
        img_urls = [tag['src'] for tag in img_tags]
        return {
            'title': title,
            'description': desc,
            'img': img_urls,
        }
1626
1627
1628
class SomethingOfThatIlk(GenericDeletedComic):
    """Entry for the Something Of That Ilk comics.

    Only the identifying attributes are defined here; everything else comes
    from GenericDeletedComic.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1633
1634
1635
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title ('og:title' meta) and the images of div#comic."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        info = {'title': title}
        info['img'] = [tag['src'] for tag in img_tags]
        return info
1653
1654
1655
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of a post.

        When the page has no div with id 'comic', the image list is empty
        and title/alt are empty strings; date and tags are still returned.
        """
        date_str = soup.find('div', class_='postdate').find('em').string
        cleaned_date = remove_st_nd_rd_th_from_date(date_str)
        day = string_to_date(cleaned_date, "%B %d, %Y")
        # Defaults used for posts without a comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        info = {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
        }
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        info['tags'] = ' '.join(t.string for t in tag_links)
        return info
1692
1693
1694
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date of a comic.

        Title and date are read from the 'post-title' heading and the
        'post-date' span; images are the <img> tags of div#comic.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        info = {'img': [tag['src'] for tag in img_tags]}
        info['title'] = title
        info['day'] = day.day
        info['month'] = day.month
        info['year'] = day.year
        return info
1716
1717
1718
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and alt text of a comic.

        Every image of div#comic must have identical 'alt' and 'title'
        attributes; the alt text of the first image is the one returned.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        img_tags = comic_div.find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        first_alt = img_tags[0]['alt']
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'alt': first_alt,
        }
1739
1740
1741
class MouseBearComedy(GenericComicNotWorking):  # Website has changed
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic.

        Every image of div#comic must have 'alt' and 'title' attributes
        equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == title for tag in img_tags)
        info = {
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
        info['img'] = [tag['src'] for tag in img_tags]
        info['title'] = title
        info['author'] = author
        return info
1767
1768
1769 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images of div#comic and a title built from them.

        Each image must have identical 'title' and 'alt' attributes; the
        title of the comic is the space-joined titles of its images.
        """
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in img_tags)
        img_titles = [tag['title'] for tag in img_tags]
        title = ' '.join(img_titles)
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': title,
        }
1788
1789
1790
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace here: a space is not a valid URL character and
    # would leak into anything built from or displayed with this value.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title comes from the 'og:title' meta tag, author and publication
        time from the 'shareaholic:*' meta tags, and images from the
        'og:image' meta tags (minus the urls listed in skip_imgs).
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as a '%Y-%m-%d' date.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # Image urls that must never be reported as comic images
        # (presumably site artwork, given their '/site/' path - to confirm).
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1822
1823
1824
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text of a comic.

        The alt text is the one of the first image of div#comic; every
        image must have identical 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find('div', id='comic').find_all('img')
        first_alt = img_tags[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        info = {
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
        info['img'] = [tag['src'] for tag in img_tags]
        info['title'] = title
        info['alt'] = first_alt
        return info
1851
1852
1853
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic.

        At least one image is expected in the 'comicpane' div and every
        image must have 'title' and 'alt' attributes equal to the post
        title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        img_tags = pane.find_all('img')
        assert img_tags
        assert all(tag['title'] == tag['alt'] == title for tag in img_tags)
        info = {
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
        info['img'] = [tag['src'] for tag in img_tags]
        info['title'] = title
        info['author'] = author
        return info
1881
1882
1883
class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, images, tags and date of a post.

        Images are the <img> tags of the 'entry-content' div, tags are the
        space-joined texts of the rel='tag' anchors and the date is parsed
        from the first 10 characters of the <time> datetime attribute.
        """
        title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        img_tags = content_div.find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        timestamp = soup.find('time')['datetime']
        day = string_to_date(timestamp[:10], "%Y-%m-%d")
        info = {
            'title': title,
            'short_url': short_url,
        }
        info['img'] = [tag['src'] for tag in img_tags]
        info['tags'] = tags
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        return info
1910
1911
1912
class TheDoghouseDiaries(GenericDeletedComic, GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'nextlink' or 'previouslink' anchor of the page."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number of a comic.

        The image is the first <img> whose src starts with 'dhdcomics/';
        the number is the last '/'-separated part of the comic url.
        """
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        info = {}
        info['title'] = soup.find('h2', id='titleheader').string
        info['title2'] = soup.find('div', id='subtext').string
        info['alt'] = img.get('title')
        # The src is stripped before being joined to the comic url.
        info['img'] = [urljoin_wrapper(comic_url, img['src'].strip())]
        info['num'] = int(comic_url.rsplit('/', 1)[-1])
        return info
1941
1942
1943
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of its link)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title is the text of the link in the archive cell. The date is
        rebuilt from the text of the cell's previous sibling (parsed as
        '%b %d') and from the first numeric path component of the comic
        url (parsed as the year). The image is the first <img> of
        div#comic; its 'title' and 'alt' attributes must equal the title.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is escaped so that its '.' only matches a literal dot.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        # find() yields None when the div has no image: check that rather
        # than the length of a list built by hand, which is always 1.
        assert img is not None, 'no image found in div#comic'
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1979
1980
1981
class DiscoBleach(GenericDeletedComic):
    """Entry for the Disco Bleach comics.

    Only the identifying attributes are defined here; everything else comes
    from GenericDeletedComic.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1986
1987
1988
class TubeyToons(GenericDeletedComic):
    """Entry for the Tubey Toons comics.

    Only the identifying attributes are defined here; everything else comes
    from GenericDeletedComic.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of the comic name;
    # the value is kept unchanged because it is read at runtime - confirm
    # before renaming.
    _categories = ('TUNEYTOONS', )
1996
1997
1998
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of a comic.

        The author is the text of the second child of the 'post-author'
        span. At least one image is expected in the 'comicpane' div and
        all images must share the same 'title' and 'alt' value, which is
        returned as alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find('div', class_='comicpane').find_all('img')
        assert img_tags
        alt = img_tags[0]['title']
        assert all(tag['title'] == tag['alt'] == alt for tag in img_tags)
        info = {
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
        info['img'] = [tag['src'] for tag in img_tags]
        info['title'] = title
        info['alt'] = alt
        info['author'] = author
        return info
2026
2027
2028 View Code Duplication
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div. Its src is
        returned in 'img' and its title attribute (or an empty string when
        there is no image or no title) in 'title'.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1, imgs
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, reversed.

        A link is kept when its href starts with '<url>/comic/' followed by
        at least one character.
        """
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is escaped so that its '.' only matches a literal dot;
        # the trailing '.' of the pattern is a deliberate wildcard.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2052
2053
2054
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at cls.url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        anchor_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of a comic.

        The title is the text of the first <h1>, the date is the stripped
        text of the 'date' span and the images are the <img> tags of the
        'comic' div selected with alt='' and title=''.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        day = string_to_date(raw_date.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        img_tags = comic_div.find_all('img', alt='', title='')
        info = {'title': title}
        info['img'] = [tag['src'] for tag in img_tags]
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        return info
2084
2085
2086
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a post.

        A post without div#comic yields no image and an empty title. When
        images exist, the title is the 'title' attribute of the first one
        and every image must have 'title' and 'alt' equal to it.
        """
        post_date = soup.find('span', class_='post-date').string
        cleaned_date = remove_st_nd_rd_th_from_date(post_date)
        day = string_to_date(cleaned_date, "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img_tags = comic_div.find_all('img')
        else:
            img_tags = []
        if img_tags:
            title = img_tags[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in img_tags)
        info = {
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
        info['img'] = [tag['src'] for tag in img_tags]
        info['title'] = title
        info['author'] = author
        return info
2112
2113
2114
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the image named 'beginArrow' on the home
        page.
        """
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous). None is returned when the page has no such
        image, instead of failing on the missing element.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # find() returns None when nothing matches: report "no link" rather
        # than raising AttributeError on None.parent.
        return None if arrow is None else arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the 'twitter:title' meta tag and the images
        from the 'og:image' meta tags.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2140
2141
2142
class TurnOffUs(GenericListableComic):
    """Retriever for the TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the 'post-list', reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        post_list = archive_soup.find('ul', class_='post-list')
        post_links = post_list.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the title ('og:title' meta) and images ('og:image' metas)."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        img_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            img_urls.append(meta['content'])
        return {
            'title': title,
            'img': img_urls,
        }
2164
2165
2166
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags and images of a comic.

        tr is an archive row made of exactly three cells: the second holds
        the link (whose text is the title) and the third the date
        ("%m.%d.%y"). The remaining fields are read from the meta tags of
        the comic page; the description is empty when the page has no
        'og:description'.
        """
        cells = tr.find_all('td')
        _, link_cell, date_cell = cells
        anchor = link_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(t['content'] for t in tag_metas)
        img_metas = soup.find_all('meta', property='og:image')
        info = {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
        }
        info['img'] = [meta['content'] for meta in img_metas]
        return info

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held by the second cell of the row."""
        cells = tr.find_all('td')
        _, link_cell, __ = cells
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2207
2208
2209
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a comic.

        Title, author and date are looked up inside the 'post-content'
        div. Every image of div#comic must have identical 'alt' and 'title'
        attributes; the alt texts are concatenated without separator.
        """
        img_tags = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        post_date = post.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        info = {'title': title}
        info['img'] = [tag['src'] for tag in img_tags]
        info['alt'] = ''.join(tag['alt'] for tag in img_tags)
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        info['author'] = author
        return info
2236
2237
2238
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, transcript, short url and images of a comic.

        Images come from the 'og:image' meta tags, the short url from the
        rel='shortlink' link and the transcript from the
        'transcript-content' div.
        """
        title = soup.find('title').string
        img_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript = soup.find('div', id='transcript-content').string
        info = {
            'title': title,
            'transcript': transcript,
            'short_url': short_url,
        }
        info['img'] = [meta['content'] for meta in img_metas]
        return info
2259
2260
2261
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date.

        Exactly one <img> with data-recalc-dims="1" is expected; its src is
        returned without the part following the last '?'. Tags default to
        an empty string when the page has no 'article:tag' meta.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        img_tags = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(img_tags) == 1, img_tags
        info = {
            'title': title,
            'description': description,
            'tags': tags,
        }
        info['alt'] = "".join(tag['alt'] for tag in img_tags)
        info['img'] = [tag['src'].rsplit('?', 1)[0] for tag in img_tags]
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        return info
2292
2293
2294
class JuliasDrawings(GenericListableComic):
    """Class to retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each listed post, in reversed page order."""
        front_page = get_soup_at_url(cls.url)
        posts = front_page.find_all('article', class_='li post')
        links = []
        for post in reversed(posts):
            links.append(post.find('a'))
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (title, image URLs, date) for a post page.

        Image sources are joined to the site URL; the date is parsed from
        the first 10 characters of 'og:article:published_time'.
        """
        published = soup.find('meta', property='og:article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        pictures = content.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2320
2321
2322
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the archive table that describe comics."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        table = get_soup_at_url(archive_url).find('table', id='chapter_table')
        rows = table.find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element (a 4-cell table row)."""
        _, td_comic, _, _ = tr.find_all('td')
        anchor = td_comic.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict from the comic page and its archive row.

        The row supplies the number, title and date; the page supplies the
        single image with id 'comic_image' (its alt and title must agree).
        """
        td_num, td_comic, td_date, _ = tr.find_all('td')
        num = int(td_num.string)
        anchor = td_comic.find('a')
        title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        raw_date = td_date.string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        day = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1, pictures
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2363
2364
2365
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) for a page.

        Author, date and images are all looked up inside the
        'post-content' div.
        """
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2391
2392
2393
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) from <meta> elements.

        Images are the contents of every 'og:image' element; the date is
        the first 10 characters of 'article:published_time'.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2419
2420
2421
class ThorsThundershack(GenericNavigableComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the 'first navlink' anchor of the home page)."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first matching anchor whose href is not '/comic',
        or None when there is no such anchor.
        """
        rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=rel)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        Every image must have identical title and alt; the alt of the
        first image (or "" when there is none) is reported.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        timestamp = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(timestamp, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2464
2465
2466
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        At least one image is required in the 'comic' div, and every image
        must share the same alt and title text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2493
2494
2495
class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        The date is parsed with the "de_DE.utf8" locale.  At most one image
        is expected, and its alt and title must both equal the post title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1, pictures
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2521
2522
2523
class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        Exactly one image is expected in the 'comic' div, with matching
        alt and title attributes.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1, pictures
        alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2551
2552
2553
class TheAwkwardYeti(GenericNavigableComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for a comic page.

        Only the first image of the 'comic' div is required to have
        matching alt and title attributes.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Images after the first one are deliberately left unchecked.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2580
2581
2582
class PleasantThoughts(GenericNavigableComic):
    """Class to retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and image URLs) for a comic page.

        Both are looked up inside the 'post-content' div.
        """
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
2600
2601
2602
class MisterAndMe(GenericNavigableComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        At most one image is expected; its alt and title must agree and
        the alt is reported ("" when the page has no image).
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1, pictures
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2632
2633
2634
class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        At most one image is expected, with matching alt and title; the
        reported alt is "" when the 'comic' div holds no image.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1, pictures
        alt = pictures[0]['alt'] if pictures else ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2662
2663
2664
class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        Every image of the 'comic' div must have matching alt and title;
        the alt of the first image (or "") is reported.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2694
2695
2696
class EndlessOrigami(GenericComicNotWorking, GenericNavigableComic):  # Nav not working
    """Class to retrieve Endless Origami Comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        Every image of the 'comic' div must have matching alt and title;
        the alt of the first image (or "") is reported.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt = pictures[0]['alt'] if pictures else ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2723
2724
2725
class PlanC(GenericNavigableComic):
    """Class to retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, image URLs, date) for a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2747
2748
2749 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Class to retrieve Buni Comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image URL and title) for a comic page.

        Exactly one image is expected in the 'comic' div; its title
        attribute (which must equal its alt) is used as the comic title.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1, pictures
        return {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
2767
2768
2769
class GenericCommitStrip(GenericNavigableComic):
    """Generic class to retrieve Commit Strips in different languages."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each language-specific subclass sets its own first_url.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        'title2' is the space-joined title attributes of the images found
        in the 'entry-content' div (missing titles count as "").  Image
        sources are converted to plain ASCII URIs and joined to the site URL.
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        content = soup.find('div', class_='entry-content')
        pictures = content.find_all('img')
        title2 = ' '.join(pic.get('title', '') for pic in pictures)
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
                for pic in pictures
            ],
        }
2788
2789
2790
class CommitStripFr(GenericCommitStrip):
    """Class to retrieve Commit Strips in French.

    Only configuration lives here; the scraping logic is inherited from
    GenericCommitStrip.
    """
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2797
2798
2799
class CommitStripEn(GenericCommitStrip):
    """Class to retrieve Commit Strips in English.

    Only configuration lives here; the scraping logic is inherited from
    GenericCommitStrip.
    """
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2805
2806
2807
class GenericBoumerie(GenericNavigableComic):
    """Generic class to retrieve Boumeries comics in different languages."""
    # Also on http://boumeries.tumblr.com
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: subclasses provide the date format and the locale
    # that get_comic_info passes to string_to_date.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        The post date is parsed with the subclass's `date_format` and
        `lang`.  Every image of the 'comic' div must have matching alt
        and title attributes.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, cls.date_format, cls.lang)
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2834
2835
2836
class BoumerieEn(GenericBoumerie):
    """Class to retrieve Boumeries comics in English.

    Supplies the site URL plus the date format and locale used by the
    inherited get_comic_info.
    """
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    _categories = ('BOUMERIES', )
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2844
2845
2846
class BoumerieFr(GenericBoumerie):
    """Class to retrieve Boumeries comics in French.

    Supplies the site URL plus the date format and locale used by the
    inherited get_comic_info.
    """
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('BOUMERIES', 'FRANCAIS')
    date_format = "%B %d, %Y"  # "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2854
2855
2856
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (first <h1>, else first <h2>, else ""),
        the 'og:description' text ("" when the element is absent), the
        short link (under the 'url2' key), the image URLs found in the
        post and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the element's text, not the parsed <meta> element itself:
        # the other values of the info dict are plain strings and numbers.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2888
2889
2890
class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        A page without a 'comic' div yields an empty image list and an
        empty alt.  Otherwise every image must share the same alt and
        title, equal to the title of the first image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2918
2919
2920
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the `p` parameter of the page's short link,
        which must start with the site URL.  At least one image is
        required in the 'comic' div, and all images must share the same
        alt and title text.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so its dots are matched literally rather than as
        # regex wildcards.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2950
2951
2952
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the `p` parameter of the page's short link,
        which must start with the site URL.  At least one image is
        required in the 'comic' div, and all images must share the same
        alt and title text.  Tags are the space-joined contents of the
        'article:tag' <meta> elements.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so its dots are matched literally rather than as
        # regex wildcards.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2989
2990
2991
class SystemComic(GenericNavigableComic):
    """Class to retrieve System Comic."""
    name = 'system'
    long_name = 'System Comic'
    url = 'http://www.systemcomic.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor inside the 'first' <li>)."""
        home = get_soup_at_url(cls.url)
        first_item = home.find('li', class_='first')
        return first_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        Title and description come from Open Graph <meta> elements, the
        date from the first <time> element and the images from the first
        <figure> element.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        timestamp = soup.find('time')["datetime"]
        day = string_to_date(timestamp, "%Y-%m-%d")
        figure = soup.find('figure')
        pictures = figure.find_all('img')
        return {
            'title': title,
            'description': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
3019
3020
3021
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor of the matching <li>, or None when that <li>
        is absent.
        """
        # prev is next / next is prev
        if next_:
            li_class = 'prev'
        else:
            li_class = 'next'
        item = last_soup.find('li', class_=li_class)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        Only images that carry a src attribute are kept; at least one is
        required, and the alt of the first one is reported.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        timestamp = soup.find('time', class_='published')['datetime']
        day = string_to_date(timestamp, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = [pic for pic in body.find_all('img') if pic.get('src') is not None]
        alt = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
3060
3061
3062
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retrieval logic for comics hosted on WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        imgs = image_div.find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in imgs],
        }
3085
3086
3087
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = 'http://everythingsstupid.net'
    name = 'stupid'
    long_name = "Everything's Stupid"
3095
3096
3097
class TheIsmComics(GenericDeletedComic, GenericWordPressInkblot):
    """Retriever for The Ism comics (flagged as a deleted comic)."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = "The Ism"
3103
3104
3105
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics."""
    url = 'http://woodenplankstudios.com'
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
3110
3111
3112
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, None if absent."""
        label = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=label)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3140
3141
3142
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'nav-first' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first navigation anchor not pointing at the home page.

        None is returned when no such anchor exists.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=nav_id)
        return next(
            (a for a in candidates if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        foot = soup.find("div", id="comic-foot")
        imgs = foot.find_all("img")
        # Sanity checks: alt and title agree and the page has a single image
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        assert len(imgs) == 1, imgs
        return {
            'title': imgs[0]['title'],
            'img': [tag['src'] for tag in imgs],
        }
3173
3174
3175
class ManVersusManatee(GenericNavigableComic):
    """Retriever for the Man Versus Manatee comics."""
    name = 'manvsmanatee'
    long_name = 'Man Versus Manatee'
    url = 'http://manvsmanatee.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        return {
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3197
3198
3199
class TheMeerkatguy(GenericNavigableComic):
    """Retriever for The Meerkatguy comics."""
    name = 'meerkatguy'
    long_name = 'The Meerkatguy'
    url = 'http://www.themeerkatguy.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
3216
3217
3218
class Ubertool(GenericNavigableComic):
    """Retriever for the Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('h2', class_='post-title').string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        return {
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3243
3244
3245
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    first_url = 'http://www.earthexplodes.com/comics/000/'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' (or 'prev'), None if absent."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('title').string
        image_div = soup.find('div', id='image')
        imgs = image_div.find_all('img')
        # The title attribute of the first image is used as alt text
        alt = imgs[0].get('title', '')
        # Image sources are joined to the site url
        full_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in imgs]
        return {
            'img': full_urls,
            'title': title,
            'alt': alt,
        }
3270
3271
3272 View Code Duplication
class PomComics(GenericNavigableComic):
    """Retriever for the Pom Comics / Piece of Me comics."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' (or 'btn-prev') anchor, None if absent."""
        button_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('h1').string
        desc = soup.find('meta', property='og:description')['content']
        keywords_meta = soup.find('meta', attrs={'name': 'keywords'})
        tags = keywords_meta['content']
        comic_div = soup.find('div', class_='comic')
        imgs = comic_div.find_all('img')
        return {
            'title': title,
            'desc': desc,
            'tags': tags,
            # Image sources are joined to the site url
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in imgs],
        }
3302
3303
3304
class CubeDrone(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Class to retrieve Cube Drone comics.

    Flagged as not working because the website has changed.
    """
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the 'backward' glyph span)."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the right (next) or left (previous) chevron
        span, or None when the page has no such span. Previously a missing
        span raised AttributeError instead of signalling the absence of a
        link.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title and url from the twitter meta
        tags, the title and alt attributes of the first comic image and the
        list of image sources.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3339
3340
3341
class MakeItStoopid(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Make It Stoopid comics (flagged as a deleted comic)."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page.

        The elements with class 'cnav' are split into the first five and the
        rest, which are asserted to be equal. Elements of the first group
        that have no href are replaced by None.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # order is: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the home page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (index 3) or previous (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary; the title comes from the link."""
        title = link['title']
        imgs = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [tag['src'] for tag in imgs],
        }
3375
3376
3377
class OffTheLeashDog(GenericNavigableComic):
    """Retriever for the Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'
    _categories = ('FAWCETT', )
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find("h1", class_="entry-title").string
        entry = soup.find('div', class_='entry-content')
        imgs = entry.find_all('img')
        return {
            'title': title,
            'img': [tag['src'] for tag in imgs],
        }
3398
3399
3400
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }
3423
3424
3425 View Code Duplication
class ConsoliaComics(GenericNavigableComic):
    """Retriever for the Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' (or 'prev'), None if absent."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('time')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3456
3457
3458
class GenericBlogspotComic(GenericNavigableComic):
    """Shared retrieval logic for comics hosted on Blogspot."""
    _categories = ('BLOGSPOT', )
    # Subclasses are expected to provide the url of their first comic
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next) or 'older' (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)
3468
3469
3470 View Code Duplication
class TuMourrasMoinsBete(GenericBlogspotComic):
    """Retriever for the Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'
    _categories = ('FRANCAIS', )

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        imgs = body.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [tag['src'] for tag in imgs],
            'author': author,
            'title': title,
        }
3489
3490
3491
class Octopuns(GenericBlogspotComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'  # or http://octopuns.blogspot.fr/
    first_url = 'http://octopuns.blogspot.com/2010/12/17122010-always-read-label.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('h3', class_='post-title entry-title').string
        header_date = soup.find('h2', class_='date-header').string
        day = string_to_date(header_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [lnk['href'] for lnk in image_links],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3513
3514
3515
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The anchor classes are swapped on purpose: 'prev-item' is looked up
        for the next comic and 'next-item' for the previous one.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with title, alt text, description, author,
        publication date and image urls (joined to the site url). Pages
        without any image yield an empty image list and an empty alt string.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Content lives in one of two possible containers
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup Tag, `'title' not in i` appears to
        # test the tag's children rather than its attributes, so this sanity
        # check may never fail - confirm and consider `i.has_attr('title')`.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: the fallback used to be a list when there was no image
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3553
3554
3555 View Code Duplication
class GloryOwlComix(GenericBlogspotComic):
    """Retriever for the Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'
    _categories = ('NSFW', 'FRANCAIS')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [lnk['href'] for lnk in image_links],
            'author': author,
            'title': title,
        }
3574
3575
3576
class AtRandomComics(GenericNavigableComic):
    """Retriever for the At Random comics."""
    name = 'atrandom'
    long_name = 'At Random Comics'
    url = 'http://www.atrandomcomics.com'
    first_url = 'http://www.atrandomcomics.com/at-random-comics-home/2015/5/5/can-of-worms'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, None if absent.

        The ids are swapped: 'prevLink' is looked up for the next comic and
        'nextLink' for the previous one.
        """
        anchor_id = 'prevLink' if next_ else 'nextLink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in soup."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'description': desc,
        }
3608
3609
3610
class GenericTumblrV1(GenericComic):
    """Shared retrieval logic for comics hosted on Tumblr, via the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics found in the posts returned by get_posts.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def check_url(cls, url):
        """Print a message when url does not start with cls.url.

        The url is returned unchanged in every case.
        """
        if not url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post, after checking it."""
        post_url = post['url']
        return cls.check_url(post_url)

    @classmethod
    def get_api_url(cls):
        """Return the url of the '/api/read/' endpoint for this comic."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API url querying the single post with the given id."""
        base = cls.get_api_url()
        return base + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Build the information dictionary for a post.

        None is returned for posts whose type is not 'photo'.
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts published after last_comic, using the API.

        nb_post_per_call is the number of posts requested per call (max 50).
        The tumblr v1 api serves posts from newer to older but they are
        returned here in chronological order. Without a last_comic, all
        posts are returned. An empty iterable is returned when the post of
        last_comic cannot be found.
        """
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        collected = []
        if last_comic is not None:
            cls.check_url(last_comic['url'])
            cls.check_url(last_comic['api_url'])
            # Tumblr posts get deleted from time to time. If the previously
            # retrieved post is gone, walking the whole history looking for
            # it would be a waste of time: fail early and clearly instead.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        summary = get_soup_at_url(api_url).find('posts')
        start, total = int(summary['start']), int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                post_id = int(post['id'])
                if waiting_for_id and waiting_for_id == post_id:
                    # Reached the last known post: everything newer is collected
                    return reversed(collected)
                collected.append(post)
        if waiting_for_id is not None:
            print("Did not find %s : there might be a problem" % waiting_for_id)
            return []
        return reversed(collected)
3716
3717
3718
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retriever for the Saturday Morning Breakfast Cereal comics (Tumblr)."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    url = 'http://smbc-comics.tumblr.com'
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    _categories = ('SMBC', )
3726
3727
3728
class AHammADay(GenericTumblrV1):
    """Retriever for the A Hamm A Day comics."""
    url = 'http://www.ahammaday.com'
    name = 'hamm'
    long_name = 'A Hamm A Day'
3733
3734
3735
class IrwinCardozo(GenericTumblrV1):
    """Retriever for the Irwin Cardozo comics."""
    url = 'http://irwincardozocomics.tumblr.com'
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
3740
3741
3742
class AccordingToDevin(GenericTumblrV1):
    """Retriever for the According To Devin comics."""
    url = 'http://accordingtodevin.tumblr.com'
    name = 'devin'
    long_name = 'According To Devin'
3747
3748
3749
class ItsTheTieTumblr(GenericTumblrV1):
    """Retriever for the It's the tie comics (Tumblr)."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    url = "http://itsthetie.tumblr.com"
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    _categories = ('TIE', )
3757
3758
3759
class OctopunsTumblr(GenericTumblrV1):
    """Retriever for the Octopuns comics (Tumblr)."""
    # Also on http://www.octopuns.net
    url = 'http://octopuns.tumblr.com'
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
3765
3766
3767
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retriever for the Pictures In Boxes comics (Tumblr)."""
    # Also on http://www.picturesinboxes.com
    url = 'https://picturesinboxescomic.tumblr.com'
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
3773
3774
3775
class TubeyToonsTumblr(GenericTumblrV1):
    """Retriever for the Tubey Toons comics (Tumblr)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    url = 'https://tubeytoons.tumblr.com'
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    # NOTE(review): category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS');
    # presumably shared with a sibling class - confirm before renaming.
    _categories = ('TUNEYTOONS', )
3783
3784
3785
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retriever for the Unearthed comics (Tumblr)."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    url = 'https://unearthedcomics.tumblr.com'
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    _categories = ('UNEARTHED', )
3793
3794
3795
class PieComic(GenericTumblrV1):
    """Retriever for the Pie Comic comics."""
    url = "http://piecomic.tumblr.com"
    name = 'pie'
    long_name = 'Pie Comic'
3800
3801
3802
class MrEthanDiamond(GenericTumblrV1):
    """Retriever for the Mr Ethan Diamond comics."""
    url = 'http://mrethandiamond.tumblr.com'
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
3807
3808
3809
class Flocci(GenericTumblrV1):
    """Retriever for the floccinaucinihilipilification comics."""
    url = "http://floccinaucinihilipilificationa.tumblr.com"
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
3814
3815
3816
class UpAndOut(GenericTumblrV1):
    """Retriever for the Up & Out comics (Tumblr)."""
    # Also on http://tapastic.com/series/UP-and-OUT
    url = 'http://upandoutcomic.tumblr.com'
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
3822
3823
3824
class Pundemonium(GenericTumblrV1):
    """Retriever for the Pundemonium comics."""
    url = 'http://monstika.tumblr.com'
    name = 'pundemonium'
    long_name = 'Pundemonium'
3829
3830
3831
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
3832
    """Class to retrieve Poorly Drawn Lines comics."""
3833
    # Also on http://poorlydrawnlines.com
3834
    name = 'poorlydrawn-tumblr'
3835
    long_name = 'Poorly Drawn Lines (from Tumblr)'
3836
    url = 'http://pdlcomics.tumblr.com'
3837
    _categories = ('POORLYDRAWN', )
3838
3839
3840
class PearShapedComics(GenericTumblrV1):
3841
    """Class to retrieve Pear Shaped Comics."""
3842
    name = 'pearshaped'
3843
    long_name = 'Pear-Shaped Comics'
3844
    url = 'http://pearshapedcomics.com'
3845
3846
3847
class PondScumComics(GenericTumblrV1):
3848
    """Class to retrieve Pond Scum Comics."""
3849
    name = 'pond'
3850
    long_name = 'Pond Scum'
3851
    url = 'http://pondscumcomic.tumblr.com'
3852
3853
3854
class MercworksTumblr(GenericTumblrV1):
3855
    """Class to retrieve Mercworks comics."""
3856
    # Also on http://mercworks.net
3857
    name = 'mercworks-tumblr'
3858
    long_name = 'Mercworks (from Tumblr)'
3859
    url = 'http://mercworks.tumblr.com'
3860
3861
3862
class OwlTurdTumblr(GenericTumblrV1):
3863
    """Class to retrieve Owl Turd comics."""
3864
    # Also on http://tapastic.com/series/Owl-Turd-Comix
3865
    name = 'owlturd-tumblr'
3866
    long_name = 'Owl Turd (from Tumblr)'
3867
    url = 'http://owlturd.com'
3868
    _categories = ('OWLTURD', )
3869
3870
3871
class VectorBelly(GenericTumblrV1):
3872
    """Class to retrieve Vector Belly comics."""
3873
    # Also on http://vectorbelly.com
3874
    name = 'vector'
3875
    long_name = 'Vector Belly'
3876
    url = 'http://vectorbelly.tumblr.com'
3877
3878
3879
class GoneIntoRapture(GenericTumblrV1):
3880
    """Class to retrieve Gone Into Rapture comics."""
3881
    # Also on http://goneintorapture.tumblr.com
3882
    # Also on http://tapastic.com/series/Goneintorapture
3883
    name = 'rapture'
3884
    long_name = 'Gone Into Rapture'
3885
    url = 'http://goneintorapture.com'
3886
3887
3888
class TheOatmealTumblr(GenericTumblrV1):
3889
    """Class to retrieve The Oatmeal comics."""
3890
    # Also on http://theoatmeal.com
3891
    name = 'oatmeal-tumblr'
3892
    long_name = 'The Oatmeal (from Tumblr)'
3893
    url = 'http://oatmeal.tumblr.com'
3894
3895
3896
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
3897
    """Class to retrieve Heck If I Know Comics."""
3898
    # Also on http://tapastic.com/series/Regular
3899
    name = 'heck-tumblr'
3900
    long_name = 'Heck if I Know comics (from Tumblr)'
3901
    url = 'http://heckifiknowcomics.com'
3902
3903
3904
class MyJetPack(GenericTumblrV1):
3905
    """Class to retrieve My Jet Pack comics."""
3906
    name = 'jetpack'
3907
    long_name = 'My Jet Pack'
3908
    url = 'http://myjetpack.tumblr.com'
3909
3910
3911
class CheerUpEmoKidTumblr(GenericTumblrV1):
3912
    """Class to retrieve CheerUpEmoKid comics."""
3913
    # Also on http://www.cheerupemokid.com
3914
    # Also on http://tapastic.com/series/CUEK
3915
    name = 'cuek-tumblr'
3916
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
3917
    url = 'https://enzocomics.tumblr.com'
3918
3919
3920
class ForLackOfABetterComic(GenericTumblrV1):
3921
    """Class to retrieve For Lack Of A Better Comics."""
3922
    # Also on http://forlackofabettercomic.com
3923
    name = 'lack'
3924
    long_name = 'For Lack Of A Better Comic'
3925
    url = 'http://forlackofabettercomic.tumblr.com'
3926
3927
3928
class ZenPencilsTumblr(GenericTumblrV1):
3929
    """Class to retrieve ZenPencils comics."""
3930
    # Also on http://zenpencils.com
3931
    # Also on http://www.gocomics.com/zen-pencils
3932
    name = 'zenpencils-tumblr'
3933
    long_name = 'Zen Pencils (from Tumblr)'
3934
    url = 'http://zenpencils.tumblr.com'
3935
    _categories = ('ZENPENCILS', )
3936
3937
3938
class ThreeWordPhraseTumblr(GenericTumblrV1):
3939
    """Class to retrieve Three Word Phrase comics."""
3940
    # Also on http://threewordphrase.com
3941
    name = 'threeword-tumblr'
3942
    long_name = 'Three Word Phrase (from Tumblr)'
3943
    url = 'http://threewordphrase.tumblr.com'
3944
3945
3946
class TimeTrabbleTumblr(GenericTumblrV1):
3947
    """Class to retrieve Time Trabble comics."""
3948
    # Also on http://timetrabble.com
3949
    name = 'timetrabble-tumblr'
3950
    long_name = 'Time Trabble (from Tumblr)'
3951
    url = 'http://timetrabble.tumblr.com'
3952
3953
3954
class SafelyEndangeredTumblr(GenericTumblrV1):
3955
    """Class to retrieve Safely Endangered comics."""
3956
    # Also on http://www.safelyendangered.com
3957
    name = 'endangered-tumblr'
3958
    long_name = 'Safely Endangered (from Tumblr)'
3959
    url = 'http://tumblr.safelyendangered.com'
3960
3961
3962
class MouseBearComedyTumblr(GenericTumblrV1):
3963
    """Class to retrieve Mouse Bear Comedy comics."""
3964
    # Also on http://www.mousebearcomedy.com
3965
    name = 'mousebear-tumblr'
3966
    long_name = 'Mouse Bear Comedy (from Tumblr)'
3967
    url = 'http://mousebearcomedy.tumblr.com'
3968
3969
3970
class BouletCorpTumblr(GenericTumblrV1):
3971
    """Class to retrieve BouletCorp comics."""
3972
    # Also on http://www.bouletcorp.com
3973
    name = 'boulet-tumblr'
3974
    long_name = 'Boulet Corp (from Tumblr)'
3975
    url = 'https://bouletcorp.tumblr.com'
3976
    _categories = ('BOULET', )
3977
3978
3979
class TheAwkwardYetiTumblr(GenericTumblrV1):
3980
    """Class to retrieve The Awkward Yeti comics."""
3981
    # Also on http://www.gocomics.com/the-awkward-yeti
3982
    # Also on http://theawkwardyeti.com
3983
    # Also on https://tapastic.com/series/TheAwkwardYeti
3984
    name = 'yeti-tumblr'
3985
    long_name = 'The Awkward Yeti (from Tumblr)'
3986
    url = 'http://larstheyeti.tumblr.com'
3987
    _categories = ('YETI', )
3988
3989
3990
class NellucNhoj(GenericTumblrV1):
3991
    """Class to retrieve NellucNhoj comics."""
3992
    name = 'nhoj'
3993
    long_name = 'Nelluc Nhoj'
3994
    url = 'http://nellucnhoj.com'
3995
3996
3997
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
3998
    """Class to retrieve Down The Upward Spiral comics."""
3999
    # Also on http://www.downtheupwardspiral.com
4000
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
4001
    name = 'spiral-tumblr'
4002
    long_name = 'Down the Upward Spiral (from Tumblr)'
4003
    url = 'http://downtheupwardspiral.tumblr.com'
4004
4005
4006
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Declared as `_categories` (leading underscore), the attribute name used
    # by the other comic classes in this file for their category tags.
    _categories = ('DAMILEE', )
4013
4014
4015
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Declared as `_categories` (leading underscore), the attribute name used
    # by the other comic classes in this file for their category tags.
    _categories = ('DAMILEE', )
4024
4025
4026
class OneOneOneOneComicTumblr(GenericTumblrV1):
4027
    """Class to retrieve 1111 Comics."""
4028
    # Also on http://www.1111comics.me
4029
    # Also on https://tapastic.com/series/1111-Comics
4030
    name = '1111-tumblr'
4031
    long_name = '1111 Comics (from Tumblr)'
4032
    url = 'http://comics1111.tumblr.com'
4033
    _categories = ('ONEONEONEONE', )
4034
4035
4036
class JhallComicsTumblr(GenericTumblrV1):
4037
    """Class to retrieve Jhall Comics."""
4038
    # Also on http://jhallcomics.com
4039
    name = 'jhall-tumblr'
4040
    long_name = 'Jhall Comics (from Tumblr)'
4041
    url = 'http://jhallcomics.tumblr.com'
4042
4043
4044
class BerkeleyMewsTumblr(GenericTumblrV1):
4045
    """Class to retrieve Berkeley Mews comics."""
4046
    # Also on http://www.gocomics.com/berkeley-mews
4047
    # Also on http://www.berkeleymews.com
4048
    name = 'berkeley-tumblr'
4049
    long_name = 'Berkeley Mews (from Tumblr)'
4050
    url = 'http://mews.tumblr.com'
4051
    _categories = ('BERKELEY', )
4052
4053
4054
class JoanCornellaTumblr(GenericTumblrV1):
4055
    """Class to retrieve Joan Cornella comics."""
4056
    # Also on http://joancornella.net
4057
    name = 'cornella-tumblr'
4058
    long_name = 'Joan Cornella (from Tumblr)'
4059
    url = 'http://cornellajoan.tumblr.com'
4060
4061
4062
class RespawnComicTumblr(GenericTumblrV1):
4063
    """Class to retrieve Respawn Comic."""
4064
    # Also on http://respawncomic.com
4065
    name = 'respawn-tumblr'
4066
    long_name = 'Respawn Comic (from Tumblr)'
4067
    url = 'https://respawncomic.tumblr.com'
4068
4069
4070
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name spelled like the author's name in the class name and URL.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the tag keeps its existing spelling ('HALLBACK') because
    # other classes may declare the same tag - confirm before renaming.
    _categories = ('HALLBACK', )
4080
4081
4082
class ComicNuggets(GenericTumblrV1):
4083
    """Class to retrieve Comic Nuggets."""
4084
    name = 'nuggets'
4085
    long_name = 'Comic Nuggets'
4086
    url = 'http://comicnuggets.com'
4087
4088
4089
class PigeonGazetteTumblr(GenericTumblrV1):
4090
    """Class to retrieve The Pigeon Gazette comics."""
4091
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
4092
    name = 'pigeon-tumblr'
4093
    long_name = 'The Pigeon Gazette (from Tumblr)'
4094
    url = 'http://thepigeongazette.tumblr.com'
4095
4096
4097
class CancerOwl(GenericTumblrV1):
4098
    """Class to retrieve Cancer Owl comics."""
4099
    # Also on http://cancerowl.com
4100
    name = 'cancerowl-tumblr'
4101
    long_name = 'Cancer Owl (from Tumblr)'
4102
    url = 'http://cancerowl.tumblr.com'
4103
4104
4105
class FowlLanguageTumblr(GenericTumblrV1):
4106
    """Class to retrieve Fowl Language comics."""
4107
    # Also on http://www.fowllanguagecomics.com
4108
    # Also on http://tapastic.com/series/Fowl-Language-Comics
4109
    # Also on http://www.gocomics.com/fowl-language
4110
    name = 'fowllanguage-tumblr'
4111
    long_name = 'Fowl Language Comics (from Tumblr)'
4112
    url = 'http://fowllanguagecomics.tumblr.com'
4113
    _categories = ('FOWLLANGUAGE', )
4114
4115
4116
class TheOdd1sOutTumblr(GenericTumblrV1):
4117
    """Class to retrieve The Odd 1s Out comics."""
4118
    # Also on http://theodd1sout.com
4119
    # Also on https://tapastic.com/series/Theodd1sout
4120
    name = 'theodd-tumblr'
4121
    long_name = 'The Odd 1s Out (from Tumblr)'
4122
    url = 'http://theodd1sout.tumblr.com'
4123
4124
4125
class TheUnderfoldTumblr(GenericTumblrV1):
4126
    """Class to retrieve The Underfold comics."""
4127
    # Also on http://theunderfold.com
4128
    name = 'underfold-tumblr'
4129
    long_name = 'The Underfold (from Tumblr)'
4130
    url = 'http://theunderfold.tumblr.com'
4131
4132
4133
class LolNeinTumblr(GenericTumblrV1):
4134
    """Class to retrieve Lol Nein comics."""
4135
    # Also on http://lolnein.com
4136
    name = 'lolnein-tumblr'
4137
    long_name = 'Lol Nein (from Tumblr)'
4138
    url = 'http://lolneincom.tumblr.com'
4139
4140
4141
class FatAwesomeComicsTumblr(GenericTumblrV1):
4142
    """Class to retrieve Fat Awesome Comics."""
4143
    # Also on http://fatawesome.com/comics
4144
    name = 'fatawesome-tumblr'
4145
    long_name = 'Fat Awesome (from Tumblr)'
4146
    url = 'http://fatawesomecomedy.tumblr.com'
4147
4148
4149
class TheWorldIsFlatTumblr(GenericTumblrV1):
4150
    """Class to retrieve The World Is Flat Comics."""
4151
    # Also on https://tapastic.com/series/The-World-is-Flat
4152
    name = 'flatworld-tumblr'
4153
    long_name = 'The World Is Flat (from Tumblr)'
4154
    url = 'http://theworldisflatcomics.com'
4155
4156
4157
class DorrisMc(GenericTumblrV1):
    """Class to retrieve Dorris Mc Comics."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4163
4164
4165
class LeleozTumblr(GenericDeletedComic, GenericTumblrV1):
4166
    """Class to retrieve Leleoz comics."""
4167
    # Also on https://tapastic.com/series/Leleoz
4168
    name = 'leleoz-tumblr'
4169
    long_name = 'Leleoz (from Tumblr)'
4170
    url = 'http://leleozcomics.tumblr.com'
4171
4172
4173
class MoonBeardTumblr(GenericTumblrV1):
4174
    """Class to retrieve MoonBeard comics."""
4175
    # Also on http://moonbeard.com
4176
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
4177
    name = 'moonbeard-tumblr'
4178
    long_name = 'Moon Beard (from Tumblr)'
4179
    url = 'http://squireseses.tumblr.com'
4180
    _categories = ('MOONBEARD', )
4181
4182
4183
class AComik(GenericTumblrV1):
    """Class to retrieve A Comik."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4188
4189
4190
class ClassicRandy(GenericTumblrV1):
4191
    """Class to retrieve Classic Randy comics."""
4192
    name = 'randy'
4193
    long_name = 'Classic Randy'
4194
    url = 'http://classicrandy.tumblr.com'
4195
4196
4197
class DagssonTumblr(GenericTumblrV1):
4198
    """Class to retrieve Dagsson comics."""
4199
    # Also on http://www.dagsson.com
4200
    name = 'dagsson-tumblr'
4201
    long_name = 'Dagsson Hugleikur (from Tumblr)'
4202
    url = 'https://hugleikurdagsson.tumblr.com'
4203
4204
4205
class LinsEditionsTumblr(GenericTumblrV1):
4206
    """Class to retrieve L.I.N.S. Editions comics."""
4207
    # Also on https://linsedition.com
4208
    # Now on http://warandpeas.tumblr.com
4209
    name = 'lins-tumblr'
4210
    long_name = 'L.I.N.S. Editions (from Tumblr)'
4211
    url = 'https://linscomics.tumblr.com'
4212
    _categories = ('LINS', )
4213
4214
4215
class WarAndPeasTumblr(GenericTumblrV1):
4216
    """Class to retrieve War And Peas comics."""
4217
    # Was on https://linscomics.tumblr.com
4218
    name = 'warandpeas-tumblr'
4219
    long_name = 'War And Peas (from Tumblr)'
4220
    url = 'http://warandpeas.tumblr.com'
4221
    _categories = ('WARANDPEAS', )
4222
4223
4224
class OrigamiHotDish(GenericTumblrV1):
4225
    """Class to retrieve Origami Hot Dish comics."""
4226
    name = 'origamihotdish'
4227
    long_name = 'Origami Hot Dish'
4228
    url = 'http://origamihotdish.com'
4229
4230
4231
class HitAndMissComicsTumblr(GenericTumblrV1):
4232
    """Class to retrieve Hit and Miss Comics."""
4233
    name = 'hitandmiss'
4234
    long_name = 'Hit and Miss Comics'
4235
    url = 'https://hitandmisscomics.tumblr.com'
4236
4237
4238
class HMBlanc(GenericTumblrV1):
4239
    """Class to retrieve HM Blanc comics."""
4240
    name = 'hmblanc'
4241
    long_name = 'HM Blanc'
4242
    url = 'http://hmblanc.tumblr.com'
4243
4244
4245
class TalesOfAbsurdityTumblr(GenericTumblrV1):
4246
    """Class to retrieve Tales Of Absurdity comics."""
4247
    # Also on http://talesofabsurdity.com
4248
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
4249
    name = 'absurdity-tumblr'
4250
    long_name = 'Tales of Absurdity (from Tumblr)'
4251
    url = 'http://talesofabsurdity.tumblr.com'
4252
    _categories = ('ABSURDITY', )
4253
4254
4255
class RobbieAndBobby(GenericTumblrV1):
4256
    """Class to retrieve Robbie And Bobby comics."""
4257
    # Also on http://robbieandbobby.com
4258
    name = 'robbie-tumblr'
4259
    long_name = 'Robbie And Bobby (from Tumblr)'
4260
    url = 'http://robbieandbobby.tumblr.com'
4261
4262
4263
class ElectricBunnyComicTumblr(GenericTumblrV1):
4264
    """Class to retrieve Electric Bunny Comics."""
4265
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
4266
    name = 'bunny-tumblr'
4267
    long_name = 'Electric Bunny Comic (from Tumblr)'
4268
    url = 'http://electricbunnycomics.tumblr.com'
4269
4270
4271
class Hoomph(GenericTumblrV1):
4272
    """Class to retrieve Hoomph comics."""
4273
    name = 'hoomph'
4274
    long_name = 'Hoomph'
4275
    url = 'http://hoom.ph'
4276
4277
4278
class BFGFSTumblr(GenericTumblrV1):
4279
    """Class to retrieve BFGFS comics."""
4280
    # Also on https://tapastic.com/series/BFGFS
4281
    # Also on http://bfgfs.com
4282
    name = 'bfgfs-tumblr'
4283
    long_name = 'BFGFS (from Tumblr)'
4284
    url = 'https://bfgfs.tumblr.com'
4285
4286
4287
class DoodleForFood(GenericTumblrV1):
4288
    """Class to retrieve Doodle For Food comics."""
4289
    # Also on https://tapastic.com/series/Doodle-for-Food
4290
    name = 'doodle'
4291
    long_name = 'Doodle For Food'
4292
    url = 'http://www.doodleforfood.com'
4293
4294
4295
class CassandraCalinTumblr(GenericTumblrV1):
4296
    """Class to retrieve C. Cassandra comics."""
4297
    # Also on http://cassandracalin.com
4298
    # Also on https://tapastic.com/series/C-Cassandra-comics
4299
    name = 'cassandra-tumblr'
4300
    long_name = 'Cassandra Calin (from Tumblr)'
4301
    url = 'http://c-cassandra.tumblr.com'
4302
4303
4304
class DougWasTaken(GenericTumblrV1):
4305
    """Class to retrieve Doug Was Taken comics."""
4306
    name = 'doug'
4307
    long_name = 'Doug Was Taken'
4308
    url = 'https://dougwastaken.tumblr.com'
4309
4310
4311
class MandatoryRollerCoaster(GenericTumblrV1):
4312
    """Class to retrieve Mandatory Roller Coaster comics."""
4313
    name = 'rollercoaster'
4314
    long_name = 'Mandatory Roller Coaster'
4315
    url = 'http://mandatoryrollercoaster.com'
4316
4317
4318
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Class to retrieve C'Est Pas En Regardant Ses Pompes (...) comics."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://marcoandco.tumblr.com'
4323
4324
4325
class TheGrohlTroll(GenericTumblrV1):
4326
    """Class to retrieve The Grohl Troll comics."""
4327
    name = 'grohltroll'
4328
    long_name = 'The Grohl Troll'
4329
    url = 'http://thegrohltroll.com'
4330
4331
4332
class WebcomicName(GenericTumblrV1):
4333
    """Class to retrieve Webcomic Name comics."""
4334
    name = 'webcomicname'
4335
    long_name = 'Webcomic Name'
4336
    url = 'http://webcomicname.com'
4337
4338
4339
class BooksOfAdam(GenericTumblrV1):
4340
    """Class to retrieve Books of Adam comics."""
4341
    # Also on http://www.booksofadam.com
4342
    name = 'booksofadam'
4343
    long_name = 'Books of Adam'
4344
    url = 'http://booksofadam.tumblr.com'
4345
4346
4347
class HarkAVagrant(GenericTumblrV1):
4348
    """Class to retrieve Hark A Vagrant comics."""
4349
    # Also on http://www.harkavagrant.com
4350
    name = 'hark-tumblr'
4351
    long_name = 'Hark A Vagrant (from Tumblr)'
4352
    url = 'http://beatonna.tumblr.com'
4353
4354
4355
class OurSuperAdventureTumblr(GenericTumblrV1):
4356
    """Class to retrieve Our Super Adventure comics."""
4357
    # Also on https://tapastic.com/series/Our-Super-Adventure
4358
    # Also on http://www.oursuperadventure.com
4359
    # http://sarahgraley.com
4360
    name = 'superadventure-tumblr'
4361
    long_name = 'Our Super Adventure (from Tumblr)'
4362
    url = 'http://sarahssketchbook.tumblr.com'
4363
4364
4365
class JakeLikesOnions(GenericTumblrV1):
4366
    """Class to retrieve Jake Likes Onions comics."""
4367
    name = 'jake'
4368
    long_name = 'Jake Likes Onions'
4369
    url = 'http://jakelikesonions.com'
4370
4371
4372
class InYourFaceCakeTumblr(GenericTumblrV1):
4373
    """Class to retrieve In Your Face Cake comics."""
4374
    # Also on https://tapas.io/series/In-Your-Face-Cake
4375
    name = 'inyourfacecake-tumblr'
4376
    long_name = 'In Your Face Cake (from Tumblr)'
4377
    url = 'https://in-your-face-cake.tumblr.com'
4378
    _categories = ('INYOURFACECAKE', )
4379
4380
4381
class Robospunk(GenericTumblrV1):
4382
    """Class to retrieve Robospunk comics."""
4383
    name = 'robospunk'
4384
    long_name = 'Robospunk'
4385
    url = 'http://robospunk.com'
4386
4387
4388
class BananaTwinky(GenericTumblrV1):
4389
    """Class to retrieve Banana Twinky comics."""
4390
    name = 'banana'
4391
    long_name = 'Banana Twinky'
4392
    url = 'https://bananatwinky.tumblr.com'
4393
4394
4395
class YesterdaysPopcornTumblr(GenericTumblrV1):
4396
    """Class to retrieve Yesterday's Popcorn comics."""
4397
    # Also on http://www.yesterdayspopcorn.com
4398
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
4399
    name = 'popcorn-tumblr'
4400
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
4401
    url = 'http://yesterdayspopcorn.tumblr.com'
4402
4403
4404
class TwistedDoodles(GenericTumblrV1):
4405
    """Class to retrieve Twisted Doodles comics."""
4406
    name = 'twisted'
4407
    long_name = 'Twisted Doodles'
4408
    url = 'http://www.twisteddoodles.com'
4409
4410
4411
class UbertoolTumblr(GenericTumblrV1):
4412
    """Class to retrieve Ubertool comics."""
4413
    # Also on http://ubertoolcomic.com
4414
    # Also on https://tapastic.com/series/ubertool
4415
    name = 'ubertool-tumblr'
4416
    long_name = 'Ubertool (from Tumblr)'
4417
    url = 'https://ubertool.tumblr.com'
4418
    _categories = ('UBERTOOL', )
4419
4420
4421
class LittleLifeLinesTumblr(GenericDeletedComic, GenericTumblrV1):
4422
    """Class to retrieve Little Life Lines comics."""
4423
    # Also on http://www.littlelifelines.com
4424
    name = 'life-tumblr'
4425
    long_name = 'Little Life Lines (from Tumblr)'
4426
    url = 'https://little-life-lines.tumblr.com'
4427
4428
4429
class TheyCanTalk(GenericTumblrV1):
4430
    """Class to retrieve They Can Talk comics."""
4431
    name = 'theycantalk'
4432
    long_name = 'They Can Talk'
4433
    url = 'http://theycantalk.com'
4434
4435
4436
class Will5NeverCome(GenericTumblrV1):
4437
    """Class to retrieve Will 5:00 Never Come comics."""
4438
    name = 'will5'
4439
    long_name = 'Will 5:00 Never Come ?'
4440
    url = 'http://will5nevercome.com'
4441
4442
4443
class Sephko(GenericTumblrV1):
4444
    """Class to retrieve Sephko Comics."""
4445
    # Also on http://www.sephko.com
4446
    name = 'sephko'
4447
    long_name = 'Sephko'
4448
    url = 'https://sephko.tumblr.com'
4449
4450
4451
class BlazersAtDawn(GenericTumblrV1):
4452
    """Class to retrieve Blazers At Dawn Comics."""
4453
    name = 'blazers'
4454
    long_name = 'Blazers At Dawn'
4455
    url = 'http://blazersatdawn.tumblr.com'
4456
4457
4458
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
4459
    """Class to retrieve Art By Moga Comics."""
4460
    name = 'moga'
4461
    long_name = 'Art By Moga'
4462
    url = 'http://artbymoga.tumblr.com'
4463
4464
4465
class VerbalVomitTumblr(GenericTumblrV1):
4466
    """Class to retrieve Verbal Vomit comics."""
4467
    # Also on http://www.verbal-vomit.com
4468
    name = 'vomit-tumblr'
4469
    long_name = 'Verbal Vomit (from Tumblr)'
4470
    url = 'http://verbalvomits.tumblr.com'
4471
4472
4473
class LibraryComic(GenericTumblrV1):
4474
    """Class to retrieve LibraryComic."""
4475
    # Also on http://librarycomic.com
4476
    name = 'library-tumblr'
4477
    long_name = 'LibraryComic (from Tumblr)'
4478
    url = 'https://librarycomic.tumblr.com'
4479
4480
4481
class TizzyStitchBirdTumblr(GenericTumblrV1):
4482
    """Class to retrieve Tizzy Stitch Bird comics."""
4483
    # Also on http://tizzystitchbird.com
4484
    # Also on https://tapastic.com/series/TizzyStitchbird
4485
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
4486
    name = 'tizzy-tumblr'
4487
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
4488
    url = 'http://tizzystitchbird.tumblr.com'
4489
4490
4491
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
4492
    """Class to retrieve VictimsOfCircumsolar comics."""
4493
    # Also on http://www.victimsofcircumsolar.com
4494
    name = 'circumsolar-tumblr'
4495
    long_name = 'Victims Of Circumsolar (from Tumblr)'
4496
    url = 'https://victimsofcomics.tumblr.com'
4497
4498
4499
class RockPaperCynicTumblr(GenericTumblrV1):
4500
    """Class to retrieve RockPaperCynic comics."""
4501
    # Also on http://www.rockpapercynic.com
4502
    # Also on https://tapastic.com/series/rockpapercynic
4503
    name = 'rpc-tumblr'
4504
    long_name = 'Rock Paper Cynic (from Tumblr)'
4505
    url = 'http://rockpapercynic.tumblr.com'
4506
4507
4508
class DeadlyPanelTumblr(GenericTumblrV1):
4509
    """Class to retrieve Deadly Panel comics."""
4510
    # Also on http://www.deadlypanel.com
4511
    # Also on https://tapastic.com/series/deadlypanel
4512
    name = 'deadly-tumblr'
4513
    long_name = 'Deadly Panel (from Tumblr)'
4514
    url = 'https://deadlypanel.tumblr.com'
4515
4516
4517
class CatanaComics(GenericComicNotWorking):  # Not a Tumblr anymore ?
4518
    """Class to retrieve Catana comics."""
4519
    name = 'catana'
4520
    long_name = 'Catana'
4521
    url = 'http://www.catanacomics.com'
4522
4523
4524
class AngryAtNothingTumblr(GenericTumblrV1):
4525
    """Class to retrieve Angry at Nothing comics."""
4526
    # Also on http://www.angryatnothing.net
4527
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
4528
    name = 'angry-tumblr'
4529
    long_name = 'Angry At Nothing (from Tumblr)'
4530
    url = 'http://angryatnothing.tumblr.com'
4531
4532
4533
class ShanghaiTango(GenericTumblrV1):
4534
    """Class to retrieve Shanghai Tango comic."""
4535
    name = 'tango'
4536
    long_name = 'Shanghai Tango'
4537
    url = 'http://tango2010weibo.tumblr.com'
4538
4539
4540
class OffTheLeashDogTumblr(GenericTumblrV1):
4541
    """Class to retrieve Off The Leash Dog comics."""
4542
    # Also on http://offtheleashdogcartoons.com
4543
    # Also on http://www.rupertfawcettcartoons.com
4544
    name = 'offtheleash-tumblr'
4545
    long_name = 'Off The Leash Dog (from Tumblr)'
4546
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
4547
    _categories = ('FAWCETT', )
4548
4549
4550
class ImogenQuestTumblr(GenericTumblrV1):
4551
    """Class to retrieve Imogen Quest comics."""
4552
    # Also on http://imogenquest.net
4553
    name = 'imogen-tumblr'
4554
    long_name = 'Imogen Quest (from Tumblr)'
4555
    url = 'http://imoquest.tumblr.com'
4556
4557
4558
class Shitfest(GenericTumblrV1):
4559
    """Class to retrieve Shitfest comics."""
4560
    name = 'shitfest'
4561
    long_name = 'Shitfest'
4562
    url = 'http://shitfestcomic.com'
4563
4564
4565
class IceCreamSandwichComics(GenericTumblrV1):
4566
    """Class to retrieve Ice Cream Sandwich Comics."""
4567
    name = 'icecream'
4568
    long_name = 'Ice Cream Sandwich Comics'
4569
    url = 'http://icecreamsandwichcomics.com'
4570
4571
4572
class Dustinteractive(GenericTumblrV1):
4573
    """Class to retrieve Dustinteractive comics."""
4574
    name = 'dustinteractive'
4575
    long_name = 'Dustinteractive'
4576
    url = 'http://dustinteractive.com'
4577
4578
4579
class StickyCinemaFloor(GenericTumblrV1):
4580
    """Class to retrieve Sticky Cinema Floor comics."""
4581
    name = 'stickycinema'
4582
    long_name = 'Sticky Cinema Floor'
4583
    url = 'https://stickycinemafloor.tumblr.com'
4584
4585
4586
class IncidentalComicsTumblr(GenericTumblrV1):
4587
    """Class to retrieve Incidental Comics."""
4588
    # Also on http://www.incidentalcomics.com
4589
    name = 'incidental-tumblr'
4590
    long_name = 'Incidental Comics (from Tumblr)'
4591
    url = 'http://incidentalcomics.tumblr.com'
4592
4593
4594
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
4595
    """Class to retrieve A Pleasant Waste Of Time comics."""
4596
    # Also on https://tapas.io/series/A-Pleasant-
4597
    name = 'pleasant-waste-tumblr'
4598
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
4599
    url = 'https://artjcf.tumblr.com'
4600
    _categories = ('WASTE', )
4601
4602
4603
class HorovitzComicsTumblr(GenericTumblrV1):
4604
    """Class to retrieve Horovitz new comics."""
4605
    # Also on http://www.horovitzcomics.com
4606
    name = 'horovitz-tumblr'
4607
    long_name = 'Horovitz (from Tumblr)'
4608
    url = 'https://horovitzcomics.tumblr.com'
4609
    _categories = ('HOROVITZ', )
4610
4611
4612
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Class to retrieve Deep Dark Fears comics."""
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4617
4618
4619
class DakotaMcDadzean(GenericTumblrV1):
    """Class to retrieve Dakota McFadzean comics."""
    # The class name keeps its original spelling so existing references to it
    # still resolve; only the displayed name follows the spelling in the URL.
    name = 'dakota'
    long_name = 'Dakota McFadzean'
    url = 'http://dakotamcfadzean.tumblr.com'
4624
4625
4626
class ExtraFabulousComicsTumblr(GenericTumblrV1):
4627
    """Class to retrieve Extra Fabulous Comics."""
4628
    # Also on http://extrafabulouscomics.com
4629
    name = 'efc-tumblr'
4630
    long_name = 'Extra Fabulous Comics (from Tumblr)'
4631
    url = 'https://extrafabulouscomics.tumblr.com'
4632
    _categories = ('EFC', )
4633
4634
4635
class AlexLevesque(GenericTumblrV1):
4636
    """Class to retrieve AlexLevesque comics."""
4637
    name = 'alevesque'
4638
    long_name = 'Alex Levesque'
4639
    url = 'http://alexlevesque.com'
4640
    _categories = ('FRANCAIS', )
4641
4642
4643
class JamesOfNoTradesTumblr(GenericTumblrV1):
4644
    """Class to retrieve JamesOfNoTrades comics."""
4645
    # Also on http://jamesofnotrades.com
4646
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
4647
    # Also on https://tapas.io/series/James-of-No-Trades
4648
    name = 'jamesofnotrades-tumblr'
4649
    long_name = 'James Of No Trades (from Tumblr)'
4650
    url = 'http://jamesfregan.tumblr.com'
4651
    _categories = ('JAMESOFNOTRADES', )
4652
4653
4654
class InfiniteGuff(GenericTumblrV1):
4655
    """Class to retrieve Infinite Guff comics."""
4656
    name = 'infiniteguff'
4657
    long_name = 'Infinite Guff'
4658
    url = 'http://infiniteguff.com'
4659
4660
4661
class SkeletonClaw(GenericTumblrV1):
4662
    """Class to retrieve Skeleton Claw comics."""
4663
    name = 'skeletonclaw'
4664
    long_name = 'Skeleton Claw'
4665
    url = 'http://skeletonclaw.com'
4666
4667
4668
class MrsFrolleinTumblr(GenericTumblrV1):
4669
    """Class to retrieve Mrs Frollein comics."""
4670
    # Also on http://www.webtoons.com/en/challenge/mrsfrollein/list?title_no=51710
4671
    name = 'frollein'
4672
    long_name = 'Mrs Frollein (from Tumblr)'
4673
    url = 'https://mrsfrollein.tumblr.com'
4674
4675
4676
class GoodBearComicsTumblr(GenericTumblrV1):
4677
    """Class to retrieve GoodBearComics."""
4678
    # Also on https://goodbearcomics.com
4679
    name = 'goodbear-tumblr'
4680
    long_name = 'Good Bear Comics (from Tumblr)'
4681
    url = 'https://goodbearcomics.tumblr.com'
4682
4683
4684
class BrooklynCartoonsTumblr(GenericTumblrV1):
4685
    """Class to retrieve Brooklyn Cartoons."""
4686
    # Also on https://www.brooklyncartoons.com
4687
    # Also on https://www.instagram.com/brooklyncartoons
4688
    name = 'brooklyn-tumblr'
4689
    long_name = 'Brooklyn Cartoons (from Tumblr)'
4690
    url = 'http://brooklyncartoons.tumblr.com'
4691
4692
4693
class GemmaCorrellTumblr(GenericTumblrV1):
    """Class to retrieve Gemma Correll comics."""
    # Also on http://www.gemmacorrell.com/portfolio/comics/
    name = 'gemma-tumblr'
    long_name = 'Gemma Correll (from Tumblr)'
    url = 'http://gemmacorrell.tumblr.com'
4698
4699
4700
class RobotatertotTumblr(GenericTumblrV1):
    """Class to retrieve Robotatertot comics from Tumblr."""
    # Also on https://www.instagram.com/robotatertotcomics
    name = 'robotatertot-tumblr'
    long_name = 'Robotatertot (from Tumblr)'
    url = 'https://robotatertot.tumblr.com'
4706
4707
4708
class HuffyPenguin(GenericTumblrV1):
    """Class to retrieve Huffy Penguin comics from Tumblr."""
    name = 'huffypenguin'
    long_name = 'Huffy Penguin'
    url = 'http://huffy-penguin.tumblr.com'
4713
4714
4715
class CowardlyComicsTumblr(GenericTumblrV1):
    """Class to retrieve Cowardly Comics from Tumblr."""
    # Also on https://tapas.io/series/CowardlyComics
    # Also on http://www.webtoons.com/en/challenge/cowardly-comics/list?title_no=65893
    name = 'cowardly-tumblr'
    long_name = 'Cowardly Comics (from Tumblr)'
    url = 'http://cowardlycomics.tumblr.com'
4722
4723
4724
class Caw4hwTumblr(GenericTumblrV1):
    """Class to retrieve Caw4hw comics from Tumblr."""
    # Also on https://tapas.io/series/CAW4HW
    name = 'caw4hw-tumblr'
    long_name = 'Caw4hw (from Tumblr)'
    url = 'https://caw4hw.tumblr.com'
4730
4731
4732
class WeFlapsTumblr(GenericTumblrV1):
    """Class to retrieve We Flaps comics from Tumblr."""
    name = 'weflaps-tumblr'
    long_name = 'We Flaps (from Tumblr)'
    url = 'https://weflaps.tumblr.com'
4737
4738
4739
class TheseInsideJokesTumblr(GenericTumblrV1):
    """Class to retrieve These Inside Jokes comics from Tumblr."""
    # Also on http://www.theseinsidejokes.com
    name = 'theseinsidejokes-tumblr'
    long_name = 'These Inside Jokes (from Tumblr)'
    url = 'http://theseinsidejokes.tumblr.com'
4745
4746
4747
class SinewynTumblr(GenericTumblrV1):
    """Class to retrieve Sinewyn comics from Tumblr."""
    # Also on https://sinewyn.wordpress.com
    name = 'sinewyn-tumblr'
    long_name = 'Sinewyn (from Tumblr)'
    url = 'https://sinewyn.tumblr.com'
4753
4754
4755
class BoumeriesTumblr(GenericTumblrV1):
    """Class to retrieve Boumeries comics from Tumblr."""
    # Also on http://bd.boumerie.com
    # Also on http://comics.boumerie.com
    name = 'boumeries-tumblr'
    long_name = 'Boumeries (from Tumblr)'
    url = 'http://boumeries.tumblr.com/'
    _categories = ('BOUMERIES', )
4763
4764
4765
class HorovitzComics(GenericDeletedComic, GenericListableComic):
    """Shared logic for the comics published on horovitzcomics.com.

    Concrete subclasses supply ``name``, ``long_name`` and a compiled
    ``link_re`` whose first group captures the comic number in archive links.
    """
    # Also on https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # The three groups are read as year, month and day by get_comic_info.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: each subclass overrides it with its own pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The number comes from the archive link's href, the title from the
        link text and the date from the path of the single ``img#comic``.
        """
        number_match = cls.link_re.match(link['href'])
        comic_number = int(number_match.group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1, comic_imgs
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links matching ``link_re``, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4797
4798
4799
class HorovitzNew(HorovitzComics):
    """Class to retrieve the Horovitz comics linked under /comics/new/."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
4804
4805
4806
class HorovitzClassic(HorovitzComics):
    """Class to retrieve the Horovitz comics linked under /comics/classic/."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4811
4812
4813
class GenericGoComic(GenericNavigableComic):
    """Generic class to handle the logic common to comics from gocomics.com.

    Navigation links are located through the exact CSS class strings of the
    page buttons (trailing space included).
    """
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic."""
        first_button_class = 'fa btn btn-outline-default btn-circle fa-backward sm '
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_=first_button_class)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        if next_:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-right js-next-comic hidden-sm-up sm '
        else:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-left js-previous-comic sm '
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Get the url of a link, resolved against the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Date, author and tags are read from the page's ``article:*`` meta
        tags; images from the 'item-comic-image' picture element.
        """
        def meta_content(prop):
            # Content of the <meta property=...> tag; fails if the tag is absent.
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        imgs = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
            'author': author,
            'tags': tags,
        }
4850
4851
4852
class PearlsBeforeSwine(GenericGoComic):
    """Class to retrieve Pearls Before Swine comics from GoComics."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4857
4858
4859
class Peanuts(GenericGoComic):
    """Class to retrieve Peanuts comics from GoComics."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4864
4865
4866
class MattWuerker(GenericGoComic):
    """Class to retrieve Matt Wuerker comics from GoComics."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4871
4872
4873
class TomToles(GenericGoComic):
    """Class to retrieve Tom Toles comics from GoComics."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4878
4879
4880
class BreakOfDay(GenericGoComic):
    """Class to retrieve Break Of Day comics from GoComics."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4885
4886
4887
class Brevity(GenericGoComic):
    """Class to retrieve Brevity comics from GoComics."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
4892
4893
4894
class MichaelRamirez(GenericGoComic):
    """Class to retrieve Michael Ramirez comics from GoComics."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4899
4900
4901
class MikeLuckovich(GenericGoComic):
    """Class to retrieve Mike Luckovich comics from GoComics."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4906
4907
4908
class JimBenton(GenericGoComic):
    """Class to retrieve Jim Benton comics from GoComics."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4914
4915
4916
class TheArgyleSweater(GenericGoComic):
    """Class to retrieve The Argyle Sweater comics from GoComics."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4921
4922
4923
class SunnyStreet(GenericGoComic):
    """Class to retrieve Sunny Street comics from GoComics."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4929
4930
4931
class OffTheMark(GenericGoComic):
    """Class to retrieve Off The Mark comics from GoComics."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4937
4938
4939
class WuMo(GenericGoComic):
    """Class to retrieve WuMo comics from GoComics."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4945
4946
4947
class LunarBaboon(GenericGoComic):
    """Class to retrieve Lunar Baboon comics from GoComics."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4954
4955
4956
class SandersenGocomic(GenericGoComic):
    """Class to retrieve Sarah Andersen comics from GoComics."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4963
4964
4965
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics from GoComics."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4973
4974
4975
class CalvinAndHobbesGoComic(GenericGoComic):
    """Class to retrieve Calvin and Hobbes comics from GoComics."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4981
4982
4983
class RallGoComic(GenericGoComic):
    """Class to retrieve Ted Rall comics from GoComics."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL', )
4990
4991
4992
class TheAwkwardYetiGoComic(GenericGoComic):
    """Class to retrieve The Awkward Yeti comics from GoComics."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
5001
5002
5003
class BerkeleyMewsGoComics(GenericGoComic):
    """Class to retrieve Berkeley Mews comics from GoComics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
5011
5012
5013
class SheldonGoComics(GenericGoComic):
    """Class to retrieve Sheldon comics from GoComics."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
5019
5020
5021
class FowlLanguageGoComics(GenericGoComic):
    """Class to retrieve Fowl Language comics from GoComics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
5030
5031
5032
class NickAnderson(GenericGoComic):
    """Class to retrieve Nick Anderson comics from GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
5037
5038
5039
class GarfieldGoComics(GenericGoComic):
    """Class to retrieve Garfield comics from GoComics."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
5046
5047
5048
class DorrisMcGoComics(GenericGoComic):
    """Class to retrieve Dorris Mc comics from GoComics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
5054
5055
5056
class FoxTrot(GenericGoComic):
    """Class to retrieve FoxTrot comics from GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
5061
5062
5063
class FoxTrotClassics(GenericGoComic):
    """Class to retrieve FoxTrot Classics comics from GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
5068
5069
5070
class MisterAndMeGoComics(GenericDeletedComic, GenericGoComic):
    """Class to retrieve Mister & Me comics from GoComics."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
5077
5078
5079
class NonSequitur(GenericGoComic):
    """Class to retrieve Non Sequitur (Wiley Miller) comics from GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
5084
5085
5086
class JoeyAlisonSayers(GenericGoComic):
    """Class to retrieve Joey Alison Sayers comics from GoComics."""
    name = 'joeyalison'
    long_name = 'Joey Alison Sayers (from GoComics)'
    url = 'http://www.gocomics.com/joey-alison-sayers-comics'
5091
5092
5093
class SavageChickenGoComics(GenericGoComic):
    """Class to retrieve Savage Chicken comics from GoComics."""
    # Also on http://www.savagechickens.com
    name = 'savage-goc'
    long_name = 'Savage Chicken (from GoComics)'
    url = 'http://www.gocomics.com/savage-chickens'
5099
5100
5101
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Archive elements are the entries decoded from the ``episodeList``
    JavaScript literal embedded in the series page; the methods below read
    their 'id', 'title' and 'publishDate' values.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comic.

        Returns a dict with the publication date, the image urls and the
        title, or None (after printing a notice) when the episode page does
        not contain any 'art-image' image.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably it is expressed in milliseconds - to be confirmed).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        # NOTE(review): fromtimestamp() converts using the local timezone of
        # the machine, so the resulting day can differ between hosts.
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the url of the episode page from the archive element's 'id'."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements embedded in the series page at ``cls.url``.

        Raises IndexError when no line of the page holds the episode list.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Only the first matching line is used.
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No 'episodeList' line found at %s" % cls.url)
5134
5135
5136
class VegetablesForDessert(GenericTapasticComic):
    """Class to retrieve Vegetables For Dessert comics from Tapastic."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
5142
5143
5144
class FowlLanguageTapa(GenericTapasticComic):
    """Class to retrieve Fowl Language comics from Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
5153
5154
5155
class OscillatingProfundities(GenericTapasticComic):
    """Class to retrieve Oscillating Profundities comics from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
5160
5161
5162
class ZnoflatsComics(GenericTapasticComic):
    """Class to retrieve Znoflats comics from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
5167
5168
5169
class SandersenTapastic(GenericTapasticComic):
    """Class to retrieve Sarah Andersen comics from Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
5176
5177
5178
class TubeyToonsTapastic(GenericTapasticComic):
    """Class to retrieve Tubey Toons comics from Tapastic."""
    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept
    # unchanged since the key may be shared with other classes - confirm.
    _categories = ('TUNEYTOONS', )
5186
5187
5188
class AnythingComicTapastic(GenericTapasticComic):
    """Class to retrieve Anything Comic comics from Tapastic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
5194
5195
5196
class UnearthedComicsTapastic(GenericTapasticComic):
    """Class to retrieve Unearthed Comics from Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
5204
5205
5206
class EverythingsStupidTapastic(GenericTapasticComic):
    """Class to retrieve Everything's Stupid comics from Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
5213
5214
5215
class JustSayEhTapastic(GenericTapasticComic):
    """Class to retrieve Just Say Eh comics from Tapastic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
5221
5222
5223
class ThorsThundershackTapastic(GenericTapasticComic):
    """Class to retrieve Thor's Thundershack comics from Tapastic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
5230
5231
5232
class OwlTurdTapastic(GenericTapasticComic):
    """Class to retrieve Owl Turd comics from Tapastic."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
5239
5240
5241
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Class to retrieve Gone Into Rapture comics from Tapastic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
5248
5249
5250
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Class to retrieve Heck If I Know Comics from Tapastic."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
5256
5257
5258
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Class to retrieve Cheer Up Emo Kid comics from Tapastic."""
    # Also on http://www.cheerupemokid.com
    # Also on https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
5265
5266
5267
class BigFootJusticeTapa(GenericTapasticComic):
    """Class to retrieve Big Foot Justice comics from Tapastic."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
5273
5274
5275
class UpAndOutTapa(GenericTapasticComic):
    """Class to retrieve Up & Out comics from Tapastic."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
5281
5282
5283
class ToonHoleTapa(GenericTapasticComic):
    """Class to retrieve Toon Hole comics from Tapastic."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
5289
5290
5291
class AngryAtNothingTapa(GenericTapasticComic):
    """Class to retrieve Angry At Nothing comics from Tapastic."""
    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
5298
5299
5300
class LeleozTapa(GenericTapasticComic):
    """Class to retrieve Leleoz comics from Tapastic."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
5306
5307
5308
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Class to retrieve The Awkward Yeti comics from Tapastic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
5317
5318
5319
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled with the leading underscore like every other comic class here;
    # a plain ``categories`` attribute is not the name the siblings use.
    _categories = ('DAMILEE', )
5326
5327
5328
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People from Tapastic."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled with the leading underscore like every other comic class here;
    # a plain ``categories`` attribute is not the name the siblings use.
    _categories = ('DAMILEE', )
5337
5338
5339
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Class to retrieve 1111 Comics from Tapastic."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
5347
5348
5349
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics from Tapastic."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is called "Tumble Dry" (see the url), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5355
5356
5357
class DeadlyPanelTapa(GenericTapasticComic):
    """Class to retrieve Deadly Panel comics from Tapastic."""
    # Also on http://www.deadlypanel.com
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5364
5365
5366
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the key is spelled 'HALLBACK'; kept unchanged since it
    # may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
5374
5375
5376
class ChrisHallbeckMiniTapa(GenericDeletedComic, GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the key is spelled 'HALLBACK'; kept unchanged since it
    # may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
5384
5385
5386
class ChrisHallbeckBiffTapa(GenericDeletedComic, GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the key is spelled 'HALLBACK'; kept unchanged since it
    # may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
5394
5395
5396
class RandoWisTapa(GenericTapasticComic):
    """Class to retrieve RandoWis comics from Tapastic."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5402
5403
5404
class PigeonGazetteTapa(GenericTapasticComic):
    """Class to retrieve The Pigeon Gazette comics from Tapastic."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5410
5411
5412
class TheOdd1sOutTapa(GenericTapasticComic):
    """Class to retrieve The Odd 1s Out comics from Tapastic."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5419
5420
5421
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Class to retrieve The World Is Flat comics from Tapastic."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5427
5428
5429
class MisterAndMeTapa(GenericTapasticComic):
    """Class to retrieve Mister & Me comics from Tapastic."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5436
5437
5438
class TalesOfAbsurdityTapa(GenericDeletedComic, GenericTapasticComic):
    """Class to retrieve Tales Of Absurdity comics from Tapastic."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
5446
5447
5448
class BFGFSTapa(GenericTapasticComic):
    """Class to retrieve BFGFS comics from Tapastic."""
    # Also on http://bfgfs.com
    # Also on https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5455
5456
5457
class DoodleForFoodTapa(GenericTapasticComic):
    """Class to retrieve Doodle For Food comics from Tapastic."""
    # Also on http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5463
5464
5465
class MrLovensteinTapa(GenericTapasticComic):
    """Class to retrieve Mr. Lovenstein comics from Tapastic."""
    # NOTE(review): the "Also on" link below is the same address as ``url``;
    # it presumably should point to another site hosting the comic - confirm.
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5471
5472
5473
class CassandraCalinTapa(GenericTapasticComic):
    """Class to retrieve Cassandra Calin (C. Cassandra) comics from Tapastic."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5480
5481
5482
class WafflesAndPancakes(GenericTapasticComic):
    """Class to retrieve Waffles And Pancakes comics from Tapastic."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5488
5489
5490
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Class to retrieve Yesterday's Popcorn comics from Tapastic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5497
5498
5499
class OurSuperAdventureTapastic(GenericDeletedComic, GenericTapasticComic):
    """Class to retrieve Our Super Adventure comics from Tapastic."""
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahssketchbook.tumblr.com
    # Also on http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5507
5508
5509
class NamelessPCs(GenericTapasticComic):
    """Class to retrieve Nameless PCs comics from Tapastic."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5515
5516
5517
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retriever for the Down The Upward Spiral comics on Tapastic."""
    # Other places where the comic is published:
    # Also on http://www.downtheupwardspiral.com
    # Also on http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5524
5525
5526
class UbertoolTapa(GenericTapasticComic):
    """Retriever for the Ubertool comics on Tapastic."""
    # Other places where the comic is published:
    # Also on http://ubertoolcomic.com
    # Also on https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    # Category tag(s) attached to this comic.
    _categories = ('UBERTOOL', )
5534
5535
5536
class BarteNerdsTapa(GenericDeletedComic, GenericTapasticComic):
    """Retriever for the BarteNerds comics on Tapastic.

    GenericDeletedComic comes first in the base list, so its behaviour takes
    precedence over the Tapastic one wherever both define the same attribute.
    """
    # Other place where the comic is published:
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5542
5543
5544
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retriever for the Small Blue Yonder comics on Tapastic."""
    # Other place where the comic is published:
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5550
5551
5552
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retriever for the Tizzy Stitch Bird comics on Tapastic."""
    # Other places where the comic is published:
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5560
5561
5562
class RockPaperCynicTapa(GenericTapasticComic):
    """Retriever for the Rock Paper Cynic comics on Tapastic."""
    # Other places where the comic is published:
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5569
5570
5571
class IsItCanonTapa(GenericTapasticComic):
    """Retriever for the Is It Canon comics on Tapastic."""
    # Other place where the comic is published:
    # Also on http://www.isitcanon.com
    name = 'canon-tapa'
    long_name = 'Is It Canon (from Tapastic)'
    # NOTE(review): plain http here, whereas the neighbouring Tapastic classes
    # use https - TODO confirm whether this is intentional.
    url = 'http://tapastic.com/series/isitcanon'
5577
5578
5579
class ItsTheTieTapa(GenericTapasticComic):
    """Retriever for the It's the tie comics on Tapastic."""
    # Other places where the comic is published:
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = "https://tapastic.com/series/itsthetie"
    # Category tag(s) attached to this comic.
    _categories = ('TIE', )
5587
5588
5589
class JamesOfNoTradesTapa(GenericTapasticComic):
    """Retriever for the James Of No Trades comics on Tapastic."""
    # Other places where the comic is published:
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    long_name = 'James Of No Trades (from Tapastic)'
    url = 'https://tapas.io/series/James-of-No-Trades'
    # Category tag(s) attached to this comic.
    _categories = ('JAMESOFNOTRADES', )
5598
5599
5600
class MomentumTapa(GenericTapasticComic):
    """Retriever for the Momentum comics on Tapastic."""
    # Other place where the comic is published:
    # Also on http://www.momentumcomic.com
    name = 'momentum-tapa'
    long_name = 'Momentum (from Tapastic)'
    url = 'https://tapastic.com/series/momentum'
5606
5607
5608
class InYourFaceCakeTapa(GenericTapasticComic):
    """Retriever for the In Your Face Cake comics on Tapastic."""
    # Other place where the comic is published:
    # Also on https://in-your-face-cake.tumblr.com
    name = 'inyourfacecake-tapa'
    long_name = 'In Your Face Cake (from Tapastic)'
    url = 'https://tapas.io/series/In-Your-Face-Cake'
    # Category tag(s) attached to this comic.
    _categories = ('INYOURFACECAKE', )
5615
5616
5617
class CowardlyComicsTapa(GenericTapasticComic):
    """Retriever for the Cowardly Comics on Tapastic."""
    # Other places where the comic is published:
    # Also on http://cowardlycomics.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/cowardly-comics/list?title_no=65893
    name = 'cowardly-tapa'
    long_name = 'Cowardly Comics (from Tapastic)'
    url = 'https://tapas.io/series/CowardlyComics'
5624
5625
5626
class Caw4hwTapa(GenericTapasticComic):
    """Retriever for the Caw4hw comics on Tapastic."""
    # Other place where the comic is published:
    # Also on https://caw4hw.tumblr.com
    name = 'caw4hw-tapa'
    long_name = 'Caw4hw (from Tapastic)'
    url = 'https://tapas.io/series/CAW4HW'
5632
5633
5634
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """Retriever for the A Pleasant Waste Of Time comics on Tapastic."""
    # Other place where the comic is published:
    # Also on https://artjcf.tumblr.com
    name = 'pleasant-waste-tapa'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    # NOTE(review): the series slug ends with a dash - presumably the real
    # slug on the site; confirm it has not been truncated.
    url = 'https://tapas.io/series/A-Pleasant-'
    # Category tag(s) attached to this comic.
    _categories = ('WASTE', )
5641
5642
5643
class AbsurdoLapin(GenericNavigableComic):
    """Retriever for the Absurdo Lapin comics."""
    name = 'absurdo'
    long_name = 'Absurdo'
    url = 'https://absurdo.lapin.org'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation anchors of a comic page.

        The page content is expected to hold exactly two 'buttons' blocks
        (unpacking fails with ValueError otherwise); the anchors are taken
        from the second one, in the order: prev, first, last, next.
        """
        content = soup.find('div', id='content')
        button_blocks = content.find_all('div', class_='buttons')
        _, second_block = button_blocks
        anchors = []
        for item in second_block.find_all('li'):
            anchors.append(item.find('a'))
        return anchors

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, read from the main page."""
        home_soup = get_soup_at_url(cls.url)
        nav_links = cls.get_nav(home_soup)
        # Index 1 is the 'first' entry of (prev, first, last, next).
        return nav_links[1]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (next_ is true) or previous comic."""
        nav_links = cls.get_nav(last_soup)
        if next_:
            return nav_links[3]
        return nav_links[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic in soup.

        Author and tags come from the 'author' and 'keywords' meta tags, the
        title from the <title> tag and the images from every <img> found in
        the 'content' div (their src is joined to the class url).
        """
        def meta_content(meta_name):
            """Return the content attribute of the named meta tag."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        author = meta_content('author')
        tags = meta_content('keywords')
        title = soup.find('title').string
        content = soup.find('div', id='content')
        img_urls = []
        for img in content.find_all('img'):
            img_urls.append(urljoin_wrapper(cls.url, img['src']))
        info = {}
        info['title'] = title
        info['img'] = img_urls
        info['tags'] = tags
        info['author'] = author
        return info
5681
5682
5683
def get_subclasses(klass):
    """Return the list of direct and indirect subclasses of klass.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several inheritance paths
    appears once per path.
    """
    direct = klass.__subclasses__()
    indirect = [sub for child in direct for sub in get_subclasses(child)]
    return direct + indirect
5689
5690
5691
def remove_st_nd_rd_th_from_date(string):
    """Strip the ordinal suffix of day numbers (1st/2nd/3rd/4th -> 1/2/3/4).

    Only a 'st', 'nd', 'rd' or 'th' directly following a digit is removed,
    so that day and month names containing these letters ("Monday",
    "Saturday", "August", ...) are left untouched and the result can be fed
    to a date parser.

    Args:
        string: date text possibly containing ordinal day numbers.

    Returns:
        The same text with the ordinal suffixes removed.
    """
    # The look-behind anchors the suffix to a digit without consuming it.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
5699
5700
5701
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which switches the
    process-wide locale (LC_ALL) to `local` for the duration of the parsing
    and restores the previous locale afterwards, even when parsing fails.

    Args:
        string: text to parse.
        date_format: strptime format string.
        local: name of the locale to use while parsing.

    Returns:
        The parsed datetime.date.

    Raises:
        ValueError: if string does not match date_format.
        locale.Error: if the requested locale cannot be set.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is global state: never leave it changed for the rest
        # of the program because of a parsing error.
        locale.setlocale(locale.LC_ALL, prev_locale)
5712
5713
5714
# Registry of the comic classes defined above.
# COMICS: every direct/indirect subclass of GenericComic (set removes the
# duplicates get_subclasses yields for classes reachable by several paths).
COMICS = set(get_subclasses(GenericComic))
# Classes with a name of None are left out of the usable comics.
VALID_COMICS = [c for c in COMICS if c.name is not None]
# Lookup from comic name to class; names must be unique.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES), \
    'Comic name shared by several classes: %s' % sorted(
        {c.name for c in VALID_COMICS if COMIC_NAMES[c.name] is not c})
# Class names must be unique as well.
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), \
    'Class name shared by several comic classes: %s' % sorted(
        n for n in CLASS_NAMES
        if sum(1 for c in VALID_COMICS if c.__name__ == n) > 1)
5720