Completed
Push — master ( c5261f...0f36ab )
by De
43s
created

comics.py (47 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, in increasing number.

        Implementation of GenericComic's abstract method.

        `last_comic` is the previously retrieved comic (a dict with a 'num'
        key) or a falsy value to start from comic number 1. The upper bound
        is the 'num' field of the JSON served at `info.0.json`.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        for num in range(start, latest['num'] + 1):
            if num == 404:
                # Number 404 is never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Module-level classmethod meant to be assigned inside a comic class body.

    Returns the value stored under the 'href' key of `link`, unchanged.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Joins the class base URL (`cls.url`) with the 'href' of `link` using
    `urljoin_wrapper` and returns the result.
    """
    base = cls.url
    href = link['href']
    return urljoin_wrapper(base, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where the previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `last_soup` is the soup of the current comic page; `next_` is True
        to get the next link and False to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Expected to return a dict without a 'url' key (it is added by
        `get_next_comic`) or None to skip the comic.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` (using its 'url') or, when `last_comic` is
        falsy, from the link returned by `get_first_comic_link`. Yields comic
        dicts with the 'url' key set, following next links until there is
        none or until a link leads back to the current URL.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A link pointing to the page we are on would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be fetched
        without an HTTP error, False otherwise (a message is printed).
        """
        # A bare `import urllib` does not guarantee that the `urllib.error`
        # submodule is loaded; import it explicitly so that the `except`
        # clause below cannot itself fail with AttributeError.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when the checks
        pass, False otherwise (a message is printed for each failure).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: those exposing a list of comics (aka 'archive').

    `get_next_comic` is written in terms of more specialized methods that
    implementation classes have to provide:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE',)

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable of the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the URL matching an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about one comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. When `last_comic` is given,
        elements are skipped up to and including the one whose URL equals
        `last_comic['url']`; every later element is fetched and yielded with
        its 'url' key set. A message is printed if that URL is never found.
        """
        if last_comic:
            pending_url = last_comic['url']
        else:
            pending_url = None
        elements = list(cls.get_archive_elements())
        for element in elements:
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last retrieved comic.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            soup = get_soup_at_url(url)
            info = cls.get_comic_info(soup, element)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending_url is not None:
            message = "Did not find %s in the %d comics: there might be a problem"
            print(message % (pending_url, len(elements)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Searches `last_soup` for a 'link' element whose `rel` is 'next' when
    `next_` is true and 'prev' otherwise, and returns what `find` returns.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Searches `last_soup` for an 'a' element whose `rel` is 'next' when
    `next_` is true and 'prev' otherwise, and returns what `find` returns.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Searches `last_soup` for an 'a' element with class 'navi navi-next'
    when `next_` is true and 'navi navi-prev' otherwise, and returns what
    `find` returns.
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Searches `last_soup` for an 'a' element with class
    'navi comic-nav-next navi-next' when `next_` is true and
    'navi comic-nav-previous navi-prev' otherwise, and returns what `find`
    returns.
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Searches `last_soup` for an 'a' element with class
    'comic-nav-base comic-nav-next' when `next_` is true and
    'comic-nav-base comic-nav-previous' otherwise, and returns what `find`
    returns.
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at `cls.url` and returns the result of looking up an
    'a' element with class 'navi navi-first' in it.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at `cls.url`, looks up the 'div' with class
    "nav-first" and returns the result of looking up an 'a' element inside
    it. Returns None when there is no such 'div', consistently with the
    other link helpers, instead of failing with AttributeError on None.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if div is None:
        return None
    return div.find('a')
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at `cls.url` and returns the result of looking up an
    'a' element with class 'comic-nav-base comic-nav-first' in it.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    a URL provided by the class.

    The class must define a `first_url` attribute; the returned object is a
    dict with that value under the 'href' key.

    Note: The first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a
    starting URL until the first comic is reached.

    The starting URL is `cls.first_url` when the class defines it;
    otherwise the user is prompted for one. Each visited URL is printed.
    The previous link is followed repeatedly until a page has none; the
    URL of that page is returned as a link-like dict ({'href': url}).

    This is a development convenience: once the first URL is known, it is
    better to hardcode it and use `simulate_first_link` instead.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics for which nothing has to be retrieved.

    Handy to temporarily deactivate a comic that does not work properly:
    replace `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY',)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
358
359
360
class GenericComicNotWorking(GenericEmptyComic):
    """Variant of GenericEmptyComic for comics whose retrieval is broken.

    More explicit than GenericEmptyComic: it highlights that only the
    implementation is not working and that it can be fixed.
    """
    _categories = ('NOTWORKING',)
366
367
368
class GenericUnavailableComic(GenericEmptyComic):
    """Variant of GenericEmptyComic for comics that are currently unavailable.

    More explicit than GenericEmptyComic: it highlights that the source of
    the comic is not available but is expected to be back soonish.
    See also GenericDeletedComic.
    """
    _categories = ('UNAVAILABLE',)
375
376
377
class GenericDeletedComic(GenericEmptyComic):
    """Variant of GenericEmptyComic for comics that do not exist anymore.

    More explicit than GenericEmptyComic: it highlights that the source of
    the comic is gone and probably cannot be fixed. The corresponding
    classes are kept because the downloaded data can still be used.
    See also GenericUnavailableComic.
    """
    _categories = ('DELETED',)
385
386
387 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (from the 'og:title' meta), the
        publication date (first 10 characters of the
        'article:published_time' meta, parsed as %Y-%m-%d), the sources of
        the images located under the site's wp-content/uploads/ directory
        and a prefix built from the title.
        """
        # Escape the base URL so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
414
415
416 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Base class for comics published on Le Monde blogs.

    Subclasses are expected to override `first_url`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short link, date and images from a post page.

        The date is read from the "entry-date" span and parsed with the
        "%d %B %Y" format in the "fr_FR.utf8" locale; images come from the
        'og:image' metas and are converted to plain ASCII URIs.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        img_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            img_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
        return {
            'title': og_title,
            'url2': shortlink,
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
439
440
441
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
447
448
449
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # This is not the very first post: no efficient way to reach it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
456
457
458
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
464
465
466
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
472
473
474
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
480
481
482
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
488
489
490
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
496
497
498
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
504
505
506 View Code Duplication
class Rall(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for Ted Rall comics (currently flagged as not working)."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL',)
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # This is not the very first comic: no efficient way to reach it was found.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, description and images from a page.

        The date comes from the "entry-date" span ("%B %d, %Y" format).
        Images are those of the 'entry-content' div, minus the last seven.
        """
        og_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        description = soup.find('meta', property='og:description')['content']
        content_div = soup.find('div', class_='entry-content')
        # The last seven images are dropped: they are social media buttons.
        comic_imgs = content_div.find_all('img')[:-7]
        return {
            'title': og_title,
            'author': author_name,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
537
538
539
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = "dilem"
    long_name = "Ali Dilem"
    url = "http://information.tv5monde.com/dilem"
    _categories = ('FRANCAIS',)
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if absent.

        The class names are swapped on purpose: the 'prev' item is used
        to go to the next comic and the 'next' item to go to the previous.
        """
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short URL, title, images and date from a comic page.

        The date is the first 10 characters of the 'dc:date' span content,
        parsed with the "%Y-%m-%d" format.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        raw_date = soup.find('span', property='dc:date')['content']
        published = string_to_date(raw_date[:10], "%Y-%m-%d")
        return {
            'short_url': shortlink,
            'title': twitter_title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
573
574
575
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict ('href' and 'title') for the first comic."""
        first = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images for a comic.

        The title is the 'title' of `link`; year, month and day are the
        three numeric path components matched in the link URL; images are
        those found in the "entry" div.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        link_title = link['title']
        link_url = cls.get_url_from_link(link)
        year, month, day = map(int, url_date_re.match(link_url).groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': link_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
603
604
605
class ZenPencils(GenericNavigableComic):
    """Retriever for Zen Pencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    _categories = ('ZENPENCILS',)
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from a comic page.

        Images are those of the 'comic' div (their sources are joined with
        the base URL). Author and date ("%B %d, %Y" format) are read from
        the 'post-content' div; the title from the 'post-title' h2.
        Assertions check that there is at least one image and that each
        image has equal 'alt' and 'title' attributes, either empty or equal
        to the comic title.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post_div = soup.find('div', class_='post-content')
        author_name = post_div.find("span", class_="post-author").find("a").string
        post_title = soup.find('h2', class_='post-title').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (post_title, "") for img in comic_imgs)
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'title': post_title,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': img_urls,
        }
638
639
640
class ItsTheTie(GenericDeletedComic, GenericNavigableComic):
    """Retriever for It's the tie comics (flagged as deleted)."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE',)
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date, images and tags from a comic page.

        Images are the 'og:image' metas followed by the 'data-oversrc'
        attributes not already listed among them; sources ending with
        "/2016/01/bonus-panel.png" are discarded. Tags come from the
        'article:tag' meta, defaulting to an empty string.
        """
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        og_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        over_srcs = [img['data-oversrc']
                     for img in soup.find_all('img', attrs={'data-oversrc': True})]
        merged = list(og_srcs)
        for src in over_srcs:
            if src not in og_srcs:
                merged.append(src)
        kept = [src for src in merged if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept,
            'tags': tags,
        }
674
675 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
676
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    _categories = ('FRANCAIS',)
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a post page.

        The date is read from the 'date-header' h2 and parsed with the
        "%A %d %B %Y" format in the "fr_FR.utf8" locale; images are those
        of the 'entry-body' div; the title is the 'entry-header' h3.
        """
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        post_title = soup.find('h3', class_='entry-header').string
        img_srcs = [img['src'] for img in body_imgs]
        return {
            'title': post_title,
            'img': img_srcs,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
700
701 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
702
class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for 1111 Comics (currently flagged as not working)."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    _categories = ('ONEONEONEONE',)
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page.

        Title and date ("%B %d, %Y" format) are read from the links of the
        'comic-title' h1 and of the 'comic-meta entry-meta' header; images
        come from the 'og:image' metas.
        """
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
727
728 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
729
class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
    """Retriever for Angry at Nothing comics (flagged as deleted)."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page.

        Title and date ("%B %d, %Y" format) are read from the links of the
        'comic-title' h1 and of the 'comic-meta entry-meta' header; images
        come from the 'og:image' metas.
        """
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
753
754
755
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the page's shortlink, which is expected
        to look like ``<url>/?p=<num>``. Both titles come from the ``alt`` and
        ``title`` attributes of the single image found in the comic div.

        Returns a dict with keys 'short_url', 'title', 'title2', 'img', 'num'.
        """
        # The base url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Sanity check on the scraped markup: exactly one image per comic.
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
781
782 View Code Duplication
783
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic url, which is expected to look like
        ``<url>/comic/<year>/<month>/<day>``.

        Returns a dict with keys 'month', 'year', 'day', 'img'.
        """
        url = cls.get_url_from_link(link)
        # The base url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
811
812
813
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the following (or preceding) strip, if any."""
        if next_:
            wrapper_class = 'nav-comic nav-right'
        else:
            wrapper_class = 'nav-comic nav-left'
        wrapper = last_soup.find('div', class_=wrapper_class)
        if not wrapper:
            return None
        return wrapper.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the <meta> tags of a strip page."""
        def meta_content(prop):
            """Content of the first <meta> tag carrying the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        og_images = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [tag['content'] for tag in og_images],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
849
850
851
class VictimsOfCircumsolar(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar webcomic."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description and image urls from a comic page."""
        def last_meta_content(prop):
            """Content of the last <meta> tag carrying the given property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        # Date is on the archive page
        title = last_meta_content('og:title')
        description = last_meta_content('og:description')
        panels = soup.find('div', id='comic').find_all('img')
        # Sanity check on the scraped markup: image texts repeat the page title.
        assert all(panel['title'] == panel['alt'] == title for panel in panels)
        return {
            'title': title,
            'description': description,
            'img': [panel['src'] for panel in panels],
        }
874
875
876
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the element wrapping the navigation image, or None when the
        image is absent from the page or its parent carries no 'href'.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # find() returns None when nothing matches: treat as "no link".
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except the known navigation, icon and
        advertisement images, which are recognised by the end of their 'src'.

        Returns a dict with keys 'title', 'title2', 'img'.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
908
909
910
class DeadlyPanel(GenericComicNotWorking, GenericNavigableComic):  # Not working on my machine
    """Retriever for the Deadly Panel webcomic."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image urls found in the comic div of a page."""
        panels = soup.find('div', id='comic').find_all('img')
        # Sanity check on the scraped markup: alt and title texts agree.
        assert all(panel['alt'] == panel['title'] for panel in panels)
        return {
            'img': [panel['src'] for panel in panels],
        }
928
929 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
930
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair webcomic."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, publication date and image urls from a page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        panels = soup.find('div', id='comic').find_all('img')
        return {
            'img': [panel['src'] for panel in panels],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
954
955 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
956
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest webcomic."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract titles, author, publication date and image urls from a page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        panels = soup.find('div', class_='comicpane').find_all('img')
        # Sanity check on the scraped markup: alt and title texts agree.
        assert all(panel['alt'] == panel['title'] for panel in panels)
        # The secondary title is the hover text of the first image.
        hover_text = panels[0]['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [panel['src'] for panel in panels],
            'title': title,
            'title2': hover_text,
            'author': author,
        }
984
985
986 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life webcomic."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image urls from a comic page."""
        title = soup.find('h1', class_='comic_title').string
        date_span = soup.find('span', class_='comic_date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        panels = soup.find_all('img', class_='comic')
        # Sanity check on the scraped markup: image texts repeat the title.
        assert all(panel['alt'] == panel['title'] == title for panel in panels)
        return {
            'title': title,
            # Images with an empty 'src' are left out.
            'img': [panel['src'] for panel in panels if panel['src']],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1013
1014
1015
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal webcomic."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image urls from a comic page."""
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        # An optional second image may sit in the 'aftercomic' div.
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_url = bonus_div.find('img')['src']
            if bonus_url:
                img_urls.append(bonus_url)
        publish_div = soup.find('div', class_='cc-publishtime')
        published = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, img_url))
                    for img_url in img_urls],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1047
1048 View Code Duplication
1049
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship webcomic."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the thumbnail anchors of the home page, in reversed page order."""
        home_soup = get_soup_at_url(cls.url)
        thumbnail_div = home_soup.find('div', id='all_thumbnails')
        anchors = thumbnail_div.find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the name and the image url from the Open Graph meta tags."""
        name = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        # Sanity check on the scraped markup: exactly one image per comic.
        assert len(og_images) == 1
        return {
            'name': name,
            'img': [tag['content'] for tag in og_images],
        }
1072
1073
1074 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks webcomic."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the <meta> tags of a comic page."""
        title = soup.find('meta', property='og:title')['content']
        # The description tag is optional.
        desc_tag = soup.find('meta', property='og:description')
        if desc_tag:
            description = desc_tag['content']
        else:
            description = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [tag['content'] for tag in og_images],
            'title': title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1100
1101
1102
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Comic urls look like <url>/?p=<num>; the base url is escaped so that its
    # dots only match literal dots.
    comic_num_re = re.compile(r'%s/\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the archive link url, the title from
        the archive link text and the date from the image url, which is
        expected to contain ``/<year>-<month>-<day>-``.

        Returns a dict with keys 'num', 'title', 'title2', 'img', 'year',
        'month', 'day'.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        # Sanity check on the scraped markup: alt and title texts agree.
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1138
1139
1140
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic url, which is expected to look like
        ``<url>/<year>/<month>/<day>/``. The texts are the non-empty 'title'
        attributes of the images of the story content.

        Returns a dict with keys 'img', 'title', 'texts', 'year', 'month', 'day'.
        """
        url = cls.get_url_from_link(link)
        # The base url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            # Images without a 'src' attribute are left out.
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1168
1169
1170
class BouletCorp(GenericBouletCorp):
    """Retriever for the French edition of BouletCorp."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # Category of this edition; GenericBouletCorp declares ('BOULET', ) itself.
    _categories = ('FRANCAIS', )
1176
1177
1178
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the English edition of BouletCorp."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1183
1184 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1185
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers webcomic."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, publication date and image urls from a page."""
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        panels = soup.find('div', id='comic').find_all('img')
        # The title is made of the hover texts of all the images.
        title = ' '.join(panel['title'] for panel in panels)
        # Sanity check on the scraped markup: alt and title texts agree.
        assert all(panel['alt'] == panel['title'] for panel in panels)
        return {
            'title': title,
            'author': author,
            'img': [panel['src'] for panel in panels],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1210
1211
1212
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole webcomic."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image urls from a comic page."""
        meta_div = soup.find('div', class_='entry-meta')
        published = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        panels = soup.find('div', id='comic').find_all('img')
        # The title comes from the first image; pages without image get ''.
        title = ""
        if panels:
            first_panel = panels[0]
            title = first_panel['alt']
            assert first_panel['title'] == title
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(panel['src']) for panel in panels],
        }
1240
1241
1242
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate webcomic."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract comic information, including the images of the extra panel page."""
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            panels = comic_div.find_all('img')
        else:
            panels = []
        # When an extra panel button is present, its target page is fetched
        # and the extra images are appended to the comic images.
        extra_url = None
        button_div = soup.find('div', id='extrapanelbutton')
        if button_div:
            extra_url = button_div.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            panels.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, panel['src']) for panel in panels],
        }
1276
1277
1278
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the navigation anchor, or None when the anchor is absent from
        the page or carries no 'href'.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as is;
        # confirm against the site markup whether it is intentional.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None:
            # find() returns None when nothing matches: treat as "no link".
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second to last '/'-separated component of the
        og:url meta content. The author credit is expected to start with 'by '
        and that prefix is dropped.

        Returns a dict with keys 'num', 'author', 'month', 'year', 'day',
        'prefix', 'img'.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1317
1318
1319
class MrLovenstein(GenericComic):
    """Retriever for the Mr. Lovenstein webcomic."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic (GenericComic's abstract method).

        The range of comic numbers is derived from the '/comic/<num>' links
        found on the home page.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_soup = get_soup_at_url(cls.url)
        nums = []
        for anchor in home_soup.find_all('a', href=comic_num_re):
            nums.append(int(comic_num_re.match(anchor['href']).group(1)))
        oldest, newest = min(nums), max(nums)
        start = last_comic['num'] + 1 if last_comic else oldest
        img_src_re = re.compile('^/images/comics/')
        for num in range(start, newest + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Comic images are handled in reversed page order.
            panels = soup.find_all('img', src=img_src_re)[::-1]
            description = soup.find('meta', attrs={'name': 'description'})['content']
            hover_texts = (panel.get('title') for panel in panels)
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(url, panel['src']) for panel in panels],
                'description': description,
            }
1349
1350
1351
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Comic urls look like <url>/index.php?comic=<num>; dots are escaped so
    # that they only match literal dots.
    comic_link_re = re.compile(r'^%s/index\.php\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the archive link url, the date from
        the archive link text and the text from the element following the
        archive link. The image is the first one whose 'src' starts with
        ``<url>/comics/``.

        Returns a dict with keys 'month', 'year', 'day', 'img', 'title',
        'text', 'num'.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # The base url is escaped so that its dots only match literal dots.
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1384
1385
1386
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Comic urls look like <url>/<year>/<month>/<day>/...; the base url is
    # escaped so that its dots only match literal dots.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the archive link text and the date is parsed from the
        archive link url.

        Returns a dict with keys 'title', 'day', 'month', 'year', 'img'.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        # Sanity check on the scraped markup: image alt text repeats the title.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1414
1415
1416
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ('<year>/<month>/'). Each
        month page holds images whose 'src' starts with '<year><month><day>'.
        Only comics dated strictly after the last retrieved one are yielded;
        without a last comic, the reference date is 1985-11-01.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The textual year/month are kept to rebuild urls and patterns.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # Adding 31 days to the first of the month gives a date past the
            # end of that month: months entirely before last_date are skipped.
            if date(year_num, month_num, 1) + timedelta(days=31) >= last_date:
                img_re = re.compile('^%s%s([0-9]*)' % (year, month))
                month_url = urljoin_wrapper(cls.url, url)
                for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                    img_src = img['src']
                    day = int(img_re.match(img_src).groups()[0])
                    comic_date = date(year_num, month_num, day)
                    if comic_date > last_date:
                        yield {
                            'url': month_url,
                            'year': year_num,
                            'month': month_num,
                            'day': day,
                            'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                        }
                        last_date = comic_date
1450
1451
1452
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # The base url is escaped so that its dots only match literal dots.
    comic_url_re = re.compile('^%s/([0-9]*)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The comic number is parsed from the archive link url and the title is
        the archive link text. The image is the first one whose 'src' starts
        with ``<url>/strips/``.

        Returns a dict with keys 'num', 'title', 'img'.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1475
1476
1477
class PhDComics(GenericNavigableComic):
    """Retriever for PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image, or None."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'next'/'prev' button image, or None."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and the image urls from the <meta> tags of a page."""
        title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_tag['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [tag['content'] for tag in og_images],
            'title': title,
        }
1506
1507
1508
class Octopuns(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Retriever for Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image whose src matches '.*/First.png'."""
        first_img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the Next/Back image, or None when it has no href."""
        pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from the post title, date header and image_src links."""
        post_title = soup.find('h3', class_='post-title entry-title').string
        header_date = soup.find('h2', class_='date-header').string
        published = string_to_date(header_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [tag['href'] for tag in image_links],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1540
1541
1542
class Quarktees(GenericNavigableComic):
    """Retriever for the comics posted on the Quarktees news blog."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with id 'article-next' or 'article-prev'."""
        link_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from og:title and the images of the article div."""
        og_title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': og_title,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in article_imgs],
        }
1566
1567
1568
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> whose href ends with 'comic=1'."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'next comic' or 'go back already'."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict; the number is parsed from the 'comic=N' URL suffix."""
        strip_src_re = re.compile('^/oc/comics/.*')
        num_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        comic_num = int(num_re.match(page_url).group(1))
        strip = soup.find('img', src=strip_src_re)
        return {
            'num': comic_num,
            'img': [urljoin_wrapper(page_url, strip['src'])],
            'title': strip.get('title')
        }
1598
1599
1600
class Oglaf(GenericNavigableComic):
    """Retriever for Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st'."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None when there is no such div."""
        nav_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from the title image (div 'tt') and the strip image.

        Exactly one image of each kind is expected; the description joins
        their title attributes.
        """
        page_title = soup.find('title').string
        heading_imgs = soup.find('div', id='tt').find_all('img')
        assert len(heading_imgs) == 1
        comic_imgs = soup.find_all('img', id='strip')
        assert len(comic_imgs) == 1
        all_imgs = heading_imgs + comic_imgs
        description = ' '.join(tag['title'] for tag in all_imgs)
        return {
            'title': page_title,
            'img': [tag['src'] for tag in all_imgs],
            'description': description,
        }
1634
1635
1636
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from twitter:label1, og:description and itemprop images."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        og_description = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': og_description,
            'img': [pic['src'] for pic in pictures],
        }
1660
1661
1662
class SomethingOfThatIlk(GenericDeletedComic):
    """Placeholder for the Something Of That Ilk comics (a GenericDeletedComic)."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1667
1668
1669
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from og:title and the images of the 'comic' div."""
        og_title = soup.find('meta', property='og:title')['content']
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'title': og_title,
            'img': [tag['src'] for tag in comic_imgs],
        }
1687
1688
1689
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics, listed from the archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's rel='bookmark' links in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict for one post.

        When the page has no 'comic' div, the image list, alt text and title
        are left empty.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            sources, alt_text, og_title = [], '', ''
        else:
            picture = comic_div.find('img')
            sources = [picture['src']]
            alt_text = picture['alt']
            assert alt_text == picture['title']
            og_title = soup.find('meta', property='og:title')['content']
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': og_title,
            'alt': alt_text,
            'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }
1726
1727
1728
class WarehouseComic(GenericNavigableComic):
    """Retriever for Warehouse Comic."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from the post title, post date and 'comic' div images."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1750
1751
1752
class JustSayEh(GenericNavigableComic):
    """Retriever for Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict; the alt text is taken from the first comic image."""
        post_title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Every image is expected to carry the same text in alt and title.
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'alt': first_alt,
        }
1773
1774
1775
class MouseBearComedy(GenericComicNotWorking):  # Website has changed
    """Retriever for Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from the post title, author, date and comic images."""
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # alt and title of every image are expected to equal the post title.
        assert all(tag['alt'] == tag['title'] == post_title for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'author': post_author,
        }
1801
1802
1803
class BigFootJustice(GenericNavigableComic):
    """Retriever for Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict; the title joins the title attributes of the images."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in comic_imgs)
        joined_title = ' '.join(tag['title'] for tag in comic_imgs)
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': joined_title,
        }
1822
1823
1824
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # Site root; must not carry stray whitespace (the previous value ended
    # with a space, which made it a malformed URL).
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and images come from the og:* meta tags, author and date from
        the shareaholic:* meta tags. Returns a dict with the keys 'title',
        'author', 'day', 'month', 'year' and 'img'.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 'YYYY-MM-DD' part of the published time is parsed.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries that are excluded from the comic's image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1856
1857
1858 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retriever for Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict; the alt text is taken from the first comic image."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'alt': first_alt,
        }
1885
1886
1887
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from the post metadata and the 'comicpane' images."""
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        # title and alt of every image are expected to equal the post title.
        assert all(tag['title'] == tag['alt'] == post_title for tag in pane_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': post_title,
            'author': post_author,
        }
1915
1916
1917
class Penmen(GenericNavigableComic):
    """Retriever for Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from the page title, entry images, shortlink, tags and date."""
        page_title = soup.find('title').string
        entry_imgs = soup.find('div', class_='entry-content').find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_names = ' '.join(anchor.string for anchor in soup.find_all('a', rel='tag'))
        # Only the leading 'YYYY-MM-DD' part of the <time> datetime is parsed.
        iso_day = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': [tag['src'] for tag in entry_imgs],
            'tags': tag_names,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1944
1945
1946
class TheDoghouseDiaries(GenericDeletedComic, GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> with id 'firstlink' from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with id 'nextlink' or 'previouslink'."""
        link_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict; the number is the last path segment of the comic URL."""
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': strip.get('title'),
            'img': [urljoin_wrapper(page_url, strip['src'].strip())],
            'num': int(page_url.split('/')[-1]),
        }
1975
1976
1977
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archives page in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the link contained in an archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The date is assembled from the month/day text of the cell preceding
        `td` and the year found in the comic URL. Returns a dict with the
        keys 'month', 'year', 'day', 'img' and 'title'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site URL so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the 'comic' div is used.
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
2013
2014
2015
class DiscoBleach(GenericDeletedComic):
    """Placeholder for the Disco Bleach comics (a GenericDeletedComic)."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
2020
2021
2022
class TubeyToons(GenericDeletedComic):
    """Placeholder for the TubeyToons comics (a GenericDeletedComic)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # how category names are used elsewhere before renaming.
    _categories = ('TUNEYTOONS', )
2030
2031
2032
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict; the alt text is the title attribute of the first image."""
        post_title = soup.find('h2', class_='post-title').string
        # The author is read from the second child of the 'post-author' span.
        post_author = soup.find('span', class_='post-author').contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        first_title = pane_imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == first_title for tag in pane_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in pane_imgs],
            'title': post_title,
            'alt': first_title,
            'author': post_author,
        }
2060
2061
2062
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div. Returns a dict with
        'img' (possibly empty) and 'title' (the image's title attribute, or
        an empty string when there is no image or no title).
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's links to '<url>/comic/...' pages in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site URL so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2086
2087
2088
class LoadingComics(GenericNavigableComic):
    """Retriever for Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> titled 'First' from the latest-comic page."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from the <h1>, the date span and the 'comic' div images.

        Only images whose alt and title attributes are both empty are kept.
        """
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [tag['src'] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2118
2119
2120
class ChuckleADuck(GenericNavigableComic):
    """Retriever for Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict; a page without 'comic' div yields no image and an empty title."""
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        post_author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            first_title = comic_imgs[0]['title']
        else:
            first_title = ""
        assert all(tag['title'] == tag['alt'] == first_title for tag in comic_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in comic_imgs],
            'title': first_title,
            'author': post_author,
        }
2146
2147
2148
class DepressedAlien(GenericNavigableComic):
    """Retriever for Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named 'beginArrow'."""
        begin_arrow = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from the twitter:title and og:image meta tags."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': twitter_title,
            'img': [meta['content'] for meta in og_images],
        }
2174
2175
2176 View Code Duplication
class TurnOffUs(GenericListableComic):
    """Retriever for TurnOffUs comics, listed from the 'all' page."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the 'post-list' <ul> in reverse page order."""
        listing_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        posts = listing_soup.find('ul', class_='post-list')
        return reversed(posts.find_all('a', class_='post-link'))

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the comic dict from the og:title and og:image meta tags."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
2198
2199
2200
class ThingsInSquares(GenericListableComic):
    """Retriever for Things In Squares comics, listed from the archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the comic dict from an archive row and the comic page.

        The row must hold exactly three cells: the second carries the link
        (title), the third the date in '%m.%d.%y' format.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        row_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        og_images = soup.find_all('meta', property='og:image')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': row_title,
            'title2': og_title,
            'description': description,
            'tags': tags,
            'img': [meta['content'] for meta in og_images],
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second of the row's three cells."""
        _, link_cell, _unused = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'archive-2' page's <tbody> in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        return reversed(archive_soup.find('tbody').find_all('tr'))
2241
2242
2243
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, images and alt text from a comic page."""
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        published = post.find('span', class_='post-date').string
        day = string_to_date(published, "%B %d, %Y")
        # Sanity check: each image must carry the same text in 'alt' and 'title'.
        assert all(image['alt'] == image['title'] for image in images)
        sources = [image['src'] for image in images]
        alt_text = ''.join(image['alt'] for image in images)
        return {
            'title': title,
            'img': sources,
            'alt': alt_text,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
        }
2270
2271
2272
class RockPaperScissors(GenericNavigableComic):
    """Class to retrieve Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, transcript, short link and images from a comic page."""
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript_div = soup.find('div', id='transcript-content')
        info = {
            'title': page_title,
            'transcript': transcript_div.string,
            'short_url': short_url,
        }
        info['img'] = [meta['content'] for meta in image_metas]
        return info
2293
2294
2295
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, tags, date and image from a comic page."""

        def named_meta_content(meta_name):
            """Return the content of the <meta> tag with the given name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = named_meta_content('twitter:title')
        description = named_meta_content('description')
        tag_meta = soup.find('meta', property='article:tag')
        # The tag meta is optional.
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        images = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(images) == 1
        alt_text = "".join(image['alt'] for image in images)
        # Drop whatever follows the last '?' in the image address.
        sources = [image['src'].rsplit('?', 1)[0] for image in images]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt_text,
            'img': sources,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2326
2327
2328 View Code Duplication
class JuliasDrawings(GenericListableComic):
    """Class to retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the link of each listed article, in reverse page order."""
        front_page = get_soup_at_url(cls.url)
        articles = front_page.find_all('article', class_='li post')
        links = []
        for article in reversed(articles):
            links.append(article.find('a'))
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Extract title, date and images from a drawing page."""
        published = soup.find('meta', property='og:article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        images = content.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2354
2355
2356
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the archive table that correspond to comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element (a 4-cell table row)."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the comic description from its page and its archive row.

        Number, title and date are read from the archive row `tr`; the
        image and its alt text are read from the comic page `soup`.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        images = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        day = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(images) == 1
        assert all(image.get('alt') == image.get('title') for image in images)
        return {
            'num': num,
            'title': title,
            'alt': images[0].get('alt', ''),
            'img': [image['src'] for image in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2397
2398
2399
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from a comic page."""
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        published = post.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        images = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [image['src'] for image in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2425
2426
2427 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and date from the meta tags of a comic page."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2453
2454
2455
class ThorsThundershack(GenericNavigableComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the first matching link whose target is not '/comic', or
        None when there is no such link.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, author, date and images from a comic page."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        assert all(image['title'] == image['alt'] for image in images)
        # The alt text is taken from the first image, if any.
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2498
2499
2500
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, images and alt text from a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # The alt text of the first image is the reference for all images.
        alt = images[0]['alt']
        assert all(image['alt'] == image['title'] == alt for image in images)
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2527
2528
2529
class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and image from a comic page."""
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        # Dates are parsed with the German locale.
        day = string_to_date(published, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Sanity checks: image texts match the post title, at most one image.
        assert all(image['alt'] == image['title'] == title for image in images)
        assert len(images) <= 1
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2555
2556
2557
class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, image and alt text from a comic page."""
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        published = soup.find("span", class_="entry-date").string
        day = string_to_date(published, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Sanity checks: matching alt/title texts and exactly one image.
        assert all(image['alt'] == image['title'] for image in images)
        assert len(images) == 1
        alt = images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2585
2586
2587
class TheAwkwardYeti(GenericNavigableComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page."""
        title = soup.find('h2', class_='post-title').string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Only the first image is required to have matching alt and title.
        assert all(image['alt'] == image['title'] for image in images[:1])
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2614
2615
2616
class PleasantThoughts(GenericNavigableComic):
    """Class to retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title and images from the post content of a comic page."""
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        entry = post.find("div", class_="entry")
        sources = [image['src'] for image in entry.find_all("img")]
        return {
            'title': title,
            'img': sources,
        }
2634
2635
2636
class MisterAndMe(GenericNavigableComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, image and alt text from a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Sanity checks: matching alt/title texts and at most one image.
        assert all(image['alt'] == image['title'] for image in images)
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2666
2667
2668
class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, image and alt text from a comic page."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks: matching alt/title texts and at most one image.
        assert all(image['alt'] == image['title'] for image in images)
        assert len(images) <= 1
        alt = images[0]['alt'] if images else ""
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2696
2697
2698
class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, images and alt text from a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(image['alt'] == image['title'] for image in images)
        # The alt text is taken from the first image, if any.
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2728
2729
2730
class EndlessOrigami(GenericComicNotWorking, GenericNavigableComic):  # Nav not working
    """Class to retrieve Endless Origami Comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, images and alt text from a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        assert all(image['alt'] == image['title'] for image in images)
        # The alt text is taken from the first image, if any.
        alt = images[0]['alt'] if images else ""
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2757
2758
2759
class PlanC(GenericNavigableComic):
    """Class to retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page."""
        title = soup.find('h2', class_='post-title').string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': title,
            'img': [image['src'] for image in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2781
2782
2783
class BuniComic(GenericNavigableComic):
    """Class to retrieve Buni Comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the image of a comic page; its title text is used as title."""
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        # Sanity checks: matching alt/title texts and exactly one image.
        assert all(image['alt'] == image['title'] for image in images)
        assert len(images) == 1
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': images[0]['title'],
        }
2801
2802
2803 View Code Duplication
class GenericCommitStrip(GenericNavigableComic):
    """Generic class to retrieve Commit Strips in different languages.

    Subclasses are expected to provide `first_url` (left as NotImplemented here).
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract titles, description and images from a comic page."""
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        images = entry.find_all('img')
        # Secondary title: the title texts of the images (empty when missing).
        image_titles = ' '.join(image.get('title', '') for image in images)
        # Image addresses are converted to plain ASCII and joined to the site url.
        sources = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(image['src']))
            for image in images
        ]
        return {
            'title': title,
            'title2': image_titles,
            'description': description,
            'img': sources,
        }
2822
2823
2824
class CommitStripFr(GenericCommitStrip):
    """Class to retrieve the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
    _categories = ("FRANCAIS", )
2831
2832
2833
class CommitStripEn(GenericCommitStrip):
    """Class to retrieve the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2839
2840
2841
class GenericBoumerie(GenericNavigableComic):
    """Generic class to retrieve Boumeries comics in different languages.

    Subclasses are expected to provide `date_format` and `lang`, which are
    both handed to string_to_date (left as NotImplemented here).
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short link, author, date and images from a comic page."""
        title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find('span', class_='post-date').string
        day = string_to_date(published, cls.date_format, cls.lang)
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        assert all(image['alt'] == image['title'] for image in images)
        return {
            'short_url': short_url,
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2867
2868
2869
class BoumerieEn(GenericBoumerie):
    """Class to retrieve the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Locale and format handed to string_to_date by the generic class.
    lang = "en_GB.UTF-8"
    date_format = "%B %d, %Y"
2876
2877
2878
class BoumerieFr(GenericBoumerie):
    """Class to retrieve the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    # Locale and format handed to string_to_date by the generic class.
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2886
2887
2888 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, description, short link ('url2'),
        image addresses and publication date of the comic page `soup`.
        The title and the description default to an empty string when the
        corresponding element is missing from the page.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is in the first <h1>, or in the first <h2> as a fallback.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> tag object itself.
        desc = soup.find('meta', property='og:description')
        description = desc['content'] if desc else ''
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': description,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2920
2921
2922
class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, images and alt text from a comic page.

        A page without comic container yields an empty image list and an
        empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        # The title text of the first image is the reference for all images.
        alt = images[0]['title'] if images else ""
        assert all(image['alt'] == image['title'] == alt for image in images)
        published = soup.find('span', class_='post-date').string
        day = string_to_date(published, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [image['src'] for image in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2950
2951
2952
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is parsed out of the page's ``shortlink`` url, which
        is expected to look like ``<cls.url>/?p=<number>``.

        Returns a dict with the short url, the number, the image urls, the
        publication date (day / month / year), the hover text and the title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images are expected to share the same 'alt' and 'title' text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2982
2983
2984
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is parsed out of the page's ``shortlink`` url, which
        is expected to look like ``<cls.url>/?p=<number>``.

        Returns a dict with the short url, the number, the image urls, the
        publication date (day / month / year), the title, the tags (joined
        with spaces), the hover text and the author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images are expected to share the same 'alt' and 'title' text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
3021
3022
3023
class SystemComic(GenericNavigableComic):
    """Retriever for System Comic."""
    name = 'system'
    long_name = 'System Comic'
    url = 'http://www.systemcomic.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor inside the 'first' list item of the home page."""
        home_soup = get_soup_at_url(cls.url)
        first_item = home_soup.find('li', class_='first')
        return first_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, date, images) for one comic page."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        day = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        figure = soup.find('figure')
        imgs = figure.find_all('img')
        return {
            'title': title,
            'description': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in imgs],
        }
3051
3052
3053
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines webcomic."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic, or None."""
        # The labels are swapped on purpose: the 'prev' item is used to go
        # forward and the 'next' item to go backward.
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, alt, description, author, date, images)."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Only images that actually have a 'src' attribute are kept.
        imgs = [img for img in body.find_all('img') if img.get('src') is not None]
        alt = imgs[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in imgs],
        }
3092
3093
3094
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retriever logic for WordPress sites using the Inkblot theme."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        return home_soup.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, images) for one comic page."""
        title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        imgs = image_div.find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in imgs],
        }
3117
3118
3119
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid webcomic."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/EverythingsStupid
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
3127
3128
3129
class TheIsmComics(GenericDeletedComic, GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Possibly also on https://tapastic.com/series/TheIsm
    name = "theism"
    long_name = 'The Ism'
    url = "http://www.theism-comics.com"
3135
3136
3137
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios webcomic."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
3142
3143
3144
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny webcomic."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None when absent."""
        label = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=label)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) from the page's Open Graph metadata."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3172
3173
3174
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic (the ``nav-first`` anchor of the home page)."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next or previous comic.

        Returns the first ``nav-next`` / ``nav-prev`` anchor whose target is
        not the site's home url, or None when there is no such anchor.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        for link in last_soup.find_all("a", id=nav_id):
            # Anchors pointing back to the home page are not comic links.
            # The class url is used rather than a second hard-coded copy.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title (taken from the image's ``title``
        attribute) and the image urls. Exactly one image is expected.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3205
3206
3207
class Ubertool(GenericNavigableComic):
    """Retriever for the Ubertool webcomic."""
    # Other places where the comic is published:
    #  - https://ubertool.tumblr.com
    #  - https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        published = soup.find('span', class_='post-date').string
        day = string_to_date(published, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3232
3233
3234
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes webcomic."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' (or 'prev')."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt) for one comic page."""
        title = soup.find('title').string
        image_div = soup.find('div', id='image')
        imgs = image_div.find_all('img')
        # The hover text is optional on the first image.
        alt = imgs[0].get('title', '')
        # Image sources are joined with the site url.
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in imgs]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
        }
3259
3260
3261
class PomComics(GenericNavigableComic):
    """Retriever for Pom Comics (Piece of Me)."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' anchor of the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' (or 'btn-prev') anchor of the given page."""
        button_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, desc, tags, images) for one comic page."""
        title = soup.find('h1').string
        desc = soup.find('meta', property='og:description')['content']
        keywords_meta = soup.find('meta', attrs={'name': 'keywords'})
        tags = keywords_meta['content']
        imgs = soup.find('div', class_='comic').find_all('img')
        return {
            'title': title,
            'desc': desc,
            'tags': tags,
            # Image sources are joined with the site url.
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
3291
3292
3293
class CubeDrone(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic (parent of the 'backward' glyph icon)."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next or previous comic.

        Returns the parent element of the right (next) or left (previous)
        chevron icon, or None when the page has no such icon.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        chevron = last_soup.find('span', class_=class_)
        # find() returns None when the icon is missing; returning None here
        # avoids an AttributeError on '.parent'.
        return chevron.parent if chevron else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the url and title read from the twitter meta
        tags, the ``title`` and ``alt`` attributes of the first image, and the
        image urls.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # NOTE: date extraction is currently disabled:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3328
3329
3330
class MakeItStoopid(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Make It Stoopid webcomic."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of the page as a list.

        The 'cnav' elements are split into the first five and the rest, and
        both parts are asserted to be equal. The order is: begin, prev,
        archive, next, end. Elements without an 'href' are replaced by None.
        """
        cnav = soup.find_all(class_='cnav')
        first_bar = cnav[:5]
        second_bar = cnav[5:]
        assert first_bar == second_bar
        links = []
        for elt in first_bar:
            links.append(elt if elt.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the home page."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; the title comes from the link that led to the page."""
        title = link['title']
        imgs = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [img['src'] for img in imgs],
        }
3364
3365
3366
class OffTheLeashDog(GenericNavigableComic):
    """Retriever for the Off The Leash Dog cartoons."""
    # Other places where the comic is published:
    #  - http://rupertfawcettsdoggyblog.tumblr.com
    #  - http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) for one comic page."""
        title = soup.find("h1", class_="entry-title").string
        content_div = soup.find('div', class_='entry-content')
        imgs = content_div.find_all('img')
        return {
            'title': title,
            'img': [img['src'] for img in imgs],
        }
3387
3388
3389 View Code Duplication
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist cartoons."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, date, title) from the page's meta tags."""
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }
3412
3413
3414
class ConsoliaComics(GenericNavigableComic):
    """Retriever for the Consolia webcomic."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' (or 'prev') from the given page."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page."""
        title = soup.find('meta', property='og:title')['content']
        day = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3445
3446
3447
class GenericBlogspotComic(GenericNavigableComic):
    """Shared retriever logic for comics hosted on Blogspot."""
    get_first_comic_link = simulate_first_link
    # Placeholder: concrete classes in this file assign their own first_url.
    first_url = NotImplemented
    _categories = ('BLOGSPOT', )

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' to go forward, 'older' to go back."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)
3457
3458
3459 View Code Duplication
class TuMourrasMoinsBete(GenericBlogspotComic):
    """Retriever for the Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, author, title) for one blog post."""
        title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        imgs = article.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in imgs],
            'author': author,
            'title': title,
        }
3478
3479
3480
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next or previous comic.

        The labels are swapped on purpose: the 'prev-item' anchor is used to
        go forward and the 'next-item' anchor to go backward.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title, the alt text of the first image (an
        empty string when the post has no image), the description, the
        author, the publication date (day / month / year) and the image urls
        joined with the site url.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The content lives in either of two containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): "'title' not in i" on a bs4 Tag appears to test the
        # tag's children rather than its attributes - confirm whether
        # i.has_attr('title') was intended before tightening this check.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # 'alt' is always a string: it used to fall back to an empty list
        # when the post had no image.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3518
3519
3520 View Code Duplication
class GloryOwlComix(GenericBlogspotComic):
    """Retriever for the Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, author, title) for one blog post."""
        title = soup.find('title').string
        # Images are read from the 'image_src' link elements of the page.
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [image_link['href'] for image_link in image_links],
            'author': author,
            'title': title,
        }
3539
3540
3541
class AtRandomComics(GenericNavigableComic):
    """Retriever for At Random Comics."""
    name = 'atrandom'
    long_name = 'At Random Comics'
    url = 'http://www.atrandomcomics.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.atrandomcomics.com/at-random-comics-home/2015/5/5/can-of-worms'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor; 'prevLink' is used to go forward."""
        anchor_id = 'prevLink' if next_ else 'nextLink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date, author, description)."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'description': desc,
        }
3573
3574
3575
class GenericTumblrV1(GenericComic):
    """Shared retriever logic for comics published on Tumblr (V1 API)."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the info dict of each new post, skipping posts that give None."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the post's 'url' value; print a notice if it is not under cls.url."""
        post_url = post['url']
        if post_url.startswith(cls.url):
            return post_url
        print("url '%s' does not start with '%s'" % (post_url, cls.url))
        return post_url

    @classmethod
    def get_api_url(cls):
        """Return the url of the '/api/read/' endpoint for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API url querying the single post with the given id."""
        base = cls.get_api_url()
        return base + '?id=%d' % tumblr_id

    @classmethod
    def get_comic_info(cls, post):
        """Build the info dict for a post, or return None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may be wrapped in 'photo' tags or sit directly in the post.
        photo_tags = post.find_all('photo') or [post]
        # Each photo comes in several resolutions: only the first url is kept.
        photo_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts published after last_comic, oldest first.

        The API is queried page by page (nb_post_per_call posts per call, at
        most 50) and serves posts from newest to oldest. Posts are collected
        until the one recorded in last_comic is met, then handed back in
        chronological order.

        An empty result is returned, after printing a message, when the
        previously retrieved post cannot be fetched or is never met.
        """
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Tumblr posts sometimes get deleted. If the previous post is gone
            # the search below could never succeed, so its existence is
            # checked first in order to stop early with a clear message.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        first_page = get_soup_at_url(api_url).find('posts')
        start = int(first_page['start'])
        total = int(first_page['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # A changing total may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                tumblr_id = int(post['id'])
                if waiting_for_id and waiting_for_id == tumblr_id:
                    # Reached the last known post: everything collected is new.
                    return reversed(posts_acc)
                posts_acc.append(post)
        if waiting_for_id is not None:
            print("Did not find %s : there might be a problem" % waiting_for_id)
            return []
        return reversed(posts_acc)
3676
3677
3678
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retriever for Saturday Morning Breakfast Cereal, read from Tumblr."""
    # Other places where the comic is published:
    #  - http://www.gocomics.com/saturday-morning-breakfast-cereal
    #  - http://www.smbc-comics.com
    name = "smbc-tumblr"
    long_name = "Saturday Morning Breakfast Cereal (from Tumblr)"
    url = "http://smbc-comics.tumblr.com"
    _categories = ("SMBC", )
3686
3687
3688
class AHammADay(GenericTumblrV1):
    """Retriever for the A Hamm A Day comics."""
    name = "hamm"
    long_name = "A Hamm A Day"
    url = "http://www.ahammaday.com"
3693
3694
3695
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved via GenericTumblrV1."""
    url = 'http://irwincardozocomics.tumblr.com'
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
3700
3701
3702
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved via GenericTumblrV1."""
    url = 'http://accordingtodevin.tumblr.com'
    name = 'devin'
    long_name = 'According To Devin'
3707
3708
3709
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved via GenericTumblrV1."""
    # Also available at http://itsthetie.com
    # Also available at https://tapastic.com/series/itsthetie
    url = 'http://itsthetie.tumblr.com'
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    _categories = ('TIE',)
3717
3718
3719
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.octopuns.net
    url = 'http://octopuns.tumblr.com'
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
3725
3726
3727
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.picturesinboxes.com
    url = 'https://picturesinboxescomic.tumblr.com'
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
3733
3734
3735
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, retrieved via GenericTumblrV1."""
    # Also available at http://tapastic.com/series/Tubey-Toons
    # Also available at http://tubeytoons.com
    url = 'https://tubeytoons.tumblr.com'
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept as-is
    # because the key may be shared with other classes - confirm before renaming.
    _categories = ('TUNEYTOONS',)
3743
3744
3745
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, retrieved via GenericTumblrV1."""
    # Also available at http://tapastic.com/series/UnearthedComics
    # Also available at http://unearthedcomics.com
    url = 'https://unearthedcomics.tumblr.com'
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    _categories = ('UNEARTHED',)
3753
3754
3755
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved via GenericTumblrV1."""
    url = 'http://piecomic.tumblr.com'
    name = 'pie'
    long_name = 'Pie Comic'
3760
3761
3762
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved via GenericTumblrV1."""
    url = 'http://mrethandiamond.tumblr.com'
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
3767
3768
3769
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved via GenericTumblrV1."""
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
3774
3775
3776
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved via GenericTumblrV1."""
    # Also available at http://tapastic.com/series/UP-and-OUT
    url = 'http://upandoutcomic.tumblr.com'
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
3782
3783
3784
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved via GenericTumblrV1."""
    url = 'http://monstika.tumblr.com'
    name = 'pundemonium'
    long_name = 'Pundemonium'
3789
3790
3791
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved via GenericTumblrV1."""
    # Also available at http://poorlydrawnlines.com
    url = 'http://pdlcomics.tumblr.com'
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    _categories = ('POORLYDRAWN',)
3798
3799
3800
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, retrieved via GenericTumblrV1."""
    url = 'http://pearshapedcomics.com'
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
3805
3806
3807
class PondScumComics(GenericTumblrV1):
    """Pond Scum Comics, retrieved via GenericTumblrV1."""
    url = 'http://pondscumcomic.tumblr.com'
    name = 'pond'
    long_name = 'Pond Scum'
3812
3813
3814
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved via GenericTumblrV1."""
    # Also available at http://mercworks.net
    url = 'http://mercworks.tumblr.com'
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
3820
3821
3822
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved via GenericTumblrV1."""
    # Also available at http://tapastic.com/series/Owl-Turd-Comix
    url = 'http://owlturd.com'
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    _categories = ('OWLTURD',)
3829
3830
3831
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved via GenericTumblrV1."""
    # Also available at http://vectorbelly.com
    url = 'http://vectorbelly.tumblr.com'
    name = 'vector'
    long_name = 'Vector Belly'
3837
3838
3839
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved via GenericTumblrV1."""
    # Also available at http://goneintorapture.tumblr.com
    # Also available at http://tapastic.com/series/Goneintorapture
    url = 'http://goneintorapture.com'
    name = 'rapture'
    long_name = 'Gone Into Rapture'
3846
3847
3848
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved via GenericTumblrV1."""
    # Also available at http://theoatmeal.com
    url = 'http://oatmeal.tumblr.com'
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
3854
3855
3856
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know Comics, retrieved via GenericTumblrV1."""
    # Also available at http://tapastic.com/series/Regular
    url = 'http://heckifiknowcomics.com'
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
3862
3863
3864
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved via GenericTumblrV1."""
    url = 'http://myjetpack.tumblr.com'
    name = 'jetpack'
    long_name = 'My Jet Pack'
3869
3870
3871
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.cheerupemokid.com
    # Also available at http://tapastic.com/series/CUEK
    url = 'https://enzocomics.tumblr.com'
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
3878
3879
3880
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic comics, retrieved via GenericTumblrV1."""
    # Also available at http://forlackofabettercomic.com
    url = 'http://forlackofabettercomic.tumblr.com'
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
3886
3887
3888
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, retrieved via GenericTumblrV1."""
    # Also available at http://zenpencils.com
    # Also available at http://www.gocomics.com/zen-pencils
    url = 'http://zenpencils.tumblr.com'
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    _categories = ('ZENPENCILS',)
3896
3897
3898
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, retrieved via GenericTumblrV1."""
    # Also available at http://threewordphrase.com
    url = 'http://threewordphrase.tumblr.com'
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
3904
3905
3906
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved via GenericTumblrV1."""
    # Also available at http://timetrabble.com
    url = 'http://timetrabble.tumblr.com'
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
3912
3913
3914
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.safelyendangered.com
    url = 'http://tumblr.safelyendangered.com'
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
3920
3921
3922
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.mousebearcomedy.com
    url = 'http://mousebearcomedy.tumblr.com'
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
3928
3929
3930
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.bouletcorp.com
    url = 'https://bouletcorp.tumblr.com'
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    _categories = ('BOULET',)
3937
3938
3939
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.gocomics.com/the-awkward-yeti
    # Also available at http://theawkwardyeti.com
    # Also available at https://tapastic.com/series/TheAwkwardYeti
    url = 'http://larstheyeti.tumblr.com'
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    _categories = ('YETI',)
3948
3949
3950
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, retrieved via GenericTumblrV1."""
    url = 'http://nellucnhoj.com'
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
3955
3956
3957
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.downtheupwardspiral.com
    # Also available at https://tapastic.com/series/Down-the-Upward-Spiral
    url = 'http://downtheupwardspiral.tumblr.com'
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
3964
3965
3966
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Category tag. Spelled `_categories` (leading underscore) like the
    # sibling Tumblr classes; it used to be written `categories`, an
    # attribute name no sibling class defines.
    _categories = ('DAMILEE', )
3973
3974
3975
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Category tag. Spelled `_categories` (leading underscore) like the
    # sibling Tumblr classes; it used to be written `categories`, an
    # attribute name no sibling class defines.
    _categories = ('DAMILEE', )
3984
3985
3986
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.1111comics.me
    # Also available at https://tapastic.com/series/1111-Comics
    url = 'http://comics1111.tumblr.com'
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    _categories = ('ONEONEONEONE',)
3994
3995
3996
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved via GenericTumblrV1."""
    # Also available at http://jhallcomics.com
    url = 'http://jhallcomics.tumblr.com'
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
4002
4003
4004
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.gocomics.com/berkeley-mews
    # Also available at http://www.berkeleymews.com
    url = 'http://mews.tumblr.com'
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    _categories = ('BERKELEY',)
4012
4013
4014
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved via GenericTumblrV1."""
    # Also available at http://joancornella.net
    url = 'http://cornellajoan.tumblr.com'
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
4020
4021
4022
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved via GenericTumblrV1."""
    # Also available at http://respawncomic.com
    url = 'https://respawncomic.tumblr.com'
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
4028
4029
4030
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name: spelling fixed from 'Hallback' to 'Hallbeck' to match the
    # class name and the URL below.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling on purpose;
    # it may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
4040
4041
4042
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved via GenericTumblrV1."""
    url = 'http://comicnuggets.com'
    name = 'nuggets'
    long_name = 'Comic Nuggets'
4047
4048
4049
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved via GenericTumblrV1."""
    # Also available at https://tapastic.com/series/The-Pigeon-Gazette
    url = 'http://thepigeongazette.tumblr.com'
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
4055
4056
4057
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved via GenericTumblrV1."""
    # Also available at http://cancerowl.com
    url = 'http://cancerowl.tumblr.com'
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
4063
4064
4065
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://tapastic.com/series/Fowl-Language-Comics
    # Also available at http://www.gocomics.com/fowl-language
    url = 'http://fowllanguagecomics.tumblr.com'
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    _categories = ('FOWLLANGUAGE',)
4074
4075
4076
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved via GenericTumblrV1."""
    # Also available at http://theodd1sout.com
    # Also available at https://tapastic.com/series/Theodd1sout
    url = 'http://theodd1sout.tumblr.com'
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
4083
4084
4085
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved via GenericTumblrV1."""
    # Also available at http://theunderfold.com
    url = 'http://theunderfold.tumblr.com'
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
4091
4092
4093
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved via GenericTumblrV1."""
    # Also available at http://lolnein.com
    url = 'http://lolneincom.tumblr.com'
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
4099
4100
4101
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved via GenericTumblrV1."""
    # Also available at http://fatawesome.com/comics
    url = 'http://fatawesomecomedy.tumblr.com'
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
4107
4108
4109
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat Comics, retrieved via GenericTumblrV1."""
    # Also available at https://tapastic.com/series/The-World-is-Flat
    url = 'http://theworldisflatcomics.com'
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
4115
4116
4117
class DorrisMc(GenericTumblrV1):
    """Dorris Mc Comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.gocomics.com/dorris-mccomics
    url = 'http://dorrismccomics.com'
    name = 'dorrismc'
    long_name = 'Dorris Mc'
4123
4124
4125
class LeleozTumblr(GenericDeletedComic, GenericTumblrV1):
    """Leleoz comics; combines GenericDeletedComic with GenericTumblrV1."""
    # Also available at https://tapastic.com/series/Leleoz
    url = 'http://leleozcomics.tumblr.com'
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
4131
4132
4133
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, retrieved via GenericTumblrV1."""
    # Also available at http://moonbeard.com
    # Also available at http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    url = 'http://squireseses.tumblr.com'
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    _categories = ('MOONBEARD',)
4141
4142
4143
class AComik(GenericTumblrV1):
    """A Comik comics, retrieved via GenericTumblrV1."""
    url = 'http://acomik.com'
    name = 'comik'
    long_name = 'A Comik'
4148
4149
4150
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved via GenericTumblrV1."""
    url = 'http://classicrandy.tumblr.com'
    name = 'randy'
    long_name = 'Classic Randy'
4155
4156
4157
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.dagsson.com
    url = 'https://hugleikurdagsson.tumblr.com'
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
4163
4164
4165
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved via GenericTumblrV1."""
    # Also available at https://linsedition.com
    # Now at http://warandpeas.tumblr.com
    url = 'https://linscomics.tumblr.com'
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    _categories = ('LINS',)
4173
4174
4175
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, retrieved via GenericTumblrV1."""
    # Previously at https://linscomics.tumblr.com
    url = 'http://warandpeas.tumblr.com'
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    _categories = ('WARANDPEAS',)
4182
4183
4184
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved via GenericTumblrV1."""
    url = 'http://origamihotdish.com'
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
4189
4190
4191
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved via GenericTumblrV1."""
    url = 'https://hitandmisscomics.tumblr.com'
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
4196
4197
4198
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved via GenericTumblrV1."""
    url = 'http://hmblanc.tumblr.com'
    name = 'hmblanc'
    long_name = 'HM Blanc'
4203
4204
4205
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved via GenericTumblrV1."""
    # Also available at http://talesofabsurdity.com
    # Also available at http://tapastic.com/series/Tales-Of-Absurdity
    url = 'http://talesofabsurdity.tumblr.com'
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    _categories = ('ABSURDITY',)
4213
4214
4215
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved via GenericTumblrV1."""
    # Also available at http://robbieandbobby.com
    url = 'http://robbieandbobby.tumblr.com'
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
4221
4222
4223
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    url = 'http://electricbunnycomics.tumblr.com'
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
4229
4230
4231
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved via GenericTumblrV1."""
    url = 'http://hoom.ph'
    name = 'hoomph'
    long_name = 'Hoomph'
4236
4237
4238
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved via GenericTumblrV1."""
    # Also available at https://tapastic.com/series/BFGFS
    # Also available at http://bfgfs.com
    url = 'https://bfgfs.tumblr.com'
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
4245
4246
4247
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved via GenericTumblrV1."""
    # Also available at https://tapastic.com/series/Doodle-for-Food
    url = 'http://www.doodleforfood.com'
    name = 'doodle'
    long_name = 'Doodle For Food'
4253
4254
4255
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, retrieved via GenericTumblrV1."""
    # Also available at http://cassandracalin.com
    # Also available at https://tapastic.com/series/C-Cassandra-comics
    url = 'http://c-cassandra.tumblr.com'
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
4262
4263
4264
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, retrieved via GenericTumblrV1."""
    url = 'https://dougwastaken.tumblr.com'
    name = 'doug'
    long_name = 'Doug Was Taken'
4269
4270
4271
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, retrieved via GenericTumblrV1."""
    url = 'http://mandatoryrollercoaster.com'
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
4276
4277
4278
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, retrieved via GenericTumblrV1."""
    url = 'http://marcoandco.tumblr.com'
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
4283
4284
4285
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, retrieved via GenericTumblrV1."""
    url = 'http://thegrohltroll.com'
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
4290
4291
4292
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, retrieved via GenericTumblrV1."""
    url = 'http://webcomicname.com'
    name = 'webcomicname'
    long_name = 'Webcomic Name'
4297
4298
4299
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.booksofadam.com
    url = 'http://booksofadam.tumblr.com'
    name = 'booksofadam'
    long_name = 'Books of Adam'
4305
4306
4307
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.harkavagrant.com
    url = 'http://beatonna.tumblr.com'
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
4313
4314
4315
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics, retrieved via GenericTumblrV1."""
    # Also available at https://tapastic.com/series/Our-Super-Adventure
    # Also available at http://www.oursuperadventure.com
    # http://sarahgraley.com
    url = 'http://sarahssketchbook.tumblr.com'
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
4323
4324
4325
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, retrieved via GenericTumblrV1."""
    url = 'http://jakelikesonions.com'
    name = 'jake'
    long_name = 'Jake Likes Onions'
4330
4331
4332
class InYourFaceCakeTumblr(GenericTumblrV1):
    """In Your Face Cake comics, retrieved via GenericTumblrV1."""
    # Also available at https://tapas.io/series/In-Your-Face-Cake
    url = 'https://in-your-face-cake.tumblr.com'
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    _categories = ('INYOURFACECAKE',)
4339
4340
4341
class Robospunk(GenericTumblrV1):
    """Robospunk comics, retrieved via GenericTumblrV1."""
    url = 'http://robospunk.com'
    name = 'robospunk'
    long_name = 'Robospunk'
4346
4347
4348
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, retrieved via GenericTumblrV1."""
    url = 'https://bananatwinky.tumblr.com'
    name = 'banana'
    long_name = 'Banana Twinky'
4353
4354
4355
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.yesterdayspopcorn.com
    # Also available at https://tapastic.com/series/Yesterdays-Popcorn
    url = 'http://yesterdayspopcorn.tumblr.com'
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
4362
4363
4364
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, retrieved via GenericTumblrV1."""
    url = 'http://www.twisteddoodles.com'
    name = 'twisted'
    long_name = 'Twisted Doodles'
4369
4370
4371
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics, retrieved via GenericTumblrV1."""
    # Also available at http://ubertoolcomic.com
    # Also available at https://tapastic.com/series/ubertool
    url = 'https://ubertool.tumblr.com'
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    _categories = ('UBERTOOL',)
4379
4380
4381
class LittleLifeLinesTumblr(GenericDeletedComic, GenericTumblrV1):
    """Little Life Lines comics; combines GenericDeletedComic with GenericTumblrV1."""
    # Also available at http://www.littlelifelines.com
    url = 'https://little-life-lines.tumblr.com'
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
4387
4388
4389
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, retrieved via GenericTumblrV1."""
    url = 'http://theycantalk.com'
    name = 'theycantalk'
    long_name = 'They Can Talk'
4394
4395
4396
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, retrieved via GenericTumblrV1."""
    url = 'http://will5nevercome.com'
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
4401
4402
4403
class Sephko(GenericTumblrV1):
    """Sephko Comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.sephko.com
    url = 'https://sephko.tumblr.com'
    name = 'sephko'
    long_name = 'Sephko'
4409
4410
4411
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn Comics, retrieved via GenericTumblrV1."""
    url = 'http://blazersatdawn.tumblr.com'
    name = 'blazers'
    long_name = 'Blazers At Dawn'
4416
4417
4418
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Art By Moga Comics; combines GenericEmptyComic with GenericTumblrV1."""
    # Deactivated because it downloads too many things
    url = 'http://artbymoga.tumblr.com'
    name = 'moga'
    long_name = 'Art By Moga'
4423
4424
4425
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.verbal-vomit.com
    url = 'http://verbalvomits.tumblr.com'
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
4431
4432
4433
class LibraryComic(GenericTumblrV1):
    """LibraryComic comics, retrieved via GenericTumblrV1."""
    # Also available at http://librarycomic.com
    url = 'https://librarycomic.tumblr.com'
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
4439
4440
4441
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics, retrieved via GenericTumblrV1."""
    # Also available at http://tizzystitchbird.com
    # Also available at https://tapastic.com/series/TizzyStitchbird
    # Also available at http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    url = 'http://tizzystitchbird.tumblr.com'
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
4449
4450
4451
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """VictimsOfCircumsolar comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.victimsofcircumsolar.com
    url = 'https://victimsofcomics.tumblr.com'
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
4457
4458
4459
class RockPaperCynicTumblr(GenericTumblrV1):
    """RockPaperCynic comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.rockpapercynic.com
    # Also available at https://tapastic.com/series/rockpapercynic
    url = 'http://rockpapercynic.tumblr.com'
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
4466
4467
4468
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.deadlypanel.com
    # Also available at https://tapastic.com/series/deadlypanel
    url = 'https://deadlypanel.tumblr.com'
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
4475
4476
4477
class CatanaComics(GenericComicNotWorking):
    """Catana comics, declared on top of GenericComicNotWorking."""
    # Not a Tumblr anymore ?
    url = 'http://www.catanacomics.com'
    name = 'catana'
    long_name = 'Catana'
4482
4483
4484
class AngryAtNothingTumblr(GenericTumblrV1):
    """Angry at Nothing comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.angryatnothing.net
    # Also available at http://tapastic.com/series/Comics-yeah-definitely-comics-
    url = 'http://angryatnothing.tumblr.com'
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
4491
4492
4493
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comic, retrieved via GenericTumblrV1."""
    url = 'http://tango2010weibo.tumblr.com'
    name = 'tango'
    long_name = 'Shanghai Tango'
4498
4499
4500
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics, retrieved via GenericTumblrV1."""
    # Also available at http://offtheleashdogcartoons.com
    # Also available at http://www.rupertfawcettcartoons.com
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    _categories = ('FAWCETT',)
4508
4509
4510
class ImogenQuestTumblr(GenericTumblrV1):
    """Imogen Quest comics, retrieved via GenericTumblrV1."""
    # Also available at http://imogenquest.net
    url = 'http://imoquest.tumblr.com'
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
4516
4517
4518
class Shitfest(GenericTumblrV1):
    """Shitfest comics, retrieved via GenericTumblrV1."""
    url = 'http://shitfestcomic.com'
    name = 'shitfest'
    long_name = 'Shitfest'
4523
4524
4525
class IceCreamSandwichComics(GenericTumblrV1):
    """Ice Cream Sandwich Comics, retrieved via GenericTumblrV1."""
    url = 'http://icecreamsandwichcomics.com'
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
4530
4531
4532
class Dustinteractive(GenericTumblrV1):
    """Dustinteractive comics, retrieved via GenericTumblrV1."""
    url = 'http://dustinteractive.com'
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
4537
4538
4539
class StickyCinemaFloor(GenericTumblrV1):
    """Sticky Cinema Floor comics, retrieved via GenericTumblrV1."""
    url = 'https://stickycinemafloor.tumblr.com'
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
4544
4545
4546
class IncidentalComicsTumblr(GenericTumblrV1):
    """Incidental Comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.incidentalcomics.com
    url = 'http://incidentalcomics.tumblr.com'
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
4552
4553
4554
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """A Pleasant Waste Of Time comics, retrieved via GenericTumblrV1."""
    # Also available at https://tapas.io/series/A-Pleasant-
    # NOTE(review): the URL above looks truncated - TODO confirm the full address.
    url = 'https://artjcf.tumblr.com'
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    _categories = ('WASTE',)
4561
4562
4563
class HorovitzComicsTumblr(GenericTumblrV1):
    """Horovitz new comics, retrieved via GenericTumblrV1."""
    # Also available at http://www.horovitzcomics.com
    url = 'https://horovitzcomics.tumblr.com'
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    _categories = ('HOROVITZ',)
4570
4571
4572
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Deep Dark Fears comics, retrieved via GenericTumblrV1."""
    url = 'http://deep-dark-fears.tumblr.com'
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
4577
4578
4579
class DakotaMcDadzean(GenericTumblrV1):
    """Class to retrieve Dakota McFadzean comics."""
    # The class name keeps its historical spelling so existing references
    # still work; the displayed name below matches the spelling in the URL.
    name = 'dakota'
    long_name = 'Dakota McFadzean'
    url = 'http://dakotamcfadzean.tumblr.com'
4584
4585
4586
class ExtraFabulousComicsTumblr(GenericTumblrV1):
    """Retrieve Extra Fabulous Comics from their Tumblr."""

    # Also on http://extrafabulouscomics.com
    name = "efc-tumblr"
    long_name = "Extra Fabulous Comics (from Tumblr)"
    url = "https://extrafabulouscomics.tumblr.com"
    _categories = ("EFC",)
4593
4594
4595
class AlexLevesque(GenericTumblrV1):
    """Retrieve Alex Levesque comics using the GenericTumblrV1 logic."""

    name = "alevesque"
    long_name = "Alex Levesque"
    url = "http://alexlevesque.com"
    _categories = ("FRANCAIS",)
4601
4602
4603
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """Retrieve James Of No Trades comics from their Tumblr."""

    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on https://tapas.io/series/James-of-No-Trades
    name = "jamesofnotrades-tumblr"
    long_name = "James Of No Trades (from Tumblr)"
    url = "http://jamesfregan.tumblr.com"
    _categories = ("JAMESOFNOTRADES",)
4612
4613
4614
class InfiniteGuff(GenericTumblrV1):
    """Retrieve Infinite Guff comics using the GenericTumblrV1 logic."""

    name = "infiniteguff"
    long_name = "Infinite Guff"
    url = "http://infiniteguff.com"
4619
4620
4621
class SkeletonClaw(GenericTumblrV1):
    """Retrieve Skeleton Claw comics using the GenericTumblrV1 logic."""

    name = "skeletonclaw"
    long_name = "Skeleton Claw"
    url = "http://skeletonclaw.com"
4626
4627
4628
class HorovitzComics(GenericDeletedComic, GenericListableComic):
    """Common base for the Horovitz comics; subclasses must supply `link_re`."""

    # Also on https://horovitzcomics.tumblr.com
    url = "http://www.horovitzcomics.com"
    _categories = ("HOROVITZ",)
    # Captures three numeric path components, read below as year/month/day.
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    # Placeholder: each concrete subclass sets its own compiled pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of one comic from its page and archive link.

        The comic number is the first group of `link_re` matched against the
        link's href; the date comes from the path of the single image whose
        id is 'comic'.
        """
        link_match = cls.link_re.match(link["href"])
        num = int(link_match.group(1))
        title = link.string
        comic_imgs = soup.find_all("img", id="comic")
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]["src"])
        year, month, day = map(int, date_match.groups())
        return {
            "title": title,
            "day": day,
            "month": month,
            "year": year,
            "img": [img["src"] for img in comic_imgs],
            "num": num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url("http://www.horovitzcomics.com/comics/archive/")
        matching_links = archive_soup.find_all("a", href=cls.link_re)
        return reversed(matching_links)
4660
4661
4662
class HorovitzNew(HorovitzComics):
    """Retrieve the 'new' series of Horovitz comics."""

    name = "horovitznew"
    long_name = "Horovitz New"
    link_re = re.compile("^/comics/new/([0-9]+)$")
4667
4668
4669
class HorovitzClassic(HorovitzComics):
    """Retrieve the 'classic' series of Horovitz comics."""

    name = "horovitzclassic"
    long_name = "Horovitz Classic"
    link_re = re.compile("^/comics/classic/([0-9]+)$")
4674
4675
4676
class GenericGoComic(GenericNavigableComic):
    """Shared retrieval logic for the comics hosted on gocomics.com."""

    _categories = ("GOCOMIC",)

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor to the first comic, looked up on the main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find(
            "a", class_="fa btn btn-outline-default btn-circle fa-backward sm "
        )

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next comic if `next_`, else to the previous one."""
        # The class strings (trailing space included) must match the page markup exactly.
        if next_:
            css_class = "fa btn btn-outline-default btn-circle fa-caret-right js-next-comic hidden-sm-up sm "
        else:
            css_class = "fa btn btn-outline-default btn-circle fa-caret-left js-previous-comic sm "
        return last_soup.find("a", class_=css_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Join the link's href to the gocomics.com root."""
        return urljoin_wrapper("http://www.gocomics.com", link["href"])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect publication date, images, author and tags from a comic page."""

        def meta_content(prop):
            # Content of the <meta> tag whose `property` attribute is `prop`.
            return soup.find("meta", property=prop)["content"]

        published = string_to_date(meta_content("article:published_time"), "%Y-%m-%d")
        picture = soup.find("picture", class_="img-fluid item-comic-image")
        comic_imgs = picture.find_all("img")
        author = meta_content("article:author")
        tags = meta_content("article:tag")
        return {
            "day": published.day,
            "month": published.month,
            "year": published.year,
            "img": [urljoin_wrapper(cls.url, img["src"]) for img in comic_imgs],
            "author": author,
            "tags": tags,
        }
4713
4714
4715
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve Pearls Before Swine comics from GoComics."""

    name = "pearls"
    long_name = "Pearls Before Swine"
    url = "http://www.gocomics.com/pearlsbeforeswine"
4720
4721
4722
class Peanuts(GenericGoComic):
    """Retrieve Peanuts comics from GoComics."""

    name = "peanuts"
    long_name = "Peanuts"
    url = "http://www.gocomics.com/peanuts"
4727
4728
4729
class MattWuerker(GenericGoComic):
    """Retrieve Matt Wuerker comics from GoComics."""

    name = "wuerker"
    long_name = "Matt Wuerker"
    url = "http://www.gocomics.com/mattwuerker"
4734
4735
4736
class TomToles(GenericGoComic):
    """Retrieve Tom Toles comics from GoComics."""

    name = "toles"
    long_name = "Tom Toles"
    url = "http://www.gocomics.com/tomtoles"
4741
4742
4743
class BreakOfDay(GenericGoComic):
    """Retrieve Break Of Day comics from GoComics."""

    name = "breakofday"
    long_name = "Break Of Day"
    url = "http://www.gocomics.com/break-of-day"
4748
4749
4750
class Brevity(GenericGoComic):
    """Retrieve Brevity comics from GoComics."""

    name = "brevity"
    long_name = "Brevity"
    url = "http://www.gocomics.com/brevity"
4755
4756
4757
class MichaelRamirez(GenericGoComic):
    """Retrieve Michael Ramirez comics from GoComics."""

    name = "ramirez"
    long_name = "Michael Ramirez"
    url = "http://www.gocomics.com/michaelramirez"
4762
4763
4764
class MikeLuckovich(GenericGoComic):
    """Retrieve Mike Luckovich comics from GoComics."""

    name = "luckovich"
    long_name = "Mike Luckovich"
    url = "http://www.gocomics.com/mikeluckovich"
4769
4770
4771
class JimBenton(GenericGoComic):
    """Retrieve Jim Benton comics from GoComics."""

    # Also on http://jimbenton.tumblr.com
    name = "benton"
    long_name = "Jim Benton"
    url = "http://www.gocomics.com/jim-benton-cartoons"
4777
4778
4779
class TheArgyleSweater(GenericGoComic):
    """Retrieve The Argyle Sweater comics from GoComics."""

    name = "argyle"
    long_name = "Argyle Sweater"
    url = "http://www.gocomics.com/theargylesweater"
4784
4785
4786
class SunnyStreet(GenericGoComic):
    """Retrieve Sunny Street comics from GoComics."""

    # Also on http://www.sunnystreetcomics.com
    name = "sunny"
    long_name = "Sunny Street"
    url = "http://www.gocomics.com/sunny-street"
4792
4793
4794
class OffTheMark(GenericGoComic):
    """Retrieve Off The Mark comics from GoComics."""

    # Also on https://www.offthemark.com
    name = "offthemark"
    long_name = "Off The Mark"
    url = "http://www.gocomics.com/offthemark"
4800
4801
4802
class WuMo(GenericGoComic):
    """Retrieve WuMo comics from GoComics."""

    # Also on http://wumo.com
    name = "wumo"
    long_name = "WuMo"
    url = "http://www.gocomics.com/wumo"
4808
4809
4810
class LunarBaboon(GenericGoComic):
    """Retrieve Lunar Baboon comics from GoComics."""

    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = "lunarbaboon"
    long_name = "Lunar Baboon"
    url = "http://www.gocomics.com/lunarbaboon"
4817
4818
4819
class SandersenGocomic(GenericGoComic):
    """Retrieve Sarah Andersen comics from GoComics."""

    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = "sandersen-goc"
    long_name = "Sarah Andersen (from GoComics)"
    url = "http://www.gocomics.com/sarahs-scribbles"
4826
4827
4828
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retrieve Saturday Morning Breakfast Cereal comics from GoComics."""

    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = "smbc-goc"
    long_name = "Saturday Morning Breakfast Cereal (from GoComics)"
    url = "http://www.gocomics.com/saturday-morning-breakfast-cereal"
    _categories = ("SMBC",)
4836
4837
4838
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retrieve Calvin and Hobbes comics from GoComics."""

    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = "calvin-goc"
    long_name = "Calvin and Hobbes (from GoComics)"
    url = "http://www.gocomics.com/calvinandhobbes"
4844
4845
4846
class RallGoComic(GenericGoComic):
    """Retrieve Ted Rall comics from GoComics."""

    # Also on http://rall.com/comic
    name = "rall-goc"
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ("RALL",)
4853
4854
4855
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retrieve The Awkward Yeti comics from GoComics."""

    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-goc"
    long_name = "The Awkward Yeti (from GoComics)"
    url = "http://www.gocomics.com/the-awkward-yeti"
    _categories = ("YETI",)
4864
4865
4866
class BerkeleyMewsGoComics(GenericGoComic):
    """Retrieve Berkeley Mews comics from GoComics."""

    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = "berkeley-goc"
    long_name = "Berkeley Mews (from GoComics)"
    url = "http://www.gocomics.com/berkeley-mews"
    _categories = ("BERKELEY",)
4874
4875
4876
class SheldonGoComics(GenericGoComic):
    """Retrieve Sheldon comics from GoComics."""

    # Also on http://www.sheldoncomics.com
    name = "sheldon-goc"
    long_name = "Sheldon Comics (from GoComics)"
    url = "http://www.gocomics.com/sheldon"
4882
4883
4884
class FowlLanguageGoComics(GenericGoComic):
    """Retrieve Fowl Language comics from GoComics."""

    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = "fowllanguage-goc"
    long_name = "Fowl Language Comics (from GoComics)"
    url = "http://www.gocomics.com/fowl-language"
    _categories = ("FOWLLANGUAGE",)
4893
4894
4895
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics from GoComics."""

    name = "nickanderson"
    long_name = "Nick Anderson"
    url = "http://www.gocomics.com/nickanderson"
4900
4901
4902
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics from GoComics."""

    # Also on http://garfield.com
    name = "garfield-goc"
    long_name = "Garfield (from GoComics)"
    url = "http://www.gocomics.com/garfield"
    _categories = ("GARFIELD",)
4909
4910
4911
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc Comics from GoComics."""

    # Also on http://dorrismccomics.com
    name = "dorrismc-goc"
    long_name = "Dorris Mc (from GoComics)"
    url = "http://www.gocomics.com/dorris-mccomics"
4917
4918
4919
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from GoComics."""

    name = "foxtrot"
    long_name = "FoxTrot"
    url = "http://www.gocomics.com/foxtrot"
4924
4925
4926
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from GoComics."""

    name = "foxtrot-classics"
    long_name = "FoxTrot Classics"
    url = "http://www.gocomics.com/foxtrotclassics"
4931
4932
4933
class MisterAndMeGoComics(GenericDeletedComic, GenericGoComic):
    """Retrieve Mister & Me comics from GoComics (also a GenericDeletedComic)."""

    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = "mister-goc"
    long_name = "Mister & Me (from GoComics)"
    url = "http://www.gocomics.com/mister-and-me"
4940
4941
4942
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (Wiley Miller) comics from GoComics."""

    name = "nonsequitur"
    long_name = "Non Sequitur"
    url = "http://www.gocomics.com/nonsequitur"
4947
4948
4949
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `archive_elt` is one entry of the list returned by
        get_archive_elements; its 'publishDate', 'title' and 'id' values are
        used here. Returns None (after printing a notice) when the page
        contains no 'art-image' picture.
        """
        # 'publishDate' is divided by 1000 before being handed to
        # fromtimestamp, i.e. it is treated as milliseconds.
        # NOTE(review): fromtimestamp converts using the local timezone, so
        # the resulting day may depend on where this runs - confirm intended.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the episode url corresponding to an archive element (via its 'id')."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list embedded in the page at `cls.url`.

        Information is stored in the javascript part, on a line starting
        with 'episodeList : ' and ending with ','. I don't know the clean way
        to get it so this is the ugly way: scan the raw page line by line and
        parse what sits between these two markers as JSON.

        Raises IndexError when no such line is found.
        """
        pref, suff = 'episodeList : ', ','
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as indexing an empty result list, with a
        # message saying what was being looked for.
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4982
4983
4984
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics from Tapastic."""

    # Also on http://vegetablesfordessert.tumblr.com
    name = "vegetables"
    long_name = "Vegetables For Dessert"
    url = "http://tapastic.com/series/vegetablesfordessert"
4990
4991
4992
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics from Tapastic."""

    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tapa"
    long_name = "Fowl Language Comics (from Tapastic)"
    url = "http://tapastic.com/series/Fowl-Language-Comics"
    _categories = ("FOWLLANGUAGE",)
5001
5002
5003
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics from Tapastic."""

    name = "oscillating"
    long_name = "Oscillating Profundities"
    url = "http://tapastic.com/series/oscillatingprofundities"
5008
5009
5010
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics from Tapastic."""

    name = "znoflats"
    long_name = "Znoflats Comics"
    url = "http://tapastic.com/series/Znoflats-Comics"
5015
5016
5017
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics from Tapastic."""

    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = "sandersen-tapa"
    long_name = "Sarah Andersen (from Tapastic)"
    url = "http://tapastic.com/series/Doodle-Time"
5024
5025
5026
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve Tubey Toons comics from Tapastic."""

    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    name = "tubeytoons-tapa"
    long_name = "Tubey Toons (from Tapastic)"
    url = "http://tapastic.com/series/Tubey-Toons"
    # NOTE(review): key is spelled 'TUNEYTOONS' (not 'TUBEYTOONS'); kept
    # as-is since other classes may share it - confirm before renaming.
    _categories = ("TUNEYTOONS",)
5034
5035
5036
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic comics from Tapastic."""

    # Also on http://www.anythingcomic.com
    name = "anythingcomic-tapa"
    long_name = "Anything Comic (from Tapastic)"
    url = "http://tapastic.com/series/anything"
5042
5043
5044
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed Comics from Tapastic."""

    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    name = "unearthed-tapa"
    long_name = "Unearthed Comics (from Tapastic)"
    url = "http://tapastic.com/series/UnearthedComics"
    _categories = ("UNEARTHED",)
5052
5053
5054
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from Tapastic."""

    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = "stupid-tapa"
    long_name = "Everything's Stupid (from Tapastic)"
    url = "http://tapastic.com/series/EverythingsStupid"
5061
5062
5063
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from Tapastic."""

    # Also on http://www.justsayeh.com
    name = "justsayeh-tapa"
    long_name = "Just Say Eh (from Tapastic)"
    url = "http://tapastic.com/series/Just-Say-Eh"
5069
5070
5071
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from Tapastic."""

    # Also on http://www.thorsthundershack.com
    name = "thor-tapa"
    long_name = "Thor's Thundershack (from Tapastic)"
    url = "http://tapastic.com/series/Thors-Thundershac"
    _categories = ("THOR",)
5078
5079
5080
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from Tapastic."""

    # Also on http://owlturd.com
    name = "owlturd-tapa"
    long_name = "Owl Turd (from Tapastic)"
    url = "http://tapastic.com/series/Owl-Turd-Comix"
    _categories = ("OWLTURD",)
5087
5088
5089
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from Tapastic."""

    # Also on http://goneintorapture.tumblr.com
    # Also on http://goneintorapture.com
    name = "rapture-tapa"
    long_name = "Gone Into Rapture (from Tapastic)"
    url = "http://tapastic.com/series/Goneintorapture"
5096
5097
5098
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know comics from Tapastic."""

    # Also on http://heckifiknowcomics.com
    name = "heck-tapa"
    long_name = "Heck if I Know comics (from Tapastic)"
    url = "http://tapastic.com/series/Regular"
5104
5105
5106
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from Tapastic."""

    # Also on http://www.cheerupemokid.com
    # Also on https://enzocomics.tumblr.com
    name = "cuek-tapa"
    long_name = "Cheer Up Emo Kid (from Tapastic)"
    url = "http://tapastic.com/series/CUEK"
5113
5114
5115
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from Tapastic."""

    # Also on http://bigfootjustice.com
    name = "bigfoot-tapa"
    long_name = "Big Foot Justice (from Tapastic)"
    url = "http://tapastic.com/series/bigfoot-justice"
5121
5122
5123
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from Tapastic."""

    # Also on http://upandoutcomic.tumblr.com
    name = "upandout-tapa"
    long_name = "Up And Out (from Tapastic)"
    url = "http://tapastic.com/series/UP-and-OUT"
5129
5130
5131
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from Tapastic."""

    # Also on http://www.toonhole.com
    name = "toonhole-tapa"
    long_name = "Toon Hole (from Tapastic)"
    url = "http://tapastic.com/series/TOONHOLE"
5137
5138
5139
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from Tapastic."""

    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    name = "angry-tapa"
    long_name = "Angry At Nothing (from Tapastic)"
    url = "http://tapastic.com/series/Comics-yeah-definitely-comics-"
5146
5147
5148
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from Tapastic."""

    # Also on http://leleozcomics.tumblr.com
    name = "leleoz-tapa"
    long_name = "Leleoz (from Tapastic)"
    url = "https://tapastic.com/series/Leleoz"
5154
5155
5156
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from Tapastic."""

    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = "yeti-tapa"
    long_name = "The Awkward Yeti (from Tapastic)"
    url = "https://tapastic.com/series/TheAwkwardYeti"
    _categories = ("YETI",)
5165
5166
5167
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Declared as `_categories` (leading underscore), the attribute name
    # every other comic class in this file uses for its categories.
    _categories = ('DAMILEE', )
5174
5175
5176
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Declared as `_categories` (leading underscore), the attribute name
    # every other comic class in this file uses for its categories.
    _categories = ('DAMILEE', )
5185
5186
5187
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from Tapastic."""

    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = "1111-tapa"
    long_name = "1111 Comics (from Tapastic)"
    url = "https://tapastic.com/series/1111-Comics"
    _categories = ("ONEONEONEONE",)
5195
5196
5197
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is called "Tumble Dry" (see the URLs), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5203
5204
5205
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from Tapastic."""

    # Also on http://www.deadlypanel.com
    # Also on https://deadlypanel.tumblr.com
    name = "deadly-tapa"
    long_name = "Deadly Panel (from Tapastic)"
    url = "https://tapastic.com/series/deadlypanel"
5212
5213
5214
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck comics (Maximumble)."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the category key is spelled 'HALLBACK'; kept unchanged
    # since other classes may share this key - confirm before renaming.
    _categories = ('HALLBACK', )
5222
5223
5224
class ChrisHallbeckMiniTapa(GenericDeletedComic, GenericTapasticComic):
    """Class to retrieve Chris Hallbeck comics (Minimumble)."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the category key is spelled 'HALLBACK'; kept unchanged
    # since other classes may share this key - confirm before renaming.
    _categories = ('HALLBACK', )
5232
5233
5234
class ChrisHallbeckBiffTapa(GenericDeletedComic, GenericTapasticComic):
    """Class to retrieve Chris Hallbeck comics (The Book of Biff)."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the category key is spelled 'HALLBACK'; kept unchanged
    # since other classes may share this key - confirm before renaming.
    _categories = ('HALLBACK', )
5242
5243
5244
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from Tapastic."""

    # Also on https://randowis.com
    name = "randowis-tapa"
    long_name = "RandoWis (from Tapastic)"
    url = "https://tapastic.com/series/RandoWis"
5250
5251
5252
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from Tapastic."""

    # Also on http://thepigeongazette.tumblr.com
    name = "pigeon-tapa"
    long_name = "The Pigeon Gazette (from Tapastic)"
    url = "https://tapastic.com/series/The-Pigeon-Gazette"
5258
5259
5260
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from Tapastic."""

    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = "theodd-tapa"
    long_name = "The Odd 1s Out (from Tapastic)"
    url = "https://tapastic.com/series/Theodd1sout"
5267
5268
5269
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from Tapastic."""

    # Also on http://theworldisflatcomics.tumblr.com
    name = "flatworld-tapa"
    long_name = "The World Is Flat (from Tapastic)"
    url = "https://tapastic.com/series/The-World-is-Flat"
5275
5276
5277
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from Tapastic."""

    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = "mister-tapa"
    long_name = "Mister & Me (from Tapastic)"
    url = "https://tapastic.com/series/Mister-and-Me"
5284
5285
5286
class TalesOfAbsurdityTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from Tapastic (also a GenericDeletedComic)."""

    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = "absurdity-tapa"
    long_name = "Tales of Absurdity (from Tapastic)"
    url = "http://tapastic.com/series/Tales-Of-Absurdity"
    _categories = ("ABSURDITY",)
5294
5295
5296
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from Tapastic."""

    # Also on http://bfgfs.com
    # Also on https://bfgfs.tumblr.com
    name = "bfgfs-tapa"
    long_name = "BFGFS (from Tapastic)"
    url = "https://tapastic.com/series/BFGFS"
5303
5304
5305
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from Tapastic."""

    # Also on http://www.doodleforfood.com
    name = "doodle-tapa"
    long_name = "Doodle For Food (from Tapastic)"
    url = "https://tapastic.com/series/Doodle-for-Food"
5311
5312
5313
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from Tapastic."""

    # Also on https://tapastic.com/series/MrLovenstein
    # NOTE(review): the "Also on" link above is identical to `url`; it was
    # presumably meant to point at another site - confirm and fix.
    name = "mrlovenstein-tapa"
    long_name = "Mr. Lovenstein (from Tapastic)"
    url = "https://tapastic.com/series/MrLovenstein"
5319
5320
5321
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra (Cassandra Calin) comics from Tapastic."""

    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = "cassandra-tapa"
    long_name = "Cassandra Calin (from Tapastic)"
    url = "https://tapastic.com/series/C-Cassandra-comics"
5328
5329
5330
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from Tapastic."""

    # Also on http://wandpcomic.com
    name = "waffles"
    long_name = "Waffles And Pancakes"
    url = "https://tapastic.com/series/Waffles-and-Pancakes"
5336
5337
5338
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from Tapastic."""

    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = "popcorn-tapa"
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = "https://tapastic.com/series/Yesterdays-Popcorn"
5345
5346
5347
class OurSuperAdventureTapastic(GenericDeletedComic, GenericTapasticComic):
    """Retrieve Our Super Adventure comics from Tapastic (also a GenericDeletedComic)."""

    # Also on http://www.oursuperadventure.com
    # http://sarahssketchbook.tumblr.com
    # http://sarahgraley.com
    name = "superadventure-tapastic"
    long_name = "Our Super Adventure (from Tapastic)"
    url = "https://tapastic.com/series/Our-Super-Adventure"
5355
5356
5357
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from Tapastic."""

    # Also on http://namelesspcs.com
    name = "namelesspcs-tapa"
    long_name = "NamelessPCs (from Tapastic)"
    url = "https://tapastic.com/series/NamelessPC"
5363
5364
5365
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve Down The Upward Spiral comics from Tapastic."""

    # Also on http://www.downtheupwardspiral.com
    # Also on http://downtheupwardspiral.tumblr.com
    name = "spiral-tapa"
    long_name = "Down the Upward Spiral (from Tapastic)"
    url = "https://tapastic.com/series/Down-the-Upward-Spiral"
5372
5373
5374
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from Tapastic."""

    # Also on http://ubertoolcomic.com
    # Also on https://ubertool.tumblr.com
    name = "ubertool-tapa"
    long_name = "Ubertool (from Tapastic)"
    url = "https://tapastic.com/series/ubertool"
    _categories = ("UBERTOOL",)
5382
5383
5384
class BarteNerdsTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve BarteNerds comics from Tapastic (also a GenericDeletedComic)."""

    # Also on http://www.bartenerds.com
    name = "bartenerds-tapa"
    long_name = "BarteNerds (from Tapastic)"
    url = "https://tapastic.com/series/BarteNERDS"
5390
5391
5392
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from Tapastic."""

    # Also on http://www.smallblueyonder.com
    name = "smallblue-tapa"
    long_name = "Small Blue Yonder (from Tapastic)"
    url = "https://tapastic.com/series/Small-Blue-Yonder"
5398
5399
5400
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Tizzy Stitch Bird, read from its Tapastic series page.

    Also on:
        http://tizzystitchbird.com
        http://tizzystitchbird.tumblr.com
        http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    """

    name = "tizzy-tapa"
    long_name = "Tizzy Stitch Bird (from Tapastic)"
    url = "https://tapastic.com/series/TizzyStitchbird"
5408
5409
5410
class RockPaperCynicTapa(GenericTapasticComic):
    """Rock Paper Cynic, read from its Tapastic series page.

    Also on:
        http://www.rockpapercynic.com
        http://rockpapercynic.tumblr.com
    """

    name = "rpc-tapa"
    long_name = "Rock Paper Cynic (from Tapastic)"
    url = "https://tapastic.com/series/rockpapercynic"
5417
5418
5419
class IsItCanonTapa(GenericTapasticComic):
    """Is It Canon, read from its Tapastic series page.

    Also on:
        http://www.isitcanon.com
    """

    name = "canon-tapa"
    long_name = "Is It Canon (from Tapastic)"
    # NOTE(review): plain http here, unlike the https URLs of sibling classes.
    url = "http://tapastic.com/series/isitcanon"
5425
5426
5427
class ItsTheTieTapa(GenericTapasticComic):
    """It's the tie, read from its Tapastic series page.

    Also on:
        http://itsthetie.com
        http://itsthetie.tumblr.com
    """

    name = "tie-tapa"
    long_name = "It's the tie (from Tapastic)"
    url = "https://tapastic.com/series/itsthetie"
    _categories = ("TIE",)
5435
5436
5437
class JamesOfNoTradesTapa(GenericTapasticComic):
    """James Of No Trades, read from its Tapas series page.

    Also on:
        http://jamesofnotrades.com
        http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
        http://jamesfregan.tumblr.com
    """

    name = "jamesofnotrades-tapa"
    long_name = "James Of No Trades (from Tapastic)"
    url = "https://tapas.io/series/James-of-No-Trades"
    _categories = ("JAMESOFNOTRADES",)
5446
5447
5448
class MomentumTapa(GenericTapasticComic):
    """Momentum, read from its Tapastic series page.

    Also on:
        http://www.momentumcomic.com
    """

    name = "momentum-tapa"
    long_name = "Momentum (from Tapastic)"
    url = "https://tapastic.com/series/momentum"
5454
5455
5456
class InYourFaceCakeTapa(GenericTapasticComic):
    """In Your Face Cake, read from its Tapas series page.

    Also on:
        https://in-your-face-cake.tumblr.com
    """

    name = "inyourfacecake-tapa"
    long_name = "In Your Face Cake (from Tapastic)"
    url = "https://tapas.io/series/In-Your-Face-Cake"
    _categories = ("INYOURFACECAKE",)
5463
5464
5465
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """A Pleasant Waste Of Time, read from its Tapas series page.

    Also on:
        https://artjcf.tumblr.com
    """

    name = "pleasant-waste-tapa"
    long_name = "A Pleasant Waste Of Time (from Tapastic)"
    url = "https://tapas.io/series/A-Pleasant-"
    _categories = ("WASTE",)
5472
5473
5474
def get_subclasses(klass):
    """Get the list of direct/indirect subclasses of a class.

    Direct subclasses come first, followed by the descendants of each of
    them. A class reachable through several inheritance paths (multiple
    inheritance) is reported only once, at the position of its first
    occurrence.

    Args:
        klass: the class whose descendants are wanted.

    Returns:
        A list of classes without duplicates; empty when `klass` has no
        subclass.
    """
    # __subclasses__() returns a fresh list, so extending it is safe.
    found = klass.__subclasses__()
    for derived in list(found):
        found.extend(get_subclasses(derived))
    # Drop repeats caused by diamond-shaped hierarchies, keeping order.
    seen = set()
    unique = []
    for sub in found:
        if sub not in seen:
            seen.add(sub)
            unique.append(sub)
    return unique
5480
5481
5482
def remove_st_nd_rd_th_from_date(string):
    """Transform 1st/2nd/3rd/4th into a parsable date format.

    Only an ordinal suffix ("st", "nd", "rd", "th") that immediately
    follows a digit is removed, so words which merely contain those
    letters ("August", "Monday", "Saturday", ...) are left untouched.

    Args:
        string: text containing a date such as "August 1st, 2015".

    Returns:
        The same text with ordinal suffixes stripped, e.g.
        "August 1, 2015".
    """
    # The lookbehind anchors the suffix to a digit without consuming it.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
5490
5491
5492
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which parses under the
    locale `local`. The process-wide LC_ALL locale is switched only when
    it differs from `local`, and is always restored afterwards, even when
    parsing fails.

    Args:
        string: text to parse.
        date_format: strptime format, described in
            https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
        local: locale name to parse under.

    Returns:
        The parsed datetime.date.

    Raises:
        ValueError: if `string` does not match `date_format`.
        locale.Error: if the locale cannot be set.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore on the error path too: setlocale affects the whole process.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
5503
5504
5505
# Registry of comic classes, built once at import time.
# Every direct or indirect subclass of GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Only the classes that define a name.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup by comic name, and the set of class names in use.
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
assert len(COMIC_NAMES) == len(VALID_COMICS)  # comic names are unique
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
assert len(CLASS_NAMES) == len(VALID_COMICS)  # class names are unique
5511