Completed
Push — master ( 1a4df5...eb0d82 )
by De
05:53 queued 03:09
created

comics.py (52 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Name of the default locale.
# NOTE(review): presumably consumed by date-parsing helpers defined elsewhere - confirm.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd, driven by the JSON documents the site exposes."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics that follow `last_comic`.

        Implementation of GenericComic's abstract generator. Numbering
        resumes right after `last_comic['num']` (or starts at 1 when
        `last_comic` is falsy) and stops at the 'num' reported by the
        site-level `info.0.json` document.
        """
        start = (last_comic['num'] if last_comic else 0) + 1
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        newest = latest_info['num']

        for num in range(start, newest + 1):
            if num == 404:
                # Number 404 is never fetched.
                # NOTE(review): presumably because that URL is an error page - confirm.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # Downstream code expects a list of images.
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # Date parts are converted with int() before the comic is yielded.
            for date_part in ('day', 'month', 'year'):
                comic[date_part] = int(comic[date_part])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Meant to be assigned in a class body as `get_url_from_link` or
    `get_url_from_archive_element`.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined onto the class' `url`.

    Meant to be assigned in a class body as `get_url_from_link` or
    `get_url_from_archive_element`.
    """
    base = cls.url
    href = link['href']
    return urljoin_wrapper(base, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` selects the direction: truthy for next, falsy for previous.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        May return None, in which case `get_next_comic` yields nothing
        for that page; a returned dict must not contain a 'url' key.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (logged wrapper around get_navi_link)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (logged wrapper around get_navi_link)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Navigation starts from the page following `last_comic['url']`
        when `last_comic` is truthy, otherwise from the link returned by
        `get_first_comic_link`. Each yielded comic is the dict returned
        by `get_comic_info` with its 'url' entry set.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing at the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be fetched
        without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded, so import it explicitly before using it.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward, and forward then backward, must both
        lead back to `url`. Returns True when every check passes.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    `get_next_comic` is written on top of more specialised hooks that
    subclasses implement/override:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the URL matching one archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the info dict for one comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. Until the URL of
        `last_comic` has been seen, elements are skipped; every element
        after it (or every element when `last_comic` is falsy) is fetched
        and yielded with its 'url' entry set.
        """
        pending_url = last_comic['url'] if last_comic else None
        elements = list(cls.get_archive_elements())
        for elt in elements:
            url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last retrieved comic.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(elt)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if pending_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (pending_url, len(elements)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next|prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next|prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next|navi-prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-* navi-*' anchor classes."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-*' anchor classes."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: the <a class="navi navi-first"> found at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at `cls.url` and returns the first <a> found inside
    the <div class="nav-first"> element. Returns None when that div is
    absent (instead of failing with AttributeError), in line with the
    other link helpers, whose callers already treat None as "no link".
    """
    nav_first = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if nav_first is None:
        return None
    return nav_first.find('a')
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: the 'comic-nav-base comic-nav-first' anchor found at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link wrapping `cls.first_url`
    into a link-like mapping (a dict with an 'href' entry).

    Note: when the first URL is unknown, it can be discovered with
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return dict(href=cls.first_url)
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a
    starting URL until no previous comic is found.

    The starting URL is `cls.first_url` when the class defines it,
    otherwise it is asked interactively. Every visited URL is printed.
    This is a development convenience: once the first URL is known, it
    is better hardcoded and served through `simulate_first_link`.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    print(url)
    while True:
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            # No previous comic: `url` is the first one.
            break
        url = cls.get_url_from_link(prev_link)
        print(url)
    return {'href': url}
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    Handy to temporarily switch off a comic that misbehaves: replace
    `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic that never returns any comic."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
358
359
360
class GenericComicNotWorking(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when comic is not working.

    More explicit than GenericEmptyComic: it highlights that only the
    implementation is broken and that it can be fixed."""
    _categories = ('NOTWORKING',)
366
367
368
class GenericUnavailableComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic is not available.

    More explicit than GenericEmptyComic: it highlights that the source
    of the comic is unavailable but expected to come back soonish.
    See also GenericDeletedComic."""
    _categories = ('UNAVAILABLE',)
375
376
377
class GenericDeletedComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic does not exist anymore.

    More explicit than GenericEmptyComic: it highlights that the source
    of the comic is gone and probably cannot be fixed. The classes are
    kept because the data already downloaded can still be used.
    See also GenericUnavailableComic."""
    _categories = ('DELETED',)
385
386
387 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (from the og:title meta), the
        sources of the images hosted under `<cls.url>/wp-content/uploads/`,
        the publication date parts (from the article:published_time meta)
        and a file-name prefix derived from the title.
        """
        # The base URL is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
413
414
415 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses provide `first_url`, the page from which navigation starts.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the blog post held in `soup`.

        The date is read from the "entry-date" span and parsed with the
        "fr_FR.utf8" locale; images come from the og:image metas.
        """
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        img_urls = [convert_iri_to_plain_ascii_uri(meta['content'])
                    for meta in image_metas]
        return dict(
            title=title,
            url2=url2,
            img=img_urls,
            month=day.month,
            year=day.year,
            day=day.day,
        )
438
439
440
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World blog."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
446
447
448
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg blog."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Navigation starts here although this is not the very first post:
    # no efficient way to reach the real first one was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
455
456
457
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu blog."""
    name = 'plantu'
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
463
464
465
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce blog."""
    name = 'gorce'
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
471
472
473
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace blog."""
    name = 'forpeace'
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
479
480
481
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel blog."""
    name = 'aurel'
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
487
488
489
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees blog."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
495
496
497
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee blog."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
503
504
505 View Code Duplication
class Rall(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for Ted Rall comics (currently marked as not working)."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Navigation starts here although this is not the very first comic:
    # no efficient way to reach the real first one was found.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        entry = soup.find('div', class_='entry-content')
        # The last 7 images are dropped: they are social media buttons.
        imgs = entry.find_all('img')[:-7]
        return dict(
            title=title,
            author=author,
            month=day.month,
            year=day.year,
            day=day.day,
            description=desc,
            img=[img['src'] for img in imgs],
        )
536
537
538
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next or previous comic (None if absent)."""
        # The site's classes are swapped: 'prev' leads forward, 'next' backward.
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in `soup`."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the leading YYYY-MM-DD part of the dc:date value is parsed.
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return dict(
            short_url=short_url,
            title=title,
            img=[meta['content'] for meta in image_metas],
            day=day.day,
            month=day.month,
            year=day.year,
        )
572
573
574
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict pointing to the first comic."""
        return dict(
            href='http://www.spaceavalanche.com/2009/02/02/irish-sea/',
            title='Irish Sea',
        )

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in `soup`.

        The title comes from `link`; the date is parsed out of the three
        numeric path components of the comic URL (year/month/day).
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(url).groups()
        year, month, day = map(int, date_parts)
        entry = soup.find("div", class_="entry")
        imgs = entry.find_all("img")
        return dict(
            title=title,
            day=day,
            month=month,
            year=year,
            img=[img['src'] for img in imgs],
        )
602
603
604
class ZenPencils(GenericNavigableComic):
    """Retriever for Zen Pencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in `soup`.

        Asserts that at least one image exists in the 'comic' div and
        that each image's alt equals its title and is either the post
        title or empty.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        assert imgs
        for img in imgs:
            assert img['alt'] == img['title']
        for img in imgs:
            assert img['alt'] in (title, "")
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in imgs]
        return dict(
            title=title,
            author=author,
            day=day.day,
            month=day.month,
            year=day.year,
            img=img_urls,
        )
637
638
639
class ItsTheTie(GenericDeletedComic, GenericNavigableComic):
    """Retriever for It's the tie comics (marked as deleted)."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in `soup`.

        Images are the og:image metas followed by any 'data-oversrc'
        bonus image not already listed; the generic bonus-panel
        placeholder is filtered out.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        header = soup.find('header', class_='comic-meta entry-meta')
        date_str = header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs_src = [meta['content']
                    for meta in soup.find_all('meta', property='og:image')]
        bonus_src = [img['data-oversrc']
                     for img in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(imgs_src)
        for src in bonus_src:
            if src not in imgs_src:
                candidates.append(src)
        all_imgs_src = []
        for src in candidates:
            if src.endswith("/2016/01/bonus-panel.png"):
                continue
            all_imgs_src.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return dict(
            title=title,
            month=day.month,
            year=day.year,
            day=day.day,
            img=all_imgs_src,
            tags=tags,
        )
673
674
675 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the blog post held in `soup`.

        The date header is parsed with the "fr_FR.utf8" locale.
        """
        date_header = soup.find('h2', class_='date-header')
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        imgs = body.find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return dict(
            title=title,
            img=[img['src'] for img in imgs],
            month=day.month,
            year=day.year,
            day=day.day,
        )
699
700
701 View Code Duplication
class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for 1111 Comics (currently marked as not working)."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in `soup`."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        header = soup.find('header', class_='comic-meta entry-meta')
        date_str = header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return dict(
            title=title,
            month=day.month,
            year=day.year,
            day=day.day,
            img=[meta['content'] for meta in image_metas],
        )
726
727
728 View Code Duplication
class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
    """Retriever for Angry at Nothing comics (marked as deleted)."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in `soup`."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        header = soup.find('header', class_='comic-meta entry-meta')
        date_str = header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return dict(
            title=title,
            month=day.month,
            year=day.year,
            day=day.day,
            img=[meta['content'] for meta in image_metas],
        )
752
753
754
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the '?p=<num>' part of the page's
        shortlink; exactly one image is expected inside the 'comic' div and
        its alt/title attributes provide the two titles.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = cls.get_url_from_link(shortlink_tag)
        matched = re.match('^%s/\\?p=([0-9]*)' % cls.url, short_url)
        num = int(matched.group(1))
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Sanity check on the page layout: a single strip image per page.
        assert len(comic_imgs) == 1
        only_img = comic_imgs[0]
        title = only_img['alt']
        title2 = only_img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in comic_imgs],
            'num': num,
        }
780
781
782 View Code Duplication
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Looks up the right arrow anchor when next_ is true, the left arrow
        anchor otherwise.
        """
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed out of the comic URL (<url>/comic/YYYY/MM/DD) and
        the images are the 'img-responsive' images of the 'comic-display' div.
        """
        comic_url = cls.get_url_from_link(link)
        matched = re.match('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url, comic_url)
        year, month, day = map(int, matched.groups())
        display = soup.find('div', class_='comic-display')
        strip_imgs = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [tag['src'] for tag in strip_imgs],
        }
810
811
812
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor found inside the matching navigation div, or None
        when that div is not on the page.
        """
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from <meta> tags: Open Graph title, description
        and images, plus the article publish date, author and tag.
        """
        def meta_content(prop):
            """Return the 'content' of the first meta tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
848
849
850
class VictimsOfCircumsolar(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Uses the last og:title / og:description meta tags of the page and the
        images of the 'comic' div; each image's title and alt are expected to
        equal the page title.
        """
        # Date is on the archive page
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        return {
            'title': title,
            'description': desc,
            'img': [tag['src'] for tag in comic_imgs],
        }
873
874
875
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics.

    The site has no dedicated navigation markup: links are located through
    the button images ('/firstlink.gif', '/nextlink.gif', '/prevlink.gif')
    that they wrap.
    """
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the '/firstlink.gif' image found on the
        home page, or None when that image is not on the page.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        # find() returns None when nothing matches; do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the next/previous button image, or None
        when the image is missing or its parent carries no 'href'.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except those whose source ends with
        one of the known decoration/advert file names.
        """
        # Suffixes of images that are part of the site layout, not the comic.
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(ignored_suffixes)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
907
908
909
class DeadlyPanel(GenericComicNotWorking, GenericNavigableComic):  # Not working on my machine
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only the image sources of the 'comic' div are returned; alt and title
        of each image are expected to be identical.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        sources = [tag['src'] for tag in comic_imgs]
        return {
            'img': sources,
        }
927
928
929 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date come from the post header elements
        ('post-title', 'post-author', 'post-date'); images come from the
        'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        author_anchor = soup.find("span", class_="post-author").find("a")
        author = author_anchor.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        info = {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
953
954
955 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date are read from the post header; images from the
        'comicpane' div. The first image's title attribute is returned as
        'title2'.
        """
        title = soup.find('h2', class_='post-title').string
        author_anchor = soup.find("span", class_="post-author").find("a")
        author = author_anchor.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert all(tag['alt'] == tag['title'] for tag in pane_imgs)
        first_img = pane_imgs[0]
        title2 = first_img['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'title2': title2,
            'author': author,
        }
983
984
985 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the 'first_comic_link' navigation anchor of the home page.
        """
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the <img class="comic"> elements; their alt and title are
        expected to equal the comic title. Images with an empty source are
        left out of the result.
        """
        title = soup.find("h1", class_="comic_title").string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        assert all(tag['alt'] == tag['title'] == title for tag in comic_imgs)
        sources = [tag['src'] for tag in comic_imgs if tag["src"]]
        return {
            'title': title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1012
1013
1014
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the anchor marked rel='start' on the home page.
        """
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the 'cc-comic' one; when an 'aftercomic' div exists
        and its image has a non-empty source, that image is appended as a
        second entry.
        """
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        aftercomic = soup.find('div', id='aftercomic')
        if aftercomic:
            bonus_src = aftercomic.find('img')['src']
            if bonus_src:
                sources.append(bonus_src)
        publish_div = soup.find('div', class_='cc-publishtime')
        published = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, src))
                    for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1046
1047
1048
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links.

        Returns the anchors of the 'all_thumbnails' div of the home page, in
        reverse document order.
        """
        thumbnails = get_soup_at_url(cls.url).find('div', id='all_thumbnails')
        anchors = thumbnails.find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Name and image both come from Open Graph meta tags; exactly one
        og:image tag is expected.
        """
        name = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        assert len(image_metas) == 1
        return {
            'name': name,
            'img': [meta['content'] for meta in image_metas],
        }
1071
1072
1073 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        All fields are read from <meta> tags. The description is optional
        (empty string when the tag is absent) and only the first 10
        characters of the published time are used for the date.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1099
1100
1101
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/?p=<num> and captures the number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links.

        Returns the anchors of the '?page_id=2' page whose href matches
        comic_num_re, in reverse document order.
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic URL and the date from the three
        dash-separated numbers found in the image URL.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = map(int, comic_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1137
1138
1139
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages.

    Subclasses provide name, long_name and url.
    """
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the first anchor of the 'centered_nav' div of the home page.
        """
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL (<url>/YYYY/MM/DD/...); images
        are those of the 'storycontent' div inside the 'notes' div, and their
        non-empty title attributes are joined into 'texts'.
        """
        comic_url = cls.get_url_from_link(link)
        matched = re.match('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = map(int, matched.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        story_imgs = story.find_all('img')
        img_titles = [tag.get('title') for tag in story_imgs]
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(tag['src'])
                    for tag in story_imgs if tag.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1167
1168
1169
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # Replaces the _categories value defined on GenericBouletCorp.
    _categories = ('FRANCAIS', )
1175
1176
1177
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1182
1183
1184 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is built by joining the title attributes of the images of
        the 'comic' div; each image's alt is expected to equal its title.
        """
        author_anchor = soup.find("span", class_="post-author").find("a")
        author = author_anchor.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join(tag['title'] for tag in comic_imgs)
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        info = {
            'title': title,
            'author': author,
            'img': [tag['src'] for tag in comic_imgs],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
1209
1210
1211
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the alt text of the first image of the 'comic' div, or
        an empty string when that div holds no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        entry_meta = soup.find('div', class_='entry-meta')
        published = string_to_date(entry_meta.contents[0].strip(), "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if comic_imgs:
            first_img = comic_imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(tag['src']) for tag in comic_imgs],
        }
1241
1242
1243
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended to the
        images of the main comic.
        """
        author_anchor = soup.find("span", class_="post-author").find("a")
        author = author_anchor.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs = comic_div.find_all('img')
        else:
            imgs = []
        extra_url = None
        extra_div = soup.find('div', id='extrapanelbutton')
        if extra_div:
            extra_url = extra_div.find('a')['href']
            # The bonus panel lives on a separate page.
            bonus_soup = get_soup_at_url(extra_url)
            imgs.extend(bonus_soup.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in imgs],
        }
1277
1278
1279
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the anchor titled 'Oldest comic' on the home page.
        """
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the navigation anchor, or None when the anchor is missing
        from the page or carries no 'href'.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as in
        # the original code - confirm it matches the site's markup.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches; do not dereference it.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last '/'-separated component of the
        og:url meta tag; the author credit is expected to start with 'by ',
        which is stripped.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1318
1319
1320
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is taken from the '/comic/<num>' links of
        the home page; iteration starts after last_comic['num'] when a last
        comic is given, at the smallest number found otherwise.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1))
                for anchor in home_links]
        first = min(nums)
        last = max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        img_src_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are kept in reverse document order.
            imgs = soup.find_all('img', src=img_src_re)[::-1]
            description = soup.find('meta', attrs={'name': 'description'})['content']
            img_titles = [tag.get('title') for tag in imgs]
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(url, tag['src']) for tag in imgs],
                'description': description,
            }
1350
1351
1352
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links.

        Returns the matching anchors of 'archive.php', minus the first one,
        in reverse document order.
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is the archive link text (ordinal suffixes removed before
        parsing) and the text is taken from the node following the link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1385
1386
1387 View Code Duplication
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/YYYY/MM/DD/... and captures the date.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links.

        Returns the anchors of the 'archive/' page whose href matches
        comic_link_re, in reverse document order.
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the archive link text, the date comes from the comic
        URL and the image's alt text is expected to equal the title.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [comic_img['src']],
        }
1415
1416
1417
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page, opens every
        month page that may still contain comics newer than the last one
        retrieved (or than 1985-11-01 when there is none) and yields one
        entry per image whose date is strictly after the last date seen.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the raw strings: they are reused verbatim in the image
            # regex and in the image URL.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # First of the month + 31 days is at or past the month's end, so
            # months lying entirely before last_date are skipped without
            # fetching them.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1451
1452
1453 View Code Duplication
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Comic page URLs (<url>/<num>) and strip image URLs (<url>/strips/...).
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links.

        Returns the anchors of the 'archive' page whose href matches
        comic_url_re, in document order.
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The number is read from the comic URL, the title from the archive
        link text and the image from the first <img> matching comic_img_re.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1476
1477
1478
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the "first" button image; None is
        returned when the page holds no such image.
        """
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the "next"/"prev" button image; None is
        returned when the page holds no such image.
        """
        direction = 'next' if next_ else 'prev'
        button = last_soup.find(
            'img', src='http://phdcomics.com/comics/images/%s_button.gif' % direction)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and images are both read from the <meta> tags of the page.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1507
1508
1509
class Octopuns(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the "First.png" image)."""
        first_button = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The parent of the "Next.png"/"Back.png" image is returned only when
        it carries an href attribute; otherwise None is returned.
        """
        if next_:
            button_re = re.compile('.*/Next.png')
        else:
            button_re = re.compile('.*/Back.png')
        anchor = last_soup.find('img', src=button_re).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the date header of the page with the
        "%A, %B %d, %Y" format.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        header_date = soup.find('h2', class_='date-header').string
        day = string_to_date(header_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1541
1542
1543
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    # URL extraction and first-link lookup are delegated to helpers defined
    # elsewhere in this file; first_url is the address of the first article.
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (looked up by element id)."""
        wanted_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image found in the 'single-article' div is kept, its src
        being joined to the class URL.
        """
        title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in article_imgs],
        }
1567
1568
1569
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the link whose href ends with 'comic=1')."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (looked up by link title)."""
        if next_:
            wanted_title = 'next comic'
        else:
            wanted_title = 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the trailing 'comic=<digits>' part of the comic
        URL; the image src is resolved against that same URL.
        """
        img_src_re = re.compile('^/oc/comics/.*')
        comic_num_re = re.compile('.*comic=([0-9]*)$')
        comic_url = cls.get_url_from_link(link)
        num = int(comic_num_re.match(comic_url).group(1))
        strip = soup.find('img', src=img_src_re)
        info = {'num': num}
        info['img'] = [urljoin_wrapper(comic_url, strip['src'])]
        info['title'] = strip.get('title')
        return info
1599
1600
1601
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the div with id "st". None is returned
        when that div is missing, instead of failing on `None.parent`,
        which matches what get_navi_link does for its own div.
        """
        div = get_soup_at_url(cls.url).find("div", id="st")
        return div.parent if div else None

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the div with id "nx" (next) or "pvs"
        (previous); None is returned when that div is missing.
        """
        div = last_soup.find("div", id="nx" if next_ else "pvs")
        return div.parent if div else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image (inside the div with id "tt") and exactly
        one strip image (id "strip") are expected; both are returned as
        images and their title attributes are joined into the description.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        imgs = title_imgs + strip_imgs
        desc = ' '.join(i['title'] for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'description': desc,
        }
1635
1636
1637
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    # First-link lookup is delegated to a helper defined elsewhere in this
    # file; first_url is the address of the first comic.
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (looked up by accesskey)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from <meta> tags; images are the <img>
        tags whose itemprop is "image".
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': desc,
            'img': [picture['src'] for picture in pictures],
        }
1661
1662
1663
class SomethingOfThatIlk(GenericDeletedComic):
    """Class to retrieve the Something Of That Ilk comics.

    Only the identification attributes are declared here; everything else
    comes from GenericDeletedComic.
    """
    url = 'http://www.somethingofthatilk.com'
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
1668
1669
1670
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    # Navigation is delegated to helpers defined elsewhere in this file;
    # first_url is the address of the first comic.
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the og:title <meta> tag and the images are
        all the <img> tags of the div with id 'comic'.
        """
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        sources = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'title': title,
            'img': sources,
        }
1688
1689
1690
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has no div with id 'comic', the entry is still
        returned with an empty image list and empty title/alt.
        """
        post_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(post_date), "%B %d, %Y")
        # Defaults used for pages without a comic div.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag_link.string for tag_link in tag_links),
        }
1727
1728
1729 View Code Duplication
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    # Navigation is delegated to helpers defined elsewhere in this file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the 'post-date' span with the "%B %d, %Y"
        format; images are all the <img> tags of the div with id 'comic'.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        sources = [tag['src'] for tag in comic_imgs]
        return {
            'img': sources,
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1751
1752
1753
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    # Navigation is delegated to helpers defined elsewhere in this file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the div with id "comic" is expected to have equal
        alt and title attributes; the alt of the first image is returned.
        """
        title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        for tag in comic_imgs:
            assert tag['alt'] == tag['title']
        alt = comic_imgs[0]['alt']
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': alt,
        }
1774
1775
1776 View Code Duplication
class MouseBearComedy(GenericComicNotWorking):  # Website has changed
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    # Navigation is delegated to helpers defined elsewhere in this file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the div with id "comic" is expected to have its alt
        and title attributes both equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        day = string_to_date(post_date, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        for tag in comic_imgs:
            assert tag['alt'] == tag['title'] == title
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
1802
1803
1804
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    # Navigation is delegated to helpers defined elsewhere in this file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-separated concatenation of the title
        attributes of the images found in the div with id 'comic'.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        for tag in comic_imgs:
            assert tag['title'] == tag['alt']
        titles = [tag['title'] for tag in comic_imgs]
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': ' '.join(titles),
        }
1823
1824
1825
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The URL literal used to end with a stray space, which is not part of
    # the address and would leak into anything built from cls.url.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    # Navigation is delegated to helpers defined elsewhere in this file;
    # first_url is the address of the first comic.
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author, publication date and images are all read from
        <meta> tags. Only the first 10 characters of the published time
        (the "%Y-%m-%d" part) are parsed.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries that are excluded from the comic images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1857
1858
1859 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    # Navigation is delegated to helpers defined elsewhere in this file;
    # first_url is the address of the first comic.
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The alt text is taken from the first image of the div with id
        'comic'; every image is expected to have equal alt and title.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        alt = comic_imgs[0]['alt']
        for tag in comic_imgs:
            assert tag['alt'] == tag['title']
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': alt,
        }
1886
1887
1888 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    # Navigation is delegated to helpers defined elsewhere in this file;
    # first_url is the address of the first comic.
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected in the 'comicpane' div, each with
        title and alt attributes both equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        for tag in pane_imgs:
            assert tag['title'] == tag['alt'] == title
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'author': author,
        }
1916
1917
1918 View Code Duplication
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    # Navigation is delegated to helpers defined elsewhere in this file;
    # first_url is the address of the first comic.
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is the first 10 characters ("%Y-%m-%d") of the datetime
        attribute of the <time> tag; tags are the texts of the rel='tag'
        links joined with spaces.
        """
        title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(tag_link.string for tag_link in tag_links)
        iso_date = soup.find('time')['datetime'][:10]
        day = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [tag['src'] for tag in content_imgs],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
1945
1946
1947
class TheDoghouseDiaries(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the link with id 'firstlink')."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (looked up by element id)."""
        wanted_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the last '/'-separated part of the comic URL;
        the image src is stripped and resolved against that URL.
        """
        comic_img_re = re.compile('^dhdcomics/.*')
        strip = soup.find('img', src=comic_img_re)
        comic_url = cls.get_url_from_link(link)
        info = {'title': soup.find('h2', id='titleheader').string}
        info['title2'] = soup.find('div', id='subtext').string
        info['alt'] = strip.get('title')
        info['img'] = [urljoin_wrapper(comic_url, strip['src'].strip())]
        info['num'] = int(comic_url.split('/')[-1])
        return info
1976
1977
1978
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archives/'))
        title_cells = archive_soup.find_all('td', class_='archive-title')
        return reversed(title_cells)

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the link contained in an archive cell."""
        anchor = td.find('a')
        return anchor['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The date is rebuilt from the text of the sibling preceding the
        archive cell (month and day) and from the year found at the start
        of the comic URL path.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        year_re = re.compile('^%s/([0-9]+)/' % cls.url)
        year = year_re.match(url).group(1)
        day = string_to_date(month_and_day + ' ' + year, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert len(imgs) == 1
        for tag in imgs:
            assert tag['title'] == tag['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in imgs],
            'title': title,
        }
2014
2015
2016
class DiscoBleach(GenericDeletedComic):
    """Class to retrieve Disco Bleach Comics.

    Only the identification attributes are declared here; everything else
    comes from GenericDeletedComic.
    """
    url = 'http://discobleach.com'
    name = 'discobleach'
    long_name = 'Disco Bleach'
2021
2022
2023
class TubeyToons(GenericDeletedComic):
    """Class to retrieve TubeyToons comics.

    Only the identification attributes and the category are declared
    here; everything else comes from GenericDeletedComic.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    url = 'http://tubeytoons.com'
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    _categories = ('TUNEYTOONS', )
2031
2032
2033 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    # Navigation is delegated to helpers defined elsewhere in this file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The author is the text of the second child of the 'post-author'
        span. At least one image is expected in the 'comicpane' div, and
        all images must share the title/alt of the first one.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt = pane_imgs[0]['title']
        for tag in pane_imgs:
            assert tag['title'] == tag['alt'] == alt
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
2061
2062
2063 View Code Duplication
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; the title is its
        title attribute, or an empty string when there is no image or no
        such attribute.
        """
        post_imgs = soup.find('div', class_='post').find_all('img')
        assert len(post_imgs) <= 1
        sources = [tag['src'] for tag in post_imgs]
        if post_imgs:
            title = post_imgs[0].get('title', "")
        else:
            title = ""
        return {
            'img': sources,
            'title': title,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links pointing under '<url>/comic/', in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        comic_link_re = re.compile('^%s/comic/.' % cls.url)
        return reversed(archive_soup.find_all('a', href=comic_link_re))
2087
2088
2089
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the link titled "First")."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (looked up by link title)."""
        if next_:
            wanted_title = 'Next'
        else:
            wanted_title = 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only the images of the 'comic' div whose alt and title attributes
        are both empty strings are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        day = string_to_date(raw_date.strip(), "%B %d, %Y")
        comic_imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2119
2120
2121 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    # Navigation is delegated to helpers defined elsewhere in this file.
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        A page without a div with id 'comic' (or without image in it)
        yields an empty image list and an empty title. Otherwise every
        image must share the title/alt of the first one.
        """
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(post_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        for tag in comic_imgs:
            assert tag['title'] == tag['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
2147
2148
2149
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the image named 'beginArrow'. None is
        returned when the page has no such image, instead of failing on
        `None.parent`.
        """
        img = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous). None is returned when the page has no such
        image, instead of failing on `None.parent`.
        """
        img = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and images are both read from the <meta> tags of the page.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2175
2176
2177
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' links of the 'post-list' list, in reversed order."""
        listing_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        posts = listing_soup.find('ul', class_='post-list')
        post_links = posts.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Title and images are both read from the <meta> tags of the page.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2199
2200
2201
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        The archive row must hold exactly three cells: the second one
        carries the link (whose text is the title) and the third one the
        date, parsed with the "%m.%d.%y" format. The description is empty
        when the page has no og:description <meta> tag.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in content_imgs],
            'alt': ' '.join(tag['alt'] for tag in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held by the second cell of an archive row."""
        _, link_cell, __ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page table body, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2243
2244
2245 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = "happletea"
    long_name = "Happle Tea"
    url = "http://www.happletea.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, alt text, date, author) for one comic page."""
        pictures = soup.find('div', id='comic').find_all('img')
        post_content = soup.find('div', class_='post-content')
        title = post_content.find('h2', class_='post-title').string
        author = post_content.find('a', rel='author').string
        raw_date = post_content.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity check: every image carries the same text in 'alt' and 'title'.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join([pic['alt'] for pic in pictures]),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2272
2273
2274
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = "rps"
    long_name = "Rock Paper Scissors"
    url = "http://rps-comics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Images come from the 'og:image' meta tags, the short url from the
        'shortlink' link element.
        """
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript = soup.find('div', id='transcript-content').string
        return {
            'title': page_title,
            'transcript': transcript,
            'short_url': short_url,
            'img': [meta['content'] for meta in image_metas],
        }
2295
2296
2297
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = "fatawesome"
    long_name = "Fat Awesome"
    url = "http://fatawesome.com/comics"
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://fatawesome.com/shortbus/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its meta tags and its image."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters of the timestamp are parsed as a date.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join([pic['alt'] for pic in pictures]),
            # Whatever follows the last '?' of the image url is dropped.
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2328
2329
2330 View Code Duplication
class JuliasDrawings(GenericListableComic):
    """Retriever for Julia's Drawings."""
    name = "julia"
    long_name = "Julia's Drawings"
    url = "https://drawings.jvns.ca"
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each post article of the home page, in reverse page order."""
        posts = get_soup_at_url(cls.url).find_all('article', class_='li post')
        links = []
        for post in reversed(posts):
            links.append(post.find('a'))
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (title, images, date) for one comic page."""
        published_time = soup.find('meta', property='og:article:published_time')['content']
        # Only the first 10 characters of the timestamp are parsed as a date.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        pictures = soup.find('section', class_='post-content').find_all('img')
        image_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2356
2357
2358
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = "anythingcomic"
    long_name = "Anything Comic"
    url = "http://www.anythingcomic.com"

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive 'chapter_table' that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url for an archive row (link of its second cell)."""
        cells = tr.find_all('td')
        _num_cell, comic_cell, _date_cell, _last_cell = cells
        href = comic_cell.find('a')['href']
        return urljoin_wrapper(cls.url, href)

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for one comic.

        Number, title and date are read from the archive row `tr`; the image
        is read from the comic page `soup`.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        # Sanity check: 'alt' and 'title' are expected to agree (both may be absent).
        for pic in pictures:
            assert pic.get('alt') == pic.get('title')
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2399
2400
2401 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = "millsap"
    long_name = "Lonnie Millsap"
    url = "http://www.lonniemillsap.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.lonniemillsap.com/?p=42"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        post_content = soup.find('div', class_='post-content')
        author_span = post_content.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post_content.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = post_content.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2427
2428
2429 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = "lins"
    long_name = "L.I.N.S. Editions"
    url = "https://linsedition.com"
    _categories = ("LINS", )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "https://linsedition.com/2011/09/07/l-i-n-s/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) from the page's meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters of the timestamp are parsed as a date.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2455
2456
2457
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = "thor"
    long_name = "Thor's Thundershack"
    url = "http://www.thorsthundershack.com"
    _categories = ("THOR", )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/prev anchor whose href is not '/comic', or None."""
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its itemprop-annotated elements."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Sanity check: every image carries the same text in 'title' and 'alt'.
        for pic in pictures:
            assert pic['title'] == pic['alt']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2500
2501
2502
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image urls, title, alt text, author and
        publication date read from the page `soup`. A page whose comic div
        holds no image yields an empty image list and an empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # find_all returns an empty list when there is no <img>; do not
        # index into it blindly (same guard as the sibling comic classes).
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: all images share the same 'alt' and 'title' text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2529
2530
2531 View Code Duplication
class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = 'blues'
    long_name = 'Every Day Blues'
    url = 'http://everydayblues.net'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, author, date) for one comic page.

        The date string is parsed with the "de_DE.utf8" locale.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: image 'alt' and 'title' both repeat the post title.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == title
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2557
2558
2559 View Code Duplication
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = 'biter'
    long_name = 'Biter Comics'
    url = 'http://www.bitercomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image, title, alt text, author, date) for one comic page."""
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: matching 'alt'/'title' texts and exactly one image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        only_picture = pictures[0]
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': only_picture['alt'],
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2587
2588
2589 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti"
    long_name = "The Awkward Yeti"
    url = "http://theawkwardyeti.com"
    _categories = ("YETI", )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching 'alt' and 'title'.
        if pictures:
            first_picture = pictures[0]
            assert first_picture['alt'] == first_picture['title']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2616
2617
2618
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = "pleasant"
    long_name = "Pleasant Thoughts"
    url = "http://pleasant-thoughts.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) from the post content of one comic page."""
        post_content = soup.find('div', class_='post-content')
        heading = post_content.find('h2', class_='post-title')
        title = heading.string
        entry = post_content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
2636
2637
2638 View Code Duplication
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = "mister"
    long_name = "Mister & Me"
    url = "http://www.mister-and-me.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image, title, alt text, author, date) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: matching 'alt'/'title' texts and at most one image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2668
2669
2670 View Code Duplication
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = "lastplace"
    long_name = "Last Place Comics"
    url = 'http://lastplacecomics.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image, title, alt text, author, date) for one comic page."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Sanity checks: matching 'alt'/'title' texts and at most one image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2698
2699
2700 View Code Duplication
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = "absurdity"
    long_name = "Tales of Absurdity"
    url = "http://talesofabsurdity.com"
    _categories = ("ABSURDITY", )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt text, author, date) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: every image carries the same text in 'alt' and 'title'.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        # The alt text is the one of the first image, if any.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2730
2731
2732 View Code Duplication
class EndlessOrigami(GenericComicNotWorking, GenericNavigableComic):  # Nav not working
    """Retriever for the Endless Origami comics."""
    name = 'origami'
    long_name = 'Endless Origami'
    url = 'http://endlessorigami.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt text, author, date) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Sanity check: every image carries the same text in 'alt' and 'title'.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2759
2760
2761 View Code Duplication
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = "planc"
    long_name = "Plan C"
    url = "http://www.plancomic.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2783
2784
2785
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = "buni"
    long_name = "BuniComics"
    url = "http://www.bunicomic.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        The title is the 'title' attribute of the page's single comic image.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity checks: matching 'alt'/'title' texts and exactly one image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        only_picture = pictures[0]
        return {
            'img': [pic['src'] for pic in pictures],
            'title': only_picture['title'],
        }
2803
2804
2805
class GenericCommitStrip(GenericNavigableComic):
    """Shared retriever for Commit Strip, whatever the language.

    Subclasses are expected to set `first_url` (left as NotImplemented here).
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (titles, description, images) for one comic page."""
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # Secondary title: the images' 'title' attributes ('' when missing), space-joined.
        image_titles = [pic.get('title', '') for pic in pictures]
        image_urls = []
        for pic in pictures:
            ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
            image_urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': ' '.join(image_titles),
            'description': description,
            'img': image_urls,
        }
2824
2825
2826
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French version of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    # Page used as the starting point of the navigation.
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2833
2834
2835
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English version of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    # Page used as the starting point of the navigation.
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2841
2842
2843 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Shared retriever for Boumeries comics, whatever the language.

    Subclasses are expected to set `date_format` and `lang`, which are both
    passed to string_to_date to parse the post date.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (short url, images, title, author, date) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: every image carries the same text in 'alt' and 'title'.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2869
2870
2871
class BoumerieEn(GenericBoumerie):
    """Retriever for the English version of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Format and locale used to parse the post date.
    date_format = '%B %d, %Y'
    lang = "en_GB.UTF-8"
2878
2879
2880
class BoumerieFr(GenericBoumerie):
    """Retriever for the French version of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    # Format and locale used to parse the post date.
    date_format = '%A, %d %B %Y'
    lang = 'fr_FR.utf8'
2888
2889
2890 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title ('' when the page has no h1/h2), the
        'og:description' text ('' when the meta tag is absent), the short
        url (under key 'url2'), the image urls and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc = soup.find('meta', property='og:description')
        # Store the text of the meta tag, not the tag object itself.
        description = desc['content'] if desc else ''
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': description,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2922
2923
2924 View Code Duplication
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = "optipess"
    long_name = "Optipess"
    url = "http://www.optipess.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        A page without a comic div yields an empty image list and an empty
        alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # Sanity check: all images share the same 'alt' and 'title' text.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2952
2953
2954
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the page's ``shortlink``, which must
        start with ``<cls.url>/?p=`` followed by digits. ``link`` is not used.

        Returns a dictionary with the short url, number, image urls,
        publication date (day/month/year), alt text and title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the base url so that its dots only match literal dots
        # instead of any character.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images are expected to share the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2984
2985
2986
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the page's ``shortlink``, which must
        start with ``<cls.url>/?p=`` followed by digits. ``link`` is not used.

        Returns a dictionary with the short url, number, image urls,
        publication date (day/month/year), title, tags, alt text and author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the base url so that its dots only match literal dots
        # instead of any character.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images are expected to share the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Tags are the space-joined contents of the 'article:tag' meta elements.
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
3023
3024
3025
class AHammADay(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Scraper for A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic."""
        # The labels are swapped: 'previous' is looked up to go forward.
        li_class = 'previous' if next_ else 'next'
        list_item = last_soup.find('li', class_=li_class)
        return list_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, author and date from a comic page.

        ``link`` is not used.
        """
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        author_span = soup.find('span', class_='blog-author')
        post_author = author_span.find('a').string
        post_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', itemprop='image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': post_title,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3056
3057
3058
class SystemComic(GenericNavigableComic):
    """Scraper for System Comic."""
    name = 'system'
    long_name = 'System Comic'
    url = 'http://www.systemcomic.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor inside the 'first' list item of the home page."""
        home = get_soup_at_url(cls.url)
        first_item = home.find('li', class_='first')
        return first_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, date and images from a comic page.

        ``link`` is not used.
        """
        post_title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d")
        figure = soup.find('figure')
        images = figure.find_all('img')
        return {
            'title': post_title,
            'description': description,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
3086
3087
3088
class LittleLifeLines(GenericNavigableComic):
    """Scraper for Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next/previous comic, or None if absent."""
        # The labels are swapped: 'prev' is looked up to go forward.
        li_class = 'prev' if next_ else 'next'
        list_item = last_soup.find('li', class_=li_class)
        if list_item:
            return list_item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, alt, description, author, date and images.

        Only images that have a ``src`` attribute are kept; the alt text is
        taken from the first of them. ``link`` is not used.
        """
        post_title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        post_author = soup.find('a', rel='author').string
        content = soup.find('div', class_="body entry-content")
        images = [
            image for image in content.find_all('img')
            if image.get('src') is not None
        ]
        alt_text = images[0]['alt']
        return {
            'title': post_title,
            'alt': alt_text,
            'description': description,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
3127
3128
3129
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared scraper for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page.

        The date is read from the first 10 characters of the
        ``article:published_time`` meta content. ``link`` is not used.
        """
        post_title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        images = image_div.find_all('img')
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
3152
3153
3154
class EverythingsStupid(GenericWordPressInkblot):
    """Everything's Stupid comics, retrieved via GenericWordPressInkblot."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
3162
3163
3164
class TheIsmComics(GenericWordPressInkblot):
    """The Ism comics, retrieved via GenericWordPressInkblot."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
3170
3171
3172
class WoodenPlankStudios(GenericWordPressInkblot):
    """Wooden Plank Studios comics, retrieved via GenericWordPressInkblot."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
3177
3178
3179
class ElectricBunnyComic(GenericNavigableComic):
    """Scraper for Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt is 'First'."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if nav_img:
            return nav_img.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and image urls from the page's Open Graph metas.

        ``link`` is not used.
        """
        post_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': post_title,
            'img': [meta['content'] for meta in image_metas],
        }
3207
3208
3209
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Navigation anchors whose href is exactly the site url are ignored.
        Returns None when no other navigation anchor is found.
        """
        for link in last_soup.find_all("a", id="nav-next" if next_ else "nav-prev"):
            # Compare against cls.url rather than repeating the literal url.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one image is expected in the ``comic-foot`` div; its title
        attribute is used as the comic title. ``link`` is not used.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3240
3241
3242 View Code Duplication
class Ubertool(GenericNavigableComic):
    """Scraper for Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title and date from a comic page.

        ``link`` is not used.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'img': [image['src'] for image in images],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3267
3268
3269 View Code Duplication
class EarthExplodes(GenericNavigableComic):
    """Scraper for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor of id 'next' (or 'prev') from the page."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title and alt text from a comic page.

        Image sources are joined to ``cls.url``; the alt text is the first
        image's title attribute, or '' when it has none. ``link`` is not used.
        """
        page_title = soup.find('title').string
        image_div = soup.find('div', id='image')
        images = image_div.find_all('img')
        alt_text = images[0].get('title', '')
        return {
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'title': page_title,
            'alt': alt_text,
        }
3294
3295
3296 View Code Duplication
class PomComics(GenericNavigableComic):
    """Scraper for Pom Comics / Piece of Me."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' (or 'btn-prev') anchor from the page."""
        button_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, tags and images from a comic page.

        Image sources are joined to ``cls.url``. ``link`` is not used.
        """
        heading = soup.find('h1').string
        description = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        comic_div = soup.find('div', class_='comic')
        images = comic_div.find_all('img')
        return {
            'title': heading,
            'desc': description,
            'tags': keywords,
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
        }
3326
3327
3328
class CubeDrone(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Scraper for Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'backward' glyphicon span."""
        backward = get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward')
        return backward.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the right (next) or left (previous) chevron."""
        direction = 'right' if next_ else 'left'
        chevron = last_soup.find('span', class_='glyphicon glyphicon-chevron-%s' % direction)
        return chevron.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract urls, titles, alt text and images from a comic page.

        ``title2`` and ``alt`` come from the first comic image. ``link`` is
        not used.
        """
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        twitter_url = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        images = soup.find_all('img', class_='comic img-responsive')
        first_image = images[0]
        image_title = first_image['title']
        alt_text = first_image['alt']
        return {
            'url2': twitter_url,
            'title': twitter_title,
            'title2': image_title,
            'alt': alt_text,
            'img': [image['src'] for image in images],
        }
3363
3364
3365
class MakeItStoopid(GenericDeletedComic, GenericNavigableComic):
    """Scraper for Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page as a list.

        The 'cnav' elements are split into the first five and the rest,
        which must be equal. Elements without an href are replaced by None.
        """
        nav_elts = soup.find_all(class_='cnav')
        first_nav = nav_elts[:5]
        second_nav = nav_elts[5:]
        assert first_nav == second_nav
        # Positions are: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_nav]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the home page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element for the next or previous comic."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title (from ``link``) and the images of id 'comicimg'."""
        link_title = link['title']
        images = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [image['src'] for image in images],
        }
3399
3400
3401
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the entry title and the urls of the images
        found in the ``entry-content`` div. ``link`` is not used.
        """
        # The debugging print of `link` was removed: it wrote to stdout for
        # every comic retrieved.
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3423
3424
3425 View Code Duplication
class MarketoonistComics(GenericNavigableComic):
    """Scraper for Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, date and title from the page's meta elements.

        The date is read from the first 10 characters of the
        ``article:published_time`` meta content. ``link`` is not used.
        """
        image_metas = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        post_title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': post_title,
        }
3448
3449
3450 View Code Duplication
class ConsoliaComics(GenericNavigableComic):
    """Scraper for Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor of class 'first' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor of class 'next' (or 'prev') from the page."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and date from a comic page.

        ``link`` is not used.
        """
        post_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find('time')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': post_title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3481
3482
3483 View Code Duplication
class TuMourrasMoinsBete(GenericNavigableComic):
    """Scraper for Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor when going forward, else 'older'."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, author and title from a post page.

        ``link`` is not used.
        """
        page_title = soup.find('title').string
        body_div = soup.find('div', itemprop='description articleBody')
        images = body_div.find_all('img')
        post_author = soup.find('span', itemprop='author').string
        return {
            'img': [image['src'] for image in images],
            'author': post_author,
            'title': page_title,
        }
3508
3509
3510
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The 'prev-item' anchor is looked up when going forward.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are looked up in the "body entry-content" div, falling back to
        the "special-content" div; only images with a ``src`` are kept and
        their sources are joined to ``cls.url``. The alt text is taken from
        the first image and is an empty string when there is no image or no
        alt attribute. ``link`` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): if `i` is a BeautifulSoup Tag, `'title' not in i`
        # tests the tag's children rather than its attributes, which would
        # make this check always pass - confirm whether an attribute test
        # (e.g. `i.has_attr('title')`) was intended.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (the fallback used to be a list).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3548
3549
3550 View Code Duplication
class GloryOwlComix(GenericNavigableComic):
    """Scraper for Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor when going forward, else 'older'."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, author and title from a post page.

        Image urls come from the ``image_src`` link elements. ``link`` is
        not used.
        """
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        post_author = soup.find('a', rel='author').string
        return {
            'img': [image_link['href'] for image_link in image_links],
            'author': post_author,
            'title': page_title,
        }
3575
3576
3577
class AtRandomComics(GenericNavigableComic):
    """Scraper for At Random Comics."""
    name = 'atrandom'
    long_name = 'At Random Comics'
    url = 'http://www.atrandomcomics.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.atrandomcomics.com/at-random-comics-home/2015/5/5/can-of-worms'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor for the next or previous comic."""
        # The ids are swapped: 'prevLink' is looked up to go forward.
        anchor_id = 'prevLink' if next_ else 'nextLink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images, date, author and description from a page.

        ``link`` is not used.
        """
        post_title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d")
        post_author = soup.find('a', rel='author').string
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': post_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': post_author,
            'description': description,
        }
3609
3610
3611
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the comic dictionary of each post returned by get_posts,
        skipping the posts for which get_comic_info returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post, printing a warning if it is not under cls.url."""
        url = post['url']
        if not url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_api_url(cls):
        """Get the url of the API read endpoint ('/api/read/' joined to cls.url)."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Get the API url for a single post given its integer id."""
        return cls.get_api_url() + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo'. Otherwise returns a
        dictionary with the post urls, date, title (photo caption, or '' when
        there is none), space-joined tags, image urls, id and API url.
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Only the posts newer than last_comic (identified by its 'tumblr-id')
        are returned; when last_comic is None or empty, all posts are
        returned. An empty result is returned when the previous post cannot
        be fetched or is not found among the posts.
        """
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        posts_acc = []
        # Test the id rather than last_comic so that a falsy, non-None
        # last_comic does not reach get_api_url_for_id with None.
        if waiting_for_id is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                tumblr_id = int(p['id'])
                # Stop at the last known post: everything gathered so far is newer.
                if waiting_for_id and waiting_for_id == tumblr_id:
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_id is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_id)
        return []
3712
3713
3714
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retriever for Saturday Morning Breakfast Cereal, built on GenericTumblrV1."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = "smbc-tumblr"
    long_name = "Saturday Morning Breakfast Cereal (from Tumblr)"
    url = "http://smbc-comics.tumblr.com"
    _categories = ("SMBC",)
3722
3723
3724
class IrwinCardozo(GenericTumblrV1):
    """Retriever for Irwin Cardozo comics, built on GenericTumblrV1."""
    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3729
3730
3731
class AccordingToDevin(GenericTumblrV1):
    """Retriever for According To Devin comics, built on GenericTumblrV1."""
    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3736
3737
3738
class ItsTheTieTumblr(GenericTumblrV1):
    """Retriever for It's the tie comics, built on GenericTumblrV1."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ("TIE",)
3746
3747
3748
class OctopunsTumblr(GenericTumblrV1):
    """Retriever for Octopuns comics, built on GenericTumblrV1."""
    # Also on http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3754
3755
3756
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retriever for Pictures In Boxes comics, built on GenericTumblrV1."""
    # Also on http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "https://picturesinboxescomic.tumblr.com"
3762
3763
3764
class TubeyToonsTumblr(GenericTumblrV1):
    """Retriever for Tubey Toons comics, built on GenericTumblrV1."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "https://tubeytoons.tumblr.com"
    # NOTE(review): the tag reads 'TUNEYTOONS' (not 'TUBEYTOONS'); presumably
    # shared with other Tubey Toons classes - confirm before renaming.
    _categories = ("TUNEYTOONS",)
3772
3773
3774
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retriever for Unearthed Comics, built on GenericTumblrV1."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "https://unearthedcomics.tumblr.com"
    _categories = ("UNEARTHED",)
3782
3783
3784
class PieComic(GenericTumblrV1):
    """Retriever for Pie Comic, built on GenericTumblrV1."""
    name = "pie"
    long_name = "Pie Comic"
    url = "http://piecomic.tumblr.com"
3789
3790
3791
class MrEthanDiamond(GenericTumblrV1):
    """Retriever for Mr Ethan Diamond comics, built on GenericTumblrV1."""
    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3796
3797
3798
class Flocci(GenericTumblrV1):
    """Retriever for floccinaucinihilipilification comics, built on GenericTumblrV1."""
    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3803
3804
3805
class UpAndOut(GenericTumblrV1):
    """Retriever for Up & Out comics, built on GenericTumblrV1."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3811
3812
3813
class Pundemonium(GenericTumblrV1):
    """Retriever for Pundemonium comics, built on GenericTumblrV1."""
    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3818
3819
3820
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retriever for Poorly Drawn Lines comics, built on GenericTumblrV1."""
    # Also on http://poorlydrawnlines.com
    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
    _categories = ("POORLYDRAWN",)
3827
3828
3829
class PearShapedComics(GenericTumblrV1):
    """Retriever for Pear-Shaped Comics, built on GenericTumblrV1."""
    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3834
3835
3836
class PondScumComics(GenericTumblrV1):
    """Retriever for Pond Scum comics, built on GenericTumblrV1."""
    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3841
3842
3843
class MercworksTumblr(GenericTumblrV1):
    """Retriever for Mercworks comics, built on GenericTumblrV1."""
    # Also on http://mercworks.net
    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3849
3850
3851
class OwlTurdTumblr(GenericTumblrV1):
    """Retriever for Owl Turd comics, built on GenericTumblrV1."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturd.com"
    _categories = ("OWLTURD",)
3858
3859
3860
class VectorBelly(GenericTumblrV1):
    """Retriever for Vector Belly comics, built on GenericTumblrV1."""
    # Also on http://vectorbelly.com
    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3866
3867
3868
class GoneIntoRapture(GenericTumblrV1):
    """Retriever for Gone Into Rapture comics, built on GenericTumblrV1."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://goneintorapture.com"
3875
3876
3877
class TheOatmealTumblr(GenericTumblrV1):
    """Retriever for The Oatmeal comics, built on GenericTumblrV1."""
    # Also on http://theoatmeal.com
    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3883
3884
3885
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retriever for Heck If I Know Comics, built on GenericTumblrV1."""
    # Also on http://tapastic.com/series/Regular
    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3891
3892
3893
class MyJetPack(GenericTumblrV1):
    """Retriever for My Jet Pack comics, built on GenericTumblrV1."""
    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3898
3899
3900
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retriever for Cheer Up Emo Kid comics, built on GenericTumblrV1."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "https://enzocomics.tumblr.com"
3907
3908
3909
class ForLackOfABetterComic(GenericTumblrV1):
    """Retriever for For Lack Of A Better Comic, built on GenericTumblrV1."""
    # Also on http://forlackofabettercomic.com
    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3915
3916
3917
class ZenPencilsTumblr(GenericTumblrV1):
    """Retriever for Zen Pencils comics, built on GenericTumblrV1."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
    _categories = ("ZENPENCILS",)
3925
3926
3927
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retriever for Three Word Phrase comics, built on GenericTumblrV1."""
    # Also on http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://threewordphrase.tumblr.com"
3933
3934
3935
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retriever for Time Trabble comics, built on GenericTumblrV1."""
    # Also on http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3941
3942
3943
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retriever for Safely Endangered comics, built on GenericTumblrV1."""
    # Also on http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3949
3950
3951
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retriever for Mouse Bear Comedy comics, built on GenericTumblrV1."""
    # Also on http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3957
3958
3959
class BouletCorpTumblr(GenericTumblrV1):
    """Retriever for Boulet Corp comics, built on GenericTumblrV1."""
    # Also on http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "https://bouletcorp.tumblr.com"
    _categories = ("BOULET",)
3966
3967
3968
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retriever for The Awkward Yeti comics, built on GenericTumblrV1."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
    _categories = ("YETI",)
3977
3978
3979
class NellucNhoj(GenericTumblrV1):
    """Retriever for Nelluc Nhoj comics, built on GenericTumblrV1."""
    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3984
3985
3986
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retriever for Down The Upward Spiral comics, built on GenericTumblrV1."""
    # Also on http://www.downtheupwardspiral.com
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3993
3994
3995
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics.

    The comic is tagged with the 'DAMILEE' category.
    """
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Every other comic class in this file declares its tags as `_categories`;
    # this one stored them only under the public name `categories`, so the
    # tag was presumably never picked up by the category machinery.
    _categories = ('DAMILEE', )
    # Legacy alias kept so code that read the old attribute keeps working.
    categories = _categories
4002
4003
4004
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People.

    The comic is tagged with the 'DAMILEE' category.
    """
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Every other comic class in this file declares its tags as `_categories`;
    # this one stored them only under the public name `categories`, so the
    # tag was presumably never picked up by the category machinery.
    _categories = ('DAMILEE', )
    # Legacy alias kept so code that read the old attribute keeps working.
    categories = _categories
4013
4014
4015
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retriever for 1111 Comics, built on GenericTumblrV1."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
    _categories = ("ONEONEONEONE",)
4023
4024
4025
class JhallComicsTumblr(GenericTumblrV1):
    """Retriever for Jhall Comics, built on GenericTumblrV1."""
    # Also on http://jhallcomics.com
    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
4031
4032
4033
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retriever for Berkeley Mews comics, built on GenericTumblrV1."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
    _categories = ("BERKELEY",)
4041
4042
4043
class JoanCornellaTumblr(GenericTumblrV1):
    """Retriever for Joan Cornella comics, built on GenericTumblrV1."""
    # Also on http://joancornella.net
    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
4049
4050
4051
class RespawnComicTumblr(GenericTumblrV1):
    """Retriever for Respawn Comic, built on GenericTumblrV1."""
    # Also on http://respawncomic.com
    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "https://respawncomic.tumblr.com"
4057
4058
4059
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name previously read 'Chris Hallback'; the class name and the
    # blog URL both spell it 'Hallbeck'.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the tag keeps its historical spelling 'HALLBACK' because
    # it is presumably shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
4069
4070
4071
class ComicNuggets(GenericTumblrV1):
    """Retriever for Comic Nuggets, built on GenericTumblrV1."""
    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
4076
4077
4078
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retriever for The Pigeon Gazette comics, built on GenericTumblrV1."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
4084
4085
4086
class CancerOwl(GenericTumblrV1):
    """Retriever for Cancer Owl comics, built on GenericTumblrV1."""
    # Also on http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
4092
4093
4094
class FowlLanguageTumblr(GenericTumblrV1):
    """Retriever for Fowl Language comics, built on GenericTumblrV1."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
    _categories = ("FOWLLANGUAGE",)
4103
4104
4105
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retriever for The Odd 1s Out comics, built on GenericTumblrV1."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
4112
4113
4114
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retriever for The Underfold comics, built on GenericTumblrV1."""
    # Also on http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
4120
4121
4122
class LolNeinTumblr(GenericTumblrV1):
    """Retriever for Lol Nein comics, built on GenericTumblrV1."""
    # Also on http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
4128
4129
4130
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retriever for Fat Awesome Comics, built on GenericTumblrV1."""
    # Also on http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
4136
4137
4138
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retriever for The World Is Flat comics, built on GenericTumblrV1."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.com"
4144
4145
4146
class DorrisMc(GenericTumblrV1):
    """Retriever for Dorris Mc comics, built on GenericTumblrV1."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
4152
4153
4154
class LeleozTumblr(GenericDeletedComic, GenericTumblrV1):
    """Retriever for Leleoz comics, combining GenericDeletedComic and GenericTumblrV1."""
    # Also on https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
4160
4161
4162
class MoonBeardTumblr(GenericTumblrV1):
    """Retriever for Moon Beard comics, built on GenericTumblrV1."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://squireseses.tumblr.com"
    _categories = ("MOONBEARD",)
4170
4171
4172
class AComik(GenericTumblrV1):
    """Retriever for A Comik, built on GenericTumblrV1."""
    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
4177
4178
4179
class ClassicRandy(GenericTumblrV1):
    """Retriever for Classic Randy comics, built on GenericTumblrV1."""
    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
4184
4185
4186
class DagssonTumblr(GenericTumblrV1):
    """Retriever for Dagsson comics, built on GenericTumblrV1."""
    # Also on http://www.dagsson.com
    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "https://hugleikurdagsson.tumblr.com"
4192
4193
4194
class LinsEditionsTumblr(GenericTumblrV1):
    """Retriever for L.I.N.S. Editions comics, built on GenericTumblrV1."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "https://linscomics.tumblr.com"
    _categories = ("LINS",)
4202
4203
4204
class WarAndPeasTumblr(GenericTumblrV1):
    """Retriever for War And Peas comics, built on GenericTumblrV1."""
    # Was on https://linscomics.tumblr.com
    name = "warandpeas-tumblr"
    long_name = "War And Peas (from Tumblr)"
    url = "http://warandpeas.tumblr.com"
    _categories = ("WARANDPEAS",)
4211
4212
4213
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for Origami Hot Dish comics, built on GenericTumblrV1."""
    name = "origamihotdish"
    long_name = "Origami Hot Dish"
    url = "http://origamihotdish.com"
4218
4219
4220
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for Hit and Miss Comics, built on GenericTumblrV1."""
    name = "hitandmiss"
    long_name = "Hit and Miss Comics"
    url = "https://hitandmisscomics.tumblr.com"
4225
4226
4227
class HMBlanc(GenericTumblrV1):
    """Retriever for HM Blanc comics, built on GenericTumblrV1."""
    name = "hmblanc"
    long_name = "HM Blanc"
    url = "http://hmblanc.tumblr.com"
4232
4233
4234
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for Tales Of Absurdity comics, built on GenericTumblrV1."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = "absurdity-tumblr"
    long_name = "Tales of Absurdity (from Tumblr)"
    url = "http://talesofabsurdity.tumblr.com"
    _categories = ("ABSURDITY",)
4242
4243
4244
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for Robbie And Bobby comics, built on GenericTumblrV1."""
    # Also on http://robbieandbobby.com
    name = "robbie-tumblr"
    long_name = "Robbie And Bobby (from Tumblr)"
    url = "http://robbieandbobby.tumblr.com"
4250
4251
4252
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for Electric Bunny Comics, built on GenericTumblrV1."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = "bunny-tumblr"
    long_name = "Electric Bunny Comic (from Tumblr)"
    url = "http://electricbunnycomics.tumblr.com"
4258
4259
4260
class Hoomph(GenericTumblrV1):
    """Retriever for Hoomph comics, built on GenericTumblrV1."""
    name = "hoomph"
    long_name = "Hoomph"
    url = "http://hoom.ph"
4265
4266
4267
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for BFGFS comics, built on GenericTumblrV1."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = "bfgfs-tumblr"
    long_name = "BFGFS (from Tumblr)"
    url = "https://bfgfs.tumblr.com"
4274
4275
4276
class DoodleForFood(GenericTumblrV1):
    """Retriever for Doodle For Food comics, built on GenericTumblrV1."""
    # Also on https://tapastic.com/series/Doodle-for-Food
    name = "doodle"
    long_name = "Doodle For Food"
    url = "http://www.doodleforfood.com"
4282
4283
4284
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for C. Cassandra comics, built on GenericTumblrV1."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = "cassandra-tumblr"
    long_name = "Cassandra Calin (from Tumblr)"
    url = "http://c-cassandra.tumblr.com"
4291
4292
4293
class DougWasTaken(GenericTumblrV1):
    """Retriever for Doug Was Taken comics, built on GenericTumblrV1."""
    name = "doug"
    long_name = "Doug Was Taken"
    url = "https://dougwastaken.tumblr.com"
4298
4299
4300
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for Mandatory Roller Coaster comics, built on GenericTumblrV1."""
    name = "rollercoaster"
    long_name = "Mandatory Roller Coaster"
    url = "http://mandatoryrollercoaster.com"
4305
4306
4307
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for C'Est Pas En Regardant Ses Pompes (...) comics, built on GenericTumblrV1."""
    name = "cperspqccltt"
    long_name = "C Est Pas En Regardant Ses Pompes (...)"
    url = "http://marcoandco.tumblr.com"
4312
4313
4314
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics, built on GenericTumblrV1."""
    name = "grohltroll"
    long_name = "The Grohl Troll"
    url = "http://thegrohltroll.com"
4319
4320
4321
class WebcomicName(GenericTumblrV1):
    """Retriever for Webcomic Name comics, built on GenericTumblrV1."""
    name = "webcomicname"
    long_name = "Webcomic Name"
    url = "http://webcomicname.com"
4326
4327
4328
class BooksOfAdam(GenericTumblrV1):
    """Retriever for Books of Adam comics, built on GenericTumblrV1."""
    # Also on http://www.booksofadam.com
    name = "booksofadam"
    long_name = "Books of Adam"
    url = "http://booksofadam.tumblr.com"
4334
4335
4336
class HarkAVagrant(GenericTumblrV1):
    """Retriever for Hark A Vagrant comics, built on GenericTumblrV1."""
    # Also on http://www.harkavagrant.com
    name = "hark-tumblr"
    long_name = "Hark A Vagrant (from Tumblr)"
    url = "http://beatonna.tumblr.com"
4342
4343
4344
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retriever for Our Super Adventure comics, built on GenericTumblrV1."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = "superadventure-tumblr"
    long_name = "Our Super Adventure (from Tumblr)"
    url = "http://sarahssketchbook.tumblr.com"
4352
4353
4354
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for Jake Likes Onions comics, built on GenericTumblrV1."""
    name = "jake"
    long_name = "Jake Likes Onions"
    url = "http://jakelikesonions.com"
4359
4360
4361
class InYourFaceCakeTumblr(GenericTumblrV1):
    """Retriever for In Your Face Cake comics, built on GenericTumblrV1."""
    # Also on https://tapas.io/series/In-Your-Face-Cake
    name = "inyourfacecake-tumblr"
    long_name = "In Your Face Cake (from Tumblr)"
    url = "https://in-your-face-cake.tumblr.com"
    _categories = ("INYOURFACECAKE",)
4368
4369
4370
class Robospunk(GenericTumblrV1):
    """Retriever for Robospunk comics, built on GenericTumblrV1."""
    name = "robospunk"
    long_name = "Robospunk"
    url = "http://robospunk.com"
4375
4376
4377
class BananaTwinky(GenericTumblrV1):
    """Retriever for Banana Twinky comics, built on GenericTumblrV1."""
    name = "banana"
    long_name = "Banana Twinky"
    url = "https://bananatwinky.tumblr.com"
4382
4383
4384
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for Yesterday's Popcorn comics, built on GenericTumblrV1."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = "popcorn-tumblr"
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = "http://yesterdayspopcorn.tumblr.com"
4391
4392
4393
class TwistedDoodles(GenericTumblrV1):
    """Retriever for Twisted Doodles comics, built on GenericTumblrV1."""
    name = "twisted"
    long_name = "Twisted Doodles"
    url = "http://www.twisteddoodles.com"
4398
4399
4400
class UbertoolTumblr(GenericTumblrV1):
    """Retriever for Ubertool comics, built on GenericTumblrV1."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = "ubertool-tumblr"
    long_name = "Ubertool (from Tumblr)"
    url = "https://ubertool.tumblr.com"
    _categories = ("UBERTOOL",)
4408
4409
4410
class LittleLifeLinesTumblr(GenericDeletedComic, GenericTumblrV1):
    """Retriever for Little Life Lines comics, combining GenericDeletedComic and GenericTumblrV1."""
    # Also on http://www.littlelifelines.com
    name = "life-tumblr"
    long_name = "Little Life Lines (from Tumblr)"
    url = "https://little-life-lines.tumblr.com"
4416
4417
4418
class TheyCanTalk(GenericTumblrV1):
    """Retriever for They Can Talk comics, built on GenericTumblrV1."""
    name = "theycantalk"
    long_name = "They Can Talk"
    url = "http://theycantalk.com"
4423
4424
4425
class Will5NeverCome(GenericTumblrV1):
    """Retriever for Will 5:00 Never Come comics, built on GenericTumblrV1."""
    name = "will5"
    long_name = "Will 5:00 Never Come ?"
    url = "http://will5nevercome.com"
4430
4431
4432
class Sephko(GenericTumblrV1):
    """Retriever for Sephko comics, built on GenericTumblrV1."""
    # Also on http://www.sephko.com
    name = "sephko"
    long_name = "Sephko"
    url = "https://sephko.tumblr.com"
4438
4439
4440
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for Blazers At Dawn comics, built on GenericTumblrV1."""
    name = "blazers"
    long_name = "Blazers At Dawn"
    url = "http://blazersatdawn.tumblr.com"
4445
4446
4447
# Deactivated because it downloads too many things
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Retriever for Art By Moga comics, combining GenericEmptyComic and GenericTumblrV1."""
    name = "moga"
    long_name = "Art By Moga"
    url = "http://artbymoga.tumblr.com"
4452
4453
4454
class VerbalVomitTumblr(GenericTumblrV1):
    """Retriever for Verbal Vomit comics, built on GenericTumblrV1."""
    # Also on http://www.verbal-vomit.com
    name = "vomit-tumblr"
    long_name = "Verbal Vomit (from Tumblr)"
    url = "http://verbalvomits.tumblr.com"
4460
4461
4462
class LibraryComic(GenericTumblrV1):
    """Retriever for LibraryComic, built on GenericTumblrV1."""
    # Also on http://librarycomic.com
    name = "library-tumblr"
    long_name = "LibraryComic (from Tumblr)"
    url = "https://librarycomic.tumblr.com"
4468
4469
4470
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Retriever for Tizzy Stitch Bird comics, built on GenericTumblrV1."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = "tizzy-tumblr"
    long_name = "Tizzy Stitch Bird (from Tumblr)"
    url = "http://tizzystitchbird.tumblr.com"
4478
4479
4480
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Retriever for Victims Of Circumsolar comics, built on GenericTumblrV1."""
    # Also on http://www.victimsofcircumsolar.com
    name = "circumsolar-tumblr"
    long_name = "Victims Of Circumsolar (from Tumblr)"
    url = "https://victimsofcomics.tumblr.com"
4486
4487
4488
class RockPaperCynicTumblr(GenericTumblrV1):
    """Retriever for Rock Paper Cynic comics, built on GenericTumblrV1."""
    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    name = "rpc-tumblr"
    long_name = "Rock Paper Cynic (from Tumblr)"
    url = "http://rockpapercynic.tumblr.com"
4495
4496
4497
class DeadlyPanelTumblr(GenericTumblrV1):
    """Retriever for Deadly Panel comics, built on GenericTumblrV1."""
    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    name = "deadly-tumblr"
    long_name = "Deadly Panel (from Tumblr)"
    url = "https://deadlypanel.tumblr.com"
4504
4505
4506
# Not a Tumblr anymore ?
class CatanaComics(GenericComicNotWorking):
    """Placeholder for Catana comics, derived from GenericComicNotWorking."""
    name = "catana"
    long_name = "Catana"
    url = "http://www.catanacomics.com"
4511
4512
4513
class AngryAtNothingTumblr(GenericTumblrV1):
    """Retriever for Angry at Nothing comics, built on GenericTumblrV1."""
    # Also on http://www.angryatnothing.net
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry-tumblr"
    long_name = "Angry At Nothing (from Tumblr)"
    url = "http://angryatnothing.tumblr.com"
4520
4521
4522
class ShanghaiTango(GenericTumblrV1):
    """Retriever for the Shanghai Tango comic, built on GenericTumblrV1."""
    name = "tango"
    long_name = "Shanghai Tango"
    url = "http://tango2010weibo.tumblr.com"
4527
4528
4529
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Retriever for Off The Leash Dog comics, built on GenericTumblrV1."""
    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    name = "offtheleash-tumblr"
    long_name = "Off The Leash Dog (from Tumblr)"
    url = "http://rupertfawcettsdoggyblog.tumblr.com"
    _categories = ("FAWCETT",)
4537
4538
4539
class ImogenQuestTumblr(GenericTumblrV1):
    """Retriever for Imogen Quest comics, built on GenericTumblrV1."""
    # Also on http://imogenquest.net
    name = "imogen-tumblr"
    long_name = "Imogen Quest (from Tumblr)"
    url = "http://imoquest.tumblr.com"
4545
4546
4547
class Shitfest(GenericTumblrV1):
    """Retriever for Shitfest comics, built on GenericTumblrV1."""
    name = "shitfest"
    long_name = "Shitfest"
    url = "http://shitfestcomic.com"
4552
4553
4554
class IceCreamSandwichComics(GenericTumblrV1):
    """Retriever for Ice Cream Sandwich Comics, built on GenericTumblrV1."""
    name = "icecream"
    long_name = "Ice Cream Sandwich Comics"
    url = "http://icecreamsandwichcomics.com"
4559
4560
4561
class Dustinteractive(GenericTumblrV1):
    """Retriever for Dustinteractive comics, built on GenericTumblrV1."""
    name = "dustinteractive"
    long_name = "Dustinteractive"
    url = "http://dustinteractive.com"
4566
4567
4568
class StickyCinemaFloor(GenericTumblrV1):
    """Retriever for Sticky Cinema Floor comics, built on GenericTumblrV1."""
    name = "stickycinema"
    long_name = "Sticky Cinema Floor"
    url = "https://stickycinemafloor.tumblr.com"
4573
4574
4575
class IncidentalComicsTumblr(GenericTumblrV1):
    """Retriever for Incidental Comics, built on GenericTumblrV1."""
    # Also on http://www.incidentalcomics.com
    name = "incidental-tumblr"
    long_name = "Incidental Comics (from Tumblr)"
    url = "http://incidentalcomics.tumblr.com"
4581
4582
4583
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """Retriever for A Pleasant Waste Of Time comics, built on GenericTumblrV1."""
    # Also on https://tapas.io/series/A-Pleasant-
    # NOTE(review): the URL above looks truncated - confirm the full series slug.
    name = "pleasant-waste-tumblr"
    long_name = "A Pleasant Waste Of Time (from Tumblr)"
    url = "https://artjcf.tumblr.com"
    _categories = ("WASTE",)
4590
4591
4592
class HorovitzComicsTumblr(GenericTumblrV1):
    """Retrieve Horovitz comics from their Tumblr using the GenericTumblrV1 logic."""
    # Also on http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ',)
4599
4600
4601
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Retrieve Deep Dark Fears comics using the GenericTumblrV1 logic."""
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4606
4607
4608
class DakotaMcDadzean(GenericTumblrV1):
    """Retrieve Dakota McFadzean comics using the GenericTumblrV1 logic."""
    # The class name keeps its historical spelling so existing references
    # still resolve; the displayed name follows the spelling used in `url`.
    name = 'dakota'
    long_name = 'Dakota McFadzean'
    url = 'http://dakotamcfadzean.tumblr.com'
4613
4614
4615
class ExtraFabulousComicsTumblr(GenericTumblrV1):
    """Retrieve Extra Fabulous Comics using the GenericTumblrV1 logic."""
    # Also on http://extrafabulouscomics.com
    name = 'efc-tumblr'
    long_name = 'Extra Fabulous Comics (from Tumblr)'
    url = 'https://extrafabulouscomics.tumblr.com'
    _categories = ('EFC',)
4622
4623
4624
class AlexLevesque(GenericTumblrV1):
    """Retrieve Alex Levesque comics using the GenericTumblrV1 logic."""
    name = 'alevesque'
    long_name = 'Alex Levesque'
    url = 'http://alexlevesque.com'
    _categories = ('FRANCAIS',)
4630
4631
4632
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """Retrieve James Of No Trades comics using the GenericTumblrV1 logic."""
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES',)
4641
4642
4643
class InfiniteGuff(GenericTumblrV1):
    """Retrieve Infinite Guff comics using the GenericTumblrV1 logic."""
    name = 'infiniteguff'
    long_name = 'Infinite Guff'
    url = 'http://infiniteguff.com'
4648
4649
4650
class SkeletonClaw(GenericTumblrV1):
    """Retrieve Skeleton Claw comics using the GenericTumblrV1 logic."""
    name = 'skeletonclaw'
    long_name = 'Skeleton Claw'
    url = 'http://skeletonclaw.com'
4655
4656
4657
class HorovitzComics(GenericDeletedComic, GenericListableComic):
    """Shared base for the comic series hosted on horovitzcomics.com.

    Concrete subclasses must replace ``link_re`` with a compiled pattern
    matching the archive links of their series; its first group captures
    the comic number.
    """
    # Also on https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ',)
    # Captures year / month / day from the path of the comic image.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: to be provided by each subclass.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        `link` is the archive anchor for the comic (its href holds the comic
        number, its text the title) and `soup` the parsed comic page, which
        is expected to contain exactly one <img id="comic">.
        """
        link_match = cls.link_re.match(link['href'])
        comic_num = int(link_match.group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        # The publication date is encoded in the image path.
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = map(int, date_parts)
        info = {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_num,
        }
        return info

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reverse page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4689
4690
4691
class HorovitzNew(HorovitzComics):
    """Retrieve the 'new' Horovitz comics (archive links under /comics/new/)."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
4696
4697
4698
class HorovitzClassic(HorovitzComics):
    """Retrieve the 'classic' Horovitz comics (archive links under /comics/classic/)."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4703
4704
4705
class GenericGoComic(GenericNavigableComic):
    """Shared base for comics hosted on gocomics.com.

    Subclasses only provide `name`, `long_name` and `url`; navigation and
    metadata extraction rely on the markup of the gocomics pages.
    """
    _categories = ('GOCOMIC',)

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the series page."""
        # The class string (trailing space included) is matched as-is.
        first_btn = 'fa btn btn-outline-default btn-circle fa-backward sm '
        series_soup = get_soup_at_url(cls.url)
        return series_soup.find('a', class_=first_btn)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (if `next_`) or previous comic."""
        if next_:
            btn_class = 'fa btn btn-outline-default btn-circle fa-caret-right js-next-comic hidden-sm-up sm '
        else:
            btn_class = 'fa btn btn-outline-default btn-circle fa-caret-left js-previous-comic sm '
        return last_soup.find('a', class_=btn_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the href of `link` against the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, author, tags) for one comic page."""

        def meta_content(prop):
            """Return the content of the <meta> tag with the given property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
            'author': author,
            'tags': tags,
        }
        return info
4742
4743
4744
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve Pearls Before Swine comics from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4749
4750
4751
class Peanuts(GenericGoComic):
    """Retrieve Peanuts comics from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4756
4757
4758
class MattWuerker(GenericGoComic):
    """Retrieve Matt Wuerker comics from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4763
4764
4765
class TomToles(GenericGoComic):
    """Retrieve Tom Toles comics from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4770
4771
4772
class BreakOfDay(GenericGoComic):
    """Retrieve Break Of Day comics from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4777
4778
4779
class Brevity(GenericGoComic):
    """Retrieve Brevity comics from gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
4784
4785
4786
class MichaelRamirez(GenericGoComic):
    """Retrieve Michael Ramirez comics from gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4791
4792
4793
class MikeLuckovich(GenericGoComic):
    """Retrieve Mike Luckovich comics from gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4798
4799
4800
class JimBenton(GenericGoComic):
    """Retrieve Jim Benton comics from gocomics.com."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4806
4807
4808
class TheArgyleSweater(GenericGoComic):
    """Retrieve The Argyle Sweater comics from gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4813
4814
4815
class SunnyStreet(GenericGoComic):
    """Retrieve Sunny Street comics from gocomics.com."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4821
4822
4823
class OffTheMark(GenericGoComic):
    """Retrieve Off The Mark comics from gocomics.com."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4829
4830
4831
class WuMo(GenericGoComic):
    """Retrieve WuMo comics from gocomics.com."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4837
4838
4839
class LunarBaboon(GenericGoComic):
    """Retrieve Lunar Baboon comics from gocomics.com."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4846
4847
4848
class SandersenGocomic(GenericGoComic):
    """Retrieve Sarah Andersen comics (Sarah's Scribbles) from gocomics.com."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4855
4856
4857
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retrieve Saturday Morning Breakfast Cereal comics from gocomics.com."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC',)
4865
4866
4867
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retrieve Calvin and Hobbes comics from gocomics.com."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4873
4874
4875
class RallGoComic(GenericGoComic):
    """Retrieve Ted Rall comics from gocomics.com."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL',)
4882
4883
4884
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retrieve The Awkward Yeti comics from gocomics.com."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI',)
4893
4894
4895
class BerkeleyMewsGoComics(GenericGoComic):
    """Retrieve Berkeley Mews comics from gocomics.com."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY',)
4903
4904
4905
class SheldonGoComics(GenericGoComic):
    """Retrieve Sheldon comics from gocomics.com."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4911
4912
4913
class FowlLanguageGoComics(GenericGoComic):
    """Retrieve Fowl Language comics from gocomics.com."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE',)
4922
4923
4924
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics from gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4929
4930
4931
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics from gocomics.com."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD',)
4938
4939
4940
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc Comics from gocomics.com."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4946
4947
4948
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4953
4954
4955
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4960
4961
4962
class MisterAndMeGoComics(GenericDeletedComic, GenericGoComic):
    """Retrieve Mister & Me comics from gocomics.com (also a GenericDeletedComic)."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4969
4970
4971
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (Wiley Miller) comics from gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4976
4977
4978
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Subclasses only provide `name`, `long_name` and `url` (the series page).
    The list of episodes is read from the JavaScript embedded in that page.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `archive_elt` is one entry of the list returned by
        `get_archive_elements`; its 'publishDate', 'title' and 'id' keys are
        used. `soup` is the parsed episode page.

        Returns the info dict, or None when the page has no 'art-image'
        image yet (a message is printed in that case).
        """
        # 'publishDate' is divided by 1000 to get the seconds expected by
        # fromtimestamp, which converts using the local timezone.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Build the episode URL from the 'id' of an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list decoded from the series page.

        Information is stored in the javascript part of the page: the first
        line which, once stripped, starts with 'episodeList : ' and ends with
        ',' holds a JSON value between these two markers.

        Raises IndexError when no such line is found.
        """
        pref, suff = 'episodeList : ', ','
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
5011
5012
5013
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics from tapastic.com."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
5019
5020
5021
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics from tapastic.com."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE',)
5030
5031
5032
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics from tapastic.com."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
5037
5038
5039
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics from tapastic.com."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
5044
5045
5046
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics (Doodle Time) from tapastic.com."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
5053
5054
5055
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve Tubey Toons comics from tapastic.com."""
    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; left as-is
    # because other classes may share this key - rename them together.
    _categories = ('TUNEYTOONS',)
5063
5064
5065
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic comics from tapastic.com."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
5071
5072
5073
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed Comics from tapastic.com."""
    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
5081
5082
5083
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from tapastic.com."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
5090
5091
5092
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from tapastic.com."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
5098
5099
5100
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from tapastic.com."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    # NOTE(review): the slug ends in 'Thundershac' - confirm it is not truncated.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
5107
5108
5109
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from tapastic.com."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
5116
5117
5118
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from tapastic.com."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
5125
5126
5127
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know Comics from tapastic.com."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
5133
5134
5135
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from tapastic.com."""
    # Also on http://www.cheerupemokid.com
    # Also on https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
5142
5143
5144
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from tapastic.com."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
5150
5151
5152
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from tapastic.com."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
5158
5159
5160
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from tapastic.com."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
5166
5167
5168
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from tapastic.com."""
    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
5175
5176
5177
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from tapastic.com."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
5183
5184
5185
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from tapastic.com."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
5194
5195
5196
class AsPerUsualTapa(GenericTapasticComic):
    """Retrieve As Per Usual comics from tapastic.com."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Categories are declared through `_categories`, like in the sibling classes.
    _categories = ('DAMILEE', )
5203
5204
5205
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retrieve Hot Comics For Cool People from tapastic.com."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Categories are declared through `_categories`, like in the sibling classes.
    _categories = ('DAMILEE', )
5214
5215
5216
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from tapastic.com."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
5224
5225
5226
class TumbleDryTapa(GenericTapasticComic):
    """Retrieve Tumble Dry comics from tapastic.com."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Displayed name: the comic is "Tumble Dry" (see url), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5232
5233
5234
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from tapastic.com."""
    # Also on http://www.deadlypanel.com
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5241
5242
5243
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from tapastic.com."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # other classes may share it - rename them together.
    _categories = ('HALLBACK', )
5251
5252
5253
class ChrisHallbeckMiniTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from tapastic.com (also a GenericDeletedComic)."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # other classes may share it - rename them together.
    _categories = ('HALLBACK', )
5261
5262
5263
class ChrisHallbeckBiffTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from tapastic.com (also a GenericDeletedComic)."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # other classes may share it - rename them together.
    _categories = ('HALLBACK', )
5271
5272
5273
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from tapastic.com."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5279
5280
5281
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from tapastic.com."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5287
5288
5289
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from tapastic.com."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5296
5297
5298
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from tapastic.com."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5304
5305
5306
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from tapastic.com."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5313
5314
5315
class TalesOfAbsurdityTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from tapastic.com (also a GenericDeletedComic)."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
5323
5324
5325
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from tapastic.com."""
    # Also on http://bfgfs.com
    # Also on https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5332
5333
5334
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from tapastic.com."""
    # Also on http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5340
5341
5342
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from tapastic.com."""
    # Also on https://tapastic.com/series/MrLovenstein
    # NOTE(review): the "Also on" link above is this class's own url;
    # presumably the comic's own site was meant - confirm and replace.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5348
5349
5350
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve Cassandra Calin (C. Cassandra) comics from tapastic.com."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5357
5358
5359
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from tapastic.com."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5365
5366
5367
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from tapastic.com."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5374
5375
5376
class OurSuperAdventureTapastic(GenericDeletedComic, GenericTapasticComic):
    """Retrieve Our Super Adventure comics from tapastic.com (also a GenericDeletedComic)."""
    # Also on http://www.oursuperadventure.com
    # http://sarahssketchbook.tumblr.com
    # http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5384
5385
5386
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from tapastic.com."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5392
5393
5394
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve Down The Upward Spiral comics from tapastic.com."""
    # Also on http://www.downtheupwardspiral.com
    # Also on http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5401
5402
5403
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from tapastic.com."""
    # Also on http://ubertoolcomic.com
    # Also on https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
5411
5412
5413
class BarteNerdsTapa(GenericDeletedComic, GenericTapasticComic):
    """Tapastic-hosted edition of BarteNerds."""
    # GenericDeletedComic comes first in the bases, so it takes precedence in
    # the MRO -- presumably this flags the series as no longer retrievable;
    # confirm against GenericDeletedComic.
    # Same comic published elsewhere:
    #   http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5419
5420
5421
class SmallBlueYonderTapa(GenericTapasticComic):
    """Tapastic-hosted edition of Small Blue Yonder."""
    # The class only sets attributes; all methods come from the base class.
    # Same comic published elsewhere:
    #   http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5427
5428
5429
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Tapastic-hosted edition of Tizzy Stitch Bird."""
    # The class only sets attributes; all methods come from the base class.
    # Same comic published elsewhere:
    #   http://tizzystitchbird.com
    #   http://tizzystitchbird.tumblr.com
    #   http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5437
5438
5439
class RockPaperCynicTapa(GenericTapasticComic):
    """Tapastic-hosted edition of Rock Paper Cynic."""
    # The class only sets attributes; all methods come from the base class.
    # Same comic published elsewhere:
    #   http://www.rockpapercynic.com
    #   http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5446
5447
5448
class IsItCanonTapa(GenericTapasticComic):
    """Tapastic-hosted edition of Is It Canon."""
    # The class only sets attributes; all methods come from the base class.
    # Same comic published elsewhere:
    #   http://www.isitcanon.com
    name = 'canon-tapa'
    long_name = 'Is It Canon (from Tapastic)'
    # NOTE(review): plain http here, while the neighbouring Tapastic classes
    # use https -- left untouched; confirm whether the scheme matters.
    url = 'http://tapastic.com/series/isitcanon'
5454
5455
5456
class ItsTheTieTapa(GenericTapasticComic):
    """Tapastic-hosted edition of It's the tie."""
    # The class only sets attributes; all methods come from the base class.
    # Same comic published elsewhere:
    #   http://itsthetie.com
    #   http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    # One-element tuple; presumably a tag shared by every "It's the tie"
    # source -- confirm against how GenericComic consumes _categories.
    _categories = ('TIE',)
5464
5465
5466
class JamesOfNoTradesTapa(GenericTapasticComic):
    """Tapastic-hosted edition of James Of No Trades."""
    # The class only sets attributes; all methods come from the base class.
    # Same comic published elsewhere:
    #   http://jamesofnotrades.com
    #   http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    #   http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    long_name = 'James Of No Trades (from Tapastic)'
    url = 'https://tapas.io/series/James-of-No-Trades'
    # One-element tuple; presumably a tag shared by every source of this
    # comic -- confirm against how GenericComic consumes _categories.
    _categories = ('JAMESOFNOTRADES',)
5475
5476
5477
class MomentumTapa(GenericTapasticComic):
    """Tapastic-hosted edition of Momentum."""
    # The class only sets attributes; all methods come from the base class.
    # Same comic published elsewhere:
    #   http://www.momentumcomic.com
    name = 'momentum-tapa'
    long_name = 'Momentum (from Tapastic)'
    url = 'https://tapastic.com/series/momentum'
5483
5484
5485
class InYourFaceCakeTapa(GenericTapasticComic):
    """Tapastic-hosted edition of In Your Face Cake."""
    # The class only sets attributes; all methods come from the base class.
    # Same comic published elsewhere:
    #   https://in-your-face-cake.tumblr.com
    name = 'inyourfacecake-tapa'
    long_name = 'In Your Face Cake (from Tapastic)'
    url = 'https://tapas.io/series/In-Your-Face-Cake'
    # One-element tuple; presumably a tag shared by every source of this
    # comic -- confirm against how GenericComic consumes _categories.
    _categories = ('INYOURFACECAKE',)
5492
5493
5494
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """Tapastic-hosted edition of A Pleasant Waste Of Time."""
    # The class only sets attributes; all methods come from the base class.
    # Same comic published elsewhere:
    #   https://artjcf.tumblr.com
    name = 'pleasant-waste-tapa'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    # NOTE(review): the series slug ends with '-', which looks truncated but
    # may be the real address -- left untouched; confirm against the site.
    url = 'https://tapas.io/series/A-Pleasant-'
    # One-element tuple; presumably a tag shared by every source of this
    # comic -- confirm against how GenericComic consumes _categories.
    _categories = ('WASTE',)
5501
5502
5503
def get_subclasses(klass):
    """Gets the list of direct/indirect subclasses of a class.

    Each subclass appears exactly once, even when it is reachable through
    several bases (multiple inheritance).  Direct subclasses come first, in
    the order reported by ``__subclasses__()``, followed by the descendants
    of each of them in turn.

    :param klass: the class whose descendants are wanted.
    :return: a new list of classes; ``klass`` itself is not included.
    """
    subclasses = []
    seen = set()

    def _collect(cls):
        """Append the not-yet-seen descendants of cls to subclasses."""
        fresh = [sub for sub in cls.__subclasses__() if sub not in seen]
        # Mark before recursing so a class shared by two branches is only
        # listed (and its own subtree only walked) the first time it is met.
        seen.update(fresh)
        subclasses.extend(fresh)
        for sub in fresh:
            _collect(sub)

    _collect(klass)
    return subclasses
5509
5510
5511
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix that directly follows a digit is removed, so
    "1st" becomes "1" while words that merely contain the same letters
    ("Monday", "Saturday", "August", ...) are left intact.  The match is
    case-sensitive (lowercase suffixes only).

    :param string: date text, e.g. "Monday 2nd August 2015".
    :return: the same text with the ordinal suffixes stripped.
    """
    # The look-behind anchors the suffix to a preceding digit without
    # consuming it, so only the two suffix letters are dropped.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
5519
5520
5521
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime that parses under the
    requested locale and then puts the previous locale back, including when
    parsing fails.

    :param string: text to parse.
    :param date_format: strptime format string.
    :param local: locale name to switch LC_ALL to while parsing.
    :return: a datetime.date.
    :raises ValueError: if string does not match date_format.
    :raises locale.Error: if the requested locale cannot be set.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already under the requested locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is process-wide state: restore it even if strptime
        # raised, otherwise a single bad date would leak the locale change.
        locale.setlocale(locale.LC_ALL, prev_locale)
5532
5533
5534
def _duplicated(values):
    """Return the sorted list of values that occur more than once."""
    seen = set()
    repeated = set()
    for value in values:
        (repeated if value in seen else seen).add(value)
    return sorted(repeated)


# Every class deriving (directly or not) from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Only the classes that define a name are kept; presumably the generic
# helper classes leave it as None -- confirm against GenericComic.
VALID_COMICS = [c for c in COMICS if c.name is not None]
# Lookup table from comic name to class; names must be unique.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES), \
    "Comic name used by several classes: %s" % _duplicated(
        c.name for c in VALID_COMICS)
# Class names must be unique too.
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), \
    "Class name used by several comics: %s" % _duplicated(
        c.__name__ for c in VALID_COMICS)
5540