Completed
Push — master ( 3bbc38...95f075 )
by De
34s
created

comics.py (52 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Locale name for British English with UTF-8 encoding.
# NOTE(review): presumably the default locale used when parsing dates - confirm against its users.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, oldest first.

        Implementation of GenericComic's abstract method. When `last_comic`
        is falsy, iteration starts at comic number 1. The upper bound is the
        'num' field of the site-level `info.0.json`.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        newest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(start, newest + 1):
            # Number 404 is never fetched (presumably no such comic exists).
            if num == 404:
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # Downstream code expects a list of images.
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # Date parts are converted to integers.
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the raw 'href' value of `link`.

    Usable as get_url_from_link or get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' of `link` joined to the class-level `url`.

    Usable as get_url_from_link or get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where the previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True to look for the next comic, False for the previous.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Expected to return a dict without a 'url' key, or None when the
        page should be skipped (see `get_next_comic`).
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (and log it)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (and log it)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` when provided, otherwise from the first
        comic link, and follows "next" links until there is none or until
        a link points back to the current url.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            # A "next" link pointing to the current page means we reached the end.
            if prev_url == url:
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            # A None result means "nothing to retrieve here": keep navigating.
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its url can be fetched
        without an HTTP error, False otherwise.
        """
        # A bare `import urllib` does not guarantee that the `urllib.error`
        # submodule is loaded, so import it explicitly before using it.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when the checks
        that could be performed all passed.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links` and
        returns True only when both succeeded.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable of the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url matching an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about one comic (a dict, or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. When `last_comic` is given,
        everything up to and including its url is skipped. A message is
        printed if that url is never met.
        """
        pending_url = last_comic['url'] if last_comic else None
        elements = list(cls.get_archive_elements())
        for elt in elements:
            url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last comic retrieved.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(elt)))
            info = cls.get_comic_info(get_soup_at_url(url), elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (pending_url, len(elements)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next|prev">."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel_value)
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next|prev">."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel_value)
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next|navi-prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-* navi-*' anchor classes."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-*' anchor classes."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: <a class="navi navi-first"> on the main page."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the first <a> found in <div class="nav-first"> on the main
    page, or None when the page has no such div (instead of failing with
    an AttributeError on the missing element).
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: <a class="comic-nav-base comic-nav-first"> on the main page."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict
    from the `first_url` attribute provided by the class.

    Note: the first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    fake_link = {'href': cls.first_url}
    return fake_link
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a
    starting URL until no previous comic is found.

    The starting URL is the class attribute `first_url` when it exists,
    otherwise it is asked interactively. Every URL visited is printed.

    Sometimes the first comic cannot be reached directly, so one has to
    follow "previous" links until there is none left. Once the URL is
    known it is better to hardcode it (see `simulate_first_link`), but
    for development purposes an automatic way to find it is convenient.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            break
        url = cls.get_url_from_link(prev_link)
    return {'href': url}
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It is useful to temporarily deactivate comics that do not work
    properly, by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic: log the fact and return no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
358
359
360 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Retriever for Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and publication date from a comic page.

        Images are the <img> tags whose source lies under the site's
        wp-content/uploads directory. The date comes from the first 10
        characters of the 'article:published_time' meta.
        """
        uploads_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        pictures = soup.find_all('img', src=uploads_re)
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
384
385
386 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic retriever for comics hosted on Le Monde blogs.

    Subclasses are expected to provide `first_url`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short link, title, date and images from a blog entry."""
        short_link = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        entry_date = soup.find("span", class_="entry-date").string
        # The date is parsed with a French locale name.
        day = string_to_date(entry_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': short_link,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
409
410
411
class ZepWorld(GenericLeMondeBlog):
    """Retriever for Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
417
418
419
class Vidberg(GenericLeMondeBlog):
    """Retriever for Vidberg comics (Le Monde blog)."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # This is not the very first entry: no efficient way to retrieve it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
426
427
428
class Plantu(GenericLeMondeBlog):
    """Retriever for Plantu comics (Le Monde blog)."""
    name = 'plantu'
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
434
435
436
class XavierGorce(GenericLeMondeBlog):
    """Retriever for Xavier Gorce comics (Le Monde blog)."""
    name = 'gorce'
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
442
443
444
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for Cartooning For Peace comics (Le Monde blog)."""
    name = 'forpeace'
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
450
451
452
class Aurel(GenericLeMondeBlog):
    """Retriever for Aurel comics (Le Monde blog)."""
    name = 'aurel'
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
458
459
460
class LesCulottees(GenericLeMondeBlog):
    """Retriever for Les Culottees comics (Le Monde blog)."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
466
467
468
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for Une Annee Au Lycee comics (Le Monde blog)."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
474
475
476
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # This is not the very first entry: no efficient way to retrieve it was found.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, description and images from a comic page."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        entry_date = soup.find("span", class_="entry-date").string
        day = string_to_date(entry_date, "%B %d, %Y")
        description = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: they are social media buttons.
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
507
508
509
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the <a> inside the relevant <li>, or None when there is no
        such <li>.
        """
        # On this site the classes are swapped: prev is next / next is prev.
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short url, title, images and date from a comic page."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (YYYY-MM-DD) of the date are used.
        iso_day = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
543
544
545
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        return {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images for a comic.

        The title comes from the link itself and the date from the
        year/month/day components of the comic url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, url_date_re.match(comic_url).groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
573
574
575
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from a comic page.

        Sanity checks (via assert): at least one image is found, and each
        image has identical 'alt' and 'title' attributes which are either
        empty or equal to the post title.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('h2', class_='post-title').string
        post_date = post.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (title, "") for img in comic_imgs)
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
608
609
610
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Retriever for It's the tie comics.

    GenericEmptyComic comes first in the bases, so its `get_next_comic`
    (returning no comics) is the one in effect.
    """
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date, images and tags from a comic page."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content']
                     for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [img['data-oversrc']
                      for img in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(main_srcs)
        for src in bonus_srcs:
            if src not in main_srcs:
                candidates.append(src)
        # The generic "bonus panel" picture is filtered out.
        all_srcs = [src for src in candidates
                    if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_srcs,
            'tags': tags,
        }
644
645
646
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a blog entry."""
        header_date = soup.find('h2', class_='date-header').string
        # The date is parsed with a French locale name.
        day = string_to_date(header_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [img['src'] for img in body_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
670
671
672
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Retriever for 1111 Comics.

    GenericEmptyComic comes first in the bases, so its `get_next_comic`
    (returning no comics) is the one in effect.
    """
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
        }
697
698
699
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Retriever for Angry at Nothing comics.

    GenericEmptyComic comes first in the bases, so its `get_next_comic`
    (returning no comics) is the one in effect.
    """
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
        }
723
724
725
class NeDroid(GenericNavigableComic):
    """Retriever for NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short url, number, titles and image from a comic page.

        The comic number is the `p` parameter of the short url. Exactly
        one image is expected in the comic div (checked via assert); its
        'alt' and 'title' attributes provide the two titles.
        """
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % cls.url)
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).group(1))
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert len(comic_imgs) == 1
        picture = comic_imgs[0]
        title = picture['alt']
        title2 = picture['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
            'num': num,
        }
751
752
753
class Garfield(GenericNavigableComic):
    """Retriever for the Garfield comic strip."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the arrow anchor to the next (or previous) comic."""
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date (parsed from the url) and image urls of a comic."""
        page_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        # The url path carries year/month/day in that order.
        year, month, day = map(int, date_pattern.match(page_url).groups())
        display = soup.find('div', class_='comic-display')
        imgs = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [tag['src'] for tag in imgs],
        }
781
782 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
783
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comic strip."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, or None."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the metadata read from the page's <meta> tags."""

        def meta_content(prop):
            """Return the 'content' of the first <meta> with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        imgs = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in imgs],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
819
820
821
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar webcomic."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls of a comic page."""
        # Date is on the archive page
        # The last og:title / og:description meta tag on the page is used.
        og_titles = soup.find_all('meta', property='og:title')
        title = og_titles[-1]['content']
        og_descriptions = soup.find_all('meta', property='og:description')
        desc = og_descriptions[-1]['content']
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        assert all(tag['title'] == tag['alt'] == title for tag in imgs)
        return {
            'title': title,
            'description': desc,
            'img': [tag['src'] for tag in imgs],
        }
844
845
846
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase webcomic.

    Navigation is done through the anchors wrapping the first/next/prev
    button images.
    """
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the "first" button, or None if absent."""
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        # find() returns None when the button is missing: do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic.

        Returns None when the button image is missing or when its parent
        element has no 'href' attribute.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt texts and image urls of a comic page.

        Every <img> of the page is kept except those whose 'src' ends with
        one of the known non-comic file names.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
878
879
880
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel webcomic."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls found in the page's comic div."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        img_urls = [tag['src'] for tag in imgs]
        return {
            'img': img_urls,
        }
898
899
900
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair webcomic."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and image urls of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        return {
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
924
925
926
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest webcomic."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, titles, author and image urls of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        imgs = pane.find_all('img')
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        # The secondary title is the hover text of the first image.
        hover_text = imgs[0]['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'title2': hover_text,
            'author': author,
        }
954
955
956
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life webcomic."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published = string_to_date(date_text, "%B %d, %Y")
        imgs = soup.find_all("img", class_="comic")
        assert all(tag['alt'] == tag['title'] == title for tag in imgs)
        return {
            'title': title,
            # Images with an empty 'src' are left out.
            'img': [tag['src'] for tag in imgs if tag["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
983
984
985
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal webcomic."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel="start" anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls (main + after-comic) of a page."""
        main_img = soup.find('img', id='cc-comic')
        img_srcs = [main_img['src']]
        # Optional second image located in the 'aftercomic' div.
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_src = after_div.find('img')['src']
            if after_src:
                img_srcs.append(after_src)
        date_text = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in img_srcs],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1017
1018
1019
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship webcomic."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the thumbnail anchors of the home page, in reversed order."""
        thumbnails = get_soup_at_url(cls.url).find('div', id='all_thumbnails')
        anchors = thumbnails.find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the name and the single image url of a comic page."""
        name = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        assert len(og_images) == 1
        return {
            'name': name,
            'img': [meta['content'] for meta in og_images],
        }
1042
1043
1044
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks webcomic."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the metadata read from the page's <meta> tags."""
        title = soup.find('meta', property='og:title')['content']
        # The description meta tag is optional.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1070
1071
1072
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews webcomic."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the number in their "?p=" query.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, date and image url of a comic page.

        The date is parsed from the image url.
        """
        img_date_pattern = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        hover_text = img['title']
        img_url = img['src']
        year, month, day = map(int, img_date_pattern.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1108
1109
1110
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever logic for the BouletCorp sites (one per language)."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's 'centered_nav' div."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date (from the url), title, hover texts and image urls."""
        page_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_pattern.match(page_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        imgs = story.find_all('img')
        # Join the non-empty hover texts of the images.
        hover_texts = [tag.get('title') for tag in imgs]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(tag['src'])
                    for tag in imgs if tag.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1138
1139
1140
class BouletCorp(GenericBouletCorp):
    """Retriever for the Boulet Corp webcomic (bouletcorp.com site)."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # This class-level value replaces the one set on GenericBouletCorp.
    _categories = ('FRANCAIS',)
1146
1147
1148
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the English Boulet Corp webcomic."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1153
1154
1155
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers webcomic."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and image urls of a comic page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # The title is built from the hover texts of all the images.
        title = ' '.join(tag['title'] for tag in imgs)
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        return {
            'title': title,
            'author': author,
            'img': [tag['src'] for tag in imgs],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1180
1181
1182
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole webcomic."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, date and image urls of a comic page."""
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        date_text = meta_div.contents[0].strip()
        published = string_to_date(date_text, "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        # The title comes from the first image; empty when there is none.
        title = ""
        if imgs:
            first_img = imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(tag['src']) for tag in imgs],
        }
1212
1213
1214
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate webcomic."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return metadata and image urls, including any extra panel.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        imgs = []
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs.extend(comic_div.find_all('img'))
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            imgs.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in imgs],
        }
1248
1249
1250
class CyanideAndHappiness(GenericNavigableComic):
    """Retriever for the Cyanide and Happiness webcomic."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'Oldest comic' anchor of the home page."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic.

        Returns None when no such anchor exists or when it has no 'href'.
        """
        # NOTE(review): 'previous-comic ' carries a trailing space; it is kept
        # unchanged here - confirm it matches the site's markup.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when there is no such anchor: treat it as "no link".
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, author, date and image urls of a comic page."""
        url2 = soup.find('meta', property='og:url')['content']
        # The comic number is the second-to-last '/'-separated part of og:url.
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        # Drop the leading 'by '.
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1289
1290
1291
class MrLovenstein(GenericComic):
    """Retriever for the Mr. Lovenstein webcomic."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic, one dict per comic number.

        The number range comes from the '/comic/<num>' links of the home page;
        it starts right after last_comic['num'] when last_comic is given.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        num_pattern = re.compile('^/comic/([0-9]*)$')
        home = get_soup_at_url(cls.url)
        nums = [int(num_pattern.match(anchor['href']).group(1))
                for anchor in home.find_all('a', href=num_pattern)]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        img_src_pattern = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            # Images are kept in reverse document order.
            imgs = page.find_all('img', src=img_src_pattern)[::-1]
            description = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [tag.get('title') for tag in imgs]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, tag['src']) for tag in imgs],
                'description': description,
            }
1321
1322
1323
class DinosaurComics(GenericListableComic):
    """Retriever for Dinosaur Comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the number in their "comic=" query.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        anchors = get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, date, texts and image url of a comic.

        Date and text are read from the archive element; image and title are
        read from the comic page.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_text = link.string
        text = link.next_sibling.string
        cleaned_date = remove_st_nd_rd_th_from_date(date_text)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        img_src_pattern = re.compile('^%s/comics/' % cls.url)
        img = soup.find('img', src=img_src_pattern)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1356
1357
1358
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe webcomic."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the three numeric path components.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date (parsed from the url) and image url of a comic."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        comic_div = soup.find('div', id='comic')
        img = comic_div.find('img')
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1386
1387
1388
class CalvinAndHobbes(GenericComic):
    """Retriever for the Calvin and Hobbes comic strip."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics dated after last_comic, one dict per strip.

        Monthly pages are read from the '<year>/<month>/' links of the index
        page; a month is visited only when it may hold strips dated after the
        last known date (1985-11-01 when last_comic is empty).
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the textual year/month: they are reused verbatim in the
            # image pattern and in the image url.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            # Image file names start with year and month followed by the day.
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1422
1423
1424
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose webcomic."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the trailing comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the urls of the strip images.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        archive_soup = get_soup_at_url(archive_url)
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image url of a comic."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        title = archive_elt.string
        strip_img = soup.find('img', src=cls.comic_img_re)
        return {
            'num': num,
            'title': title,
            'img': [strip_img['src']]
        }
1447
1448
1449
class PhDComics(GenericNavigableComic):
    """Retriever for PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the "first" button, or None if absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/prev button, or None."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title and image urls read from the page's <meta> tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
        }
1478
1479
1480
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the parent of the 'First.png' image."""
        first_img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, or None when the arrow has no href."""
        if next_:
            arrow_re = re.compile('.*/Next.png')
        else:
            arrow_re = re.compile('.*/Back.png')
        anchor = last_soup.find('img', src=arrow_re).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the publication date and the image urls from a comic page."""
        title = soup.find('h3', class_='post-title entry-title').string
        date_header = soup.find('h2', class_='date-header')
        published = string_to_date(date_header.string, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [tag['href'] for tag in image_links],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1512
1513
1514
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by the anchor id."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the (absolute) image urls from an article page."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in article_imgs],
        }
1538
1539
1540
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the anchor whose href ends with 'comic=1'."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by the anchor title."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the number (from the url), the image url and the image title."""
        strip_src_re = re.compile('^/oc/comics/.*')
        number_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(number_re.match(page_url).group(1))
        strip = soup.find('img', src=strip_src_re)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, strip['src'])],
            'title': strip.get('title')
        }
1570
1571
1572
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the parent of the div with id 'st'."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, or None when the div is absent."""
        div_id = "nx" if next_ else "pvs"
        nav_div = last_soup.find("div", id=div_id)
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the title image and the strip image from a comic page.

        Exactly one title image and one strip image are expected; the
        description is made of the titles of both images.
        """
        title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strips = soup.find_all('img', id='strip')
        assert len(strips) == 1
        all_imgs = header_imgs + strips
        description = ' '.join(tag['title'] for tag in all_imgs)
        return {
            'title': title,
            'img': [tag['src'] for tag in all_imgs],
            'description': description,
        }
1606
1607
1608
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by the anchor accesskey."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the description and the image urls from a comic page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        description = soup.find('meta', property='og:description')['content']
        comic_imgs = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [tag['src'] for tag in comic_imgs],
        }
1632
1633
1634
class SomethingOfThatIlk(GenericEmptyComic):
    """Class to retrieve the Something Of That Ilk comics.

    The website does not exist anymore.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1639
1640
1641
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the image urls from a comic page."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_imgs],
        }
1659
1660
1661
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the bookmark links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the date, the image, the title, the alt text and the tags of a post.

        Posts without a 'comic' div get no image and empty title and alt text.
        """
        postdate = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(postdate), "%B %d, %Y")
        # Values used when the page holds no comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            strip = comic_div.find('img')
            img_src = [strip['src']]
            alt = strip['alt']
            assert alt == strip['title']
            title = soup.find('meta', property='og:title')['content']
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(
                tag.string
                for tag in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }
1698
1699
1700
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the publication date and the image urls from a comic page."""
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1722
1723
1724
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the image urls and the alt text from a comic page.

        Every image is expected to have identical 'alt' and 'title' attributes;
        the alt text returned is the one of the first image.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        comic_imgs = comic_div.find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1745
1746
1747
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the author, the date and the image urls from a comic page.

        Every image is expected to carry the post title as both 'alt' and 'title'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        published = string_to_date(post_date, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == title for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
1773
1774
1775
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the image urls from a comic page; the title joins the image titles.

        Every image is expected to have identical 'title' and 'alt' attributes.
        """
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in comic_imgs)
        joined_titles = ' '.join(tag['title'] for tag in comic_imgs)
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': joined_titles,
        }
1794
1795
1796
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The url used to end with a stray space, which is not part of the address.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The title, the author and the publication date are read from the meta
        tags of the page. The images are the 'og:image' meta tags, except for
        the two urls listed in skip_imgs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # 'og:image' urls that are left out of the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1828
1829
1830
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the date, the image urls and the alt text from a comic page.

        The alt text is the one of the first image; every image is expected to
        have identical 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1857
1858
1859
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the author, the date and the image urls from a comic page.

        At least one image is expected, each carrying the post title as both
        'title' and 'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        assert all(tag['title'] == tag['alt'] == title for tag in pane_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'author': author,
        }
1887
1888
1889
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, short url, image urls, tags and date from a post page."""
        title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        tags = ' '.join(tag.string for tag in soup.find_all('a', rel='tag'))
        # Only the first 10 characters of the datetime attribute are parsed.
        iso_day = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [tag['src'] for tag in content_imgs],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1916
1917
1918
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the anchor with id 'firstlink'."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by the anchor id."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read both titles, the alt text, the image url and the number of a comic.

        The number is the last '/'-separated component of the comic url.
        """
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = strip.get('title')
        img_url = urljoin_wrapper(page_url, strip['src'].strip())
        number = int(page_url.split('/')[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': number,
        }
1947
1948
1949
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archive page, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the first link found in an archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comic.

        The title comes from the archive cell. The date is rebuilt from the
        text of the sibling preceding the cell (parsed as '%b %d') and from the
        year, which is the first numeric path component of the comic url. The
        image of the 'comic' div is expected to carry the title as both
        'title' and 'alt'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # The url is escaped so that its '.' only matches a literal dot.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        day = string_to_date(month_and_day + ' ' + year, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1985
1986
1987
class DiscoBleach(GenericEmptyComic):
    """Class to retrieve Disco Bleach Comics.

    Does not work anymore.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1992
1993
1994
class TubeyToons(GenericEmptyComic):
    """Class to retrieve TubeyToons comics.

    Does not work anymore.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of 'TUBEYTOONS' -
    # check how other classes spell this category before changing it.
    _categories = ('TUNEYTOONS', )
2002
2003
2004
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, author, date, image urls and alt text from a comic page.

        At least one image is expected; the alt text is the 'title' of the
        first image and every image must use it as both 'title' and 'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is the second child node of the span.
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt = pane_imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == alt for tag in pane_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
2032
2033
2034
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        At most one image is expected in the 'post' div. Without any image,
        the image list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # The url is escaped so that its '.' only matches a literal dot.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2058
2059
2060
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the anchor titled 'First'."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by the anchor title."""
        anchor_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the date and the image urls from a comic page.

        Only the images of the 'comic' div with empty 'alt' and 'title' are kept.
        """
        title = soup.find('h1').string
        date_text = soup.find('span', class_='date').string.strip()
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2090
2091
2092
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the date, the author, the image urls and the title from a comic page.

        The title is the 'title' of the first image (empty without image) and
        every image is expected to use it as both 'title' and 'alt'.
        """
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(post_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
2118
2119
2120
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the parent of the image named 'beginArrow'."""
        begin_arrow = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic: the parent of the arrow image."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the image urls from the page's meta tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2146
2147
2148 View Code Duplication
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the post list, in reversed page order."""
        all_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        posts = all_soup.find('ul', class_='post-list')
        post_links = posts.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Read the title and the image urls from the page's meta tags."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2170
2171
2172
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Read the details of a comic from its page and from its archive row.

        The row must hold exactly three cells: the second one provides the
        title (text of its link) and the third one the date. The second title,
        the description, the tags and the images come from the comic page.
        """
        _, title_cell, date_cell = tr.find_all('td')
        title_link = title_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = title_link.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tags = ' '.join(
            meta['content'] for meta in soup.find_all('meta', property='article:tag'))
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in content_imgs],
            'alt': ' '.join(tag['alt'] for tag in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held by the second of the three cells of a row."""
        _, title_cell, __ = tr.find_all('td')
        title_link = title_cell.find('a')
        return title_link['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2214
2215
2216
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images, alt text, date and author from the page."""
        pictures = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        published = string_to_date(
            post.find('span', class_='post-date').string, "%B %d, %Y")
        # Each image is expected to carry the same text in alt and title
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        alt_text = ''.join(pic['alt'] for pic in pictures)
        return {
            'title': title,
            'img': sources,
            'alt': alt_text,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2243
2244
2245
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, transcript, short url and og:image urls from the page."""
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript = soup.find('div', id='transcript-content').string
        return {
            'title': page_title,
            'transcript': transcript,
            'short_url': short_url,
            'img': [meta['content'] for meta in image_metas],
        }
2266
2267
2268
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, tags, alt text, image and date from the page."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        alt_text = "".join(pic['alt'] for pic in pictures)
        # Drop the query string from the image url
        sources = [pic['src'].rsplit('?', 1)[0] for pic in pictures]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt_text,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2299
2300
2301 View Code Duplication
class JuliasDrawings(GenericListableComic):
    """Retriever for Julia's Drawings, driven by the list of posts on the home page."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each post article, in reversed page order."""
        home = get_soup_at_url(cls.url)
        posts = home.find_all('article', class_='li post')
        links = []
        for post in reversed(posts):
            links.append(post.find('a'))
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Extract title, absolute image urls and date from the page."""
        published_time = soup.find('meta', property='og:article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        pictures = soup.find('section', class_='post-content').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2327
2328
2329
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comics, driven by the archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive table rows that correspond to comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic linked from archive row `tr`."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for one comic.

        Number, title and date are read from the archive row `tr`; the
        image and its alt text come from the comic page `soup`.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': number,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2370
2371
2372
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, images and date from the page."""
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = post.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = post.find('div', class_='entry').find_all('img')
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2398
2399
2400
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, og:image urls and date from the page meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2426
2427
2428
class ThorsThundershack(GenericNavigableComic):
    """Retriever for Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/prev anchor whose href is not '/comic', or None."""
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, date, author, title, alt text and description from the page."""
        title = soup.find('meta', attrs={'name': 'description'})['content']
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        published_time = soup.find('time', itemprop='datePublished')['datetime']
        published = string_to_date(published_time, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt_text,
            'description': description,
        }
2471
2472
2473
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title, author and date, and the images found in
        the 'comic' div of the page `soup`. Returns a dict with the keys
        'img', 'title', 'alt', 'author', 'day', 'month' and 'year'.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image gets an empty alt instead of an IndexError
        alt = imgs[0]['alt'] if imgs else ""
        # Every image is expected to share the same alt/title text
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2500
2501
2502
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract image, title, author and date from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        # The date is parsed with the de_DE.utf8 locale
        published = string_to_date(date_text, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find('div', id='comic').find_all('img')
        # Alt and title of every image are expected to equal the post title
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2528
2529
2530
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract image, title, alt text, author and date from the page."""
        title = soup.find('h1', class_='entry-title').string
        author_span = soup.find('span', class_='author vcard')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='entry-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected, so index 0 below is safe
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2558
2559
2560
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title and date from the page."""
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # Only the first image (if any) is checked for matching alt/title
        assert not pictures or pictures[0]['alt'] == pictures[0]['title']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2587
2588
2589
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and the images of the post entry from the page."""
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        entry = post.find('div', class_='entry')
        pictures = entry.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
2607
2608
2609
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract image, title, alt text, author and date from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        # Pages without an image get an empty alt text
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2639
2640
2641
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract image, title, alt text, author and date from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        # Pages without an image get an empty alt text
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2669
2670
2671
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Pages without an image get an empty alt text
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2701
2702
2703
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Pages without an image get an empty alt text
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2730 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2731
2732
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and date from the page."""
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2754
2755
2756
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the single comic image and use its title attribute as title."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected, so index 0 below is safe
        assert len(pictures) == 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
2774
2775
2776
class GenericCommitStrip(GenericNavigableComic):
    """Shared retriever for Commit Strips; subclasses provide the language-specific values."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract titles, description and absolute image urls from the page."""
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # Images without a title attribute contribute an empty string
        image_titles = ' '.join(pic.get('title', '') for pic in pictures)
        sources = []
        for pic in pictures:
            ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
            sources.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': image_titles,
            'description': description,
            'img': sources,
        }
2795
2796
2797
class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2804
2805
2806
class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2812
2813
2814
class GenericBoumerie(GenericNavigableComic):
    """Shared retriever for Boumeries comics; subclasses set `date_format` and `lang`."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short url, images, title, author and date from the page.

        The date is parsed with the class-level `date_format` and `lang`.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2840
2841
2842
class BoumerieEn(GenericBoumerie):
    """English edition of the Boumeries comics."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2849
2850
2851
class BoumerieFr(GenericBoumerie):
    """French edition of the Boumeries comics."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2859
2860
2861
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the short url, title, description, publication date and
        images from the page `soup`. Returns a dict with the keys
        'title', 'description', 'url2', 'img', 'month', 'year' and 'day'.
        The description is the content of the og:description meta tag,
        or an empty string when the page has no such tag.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is taken from the first <h1>, falling back to the first <h2>
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc = soup.find('meta', property='og:description')
        # Keep the text of the tag, not the tag itself, so the value is a plain string
        description = desc['content'] if desc else ''
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': description,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2893
2894
2895
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, alt text, author, images and date from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        comic_div = soup.find('div', id='comic')
        # Pages without a 'comic' div yield no image
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt_text = pictures[0]['title']
        else:
            alt_text = ""
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt_text,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2923
2924
2925
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the title, short url, images and date from the page `soup`.
        The comic number is the value of the `p` parameter of the short
        url. Returns a dict with the keys 'short_url', 'num', 'img',
        'month', 'year', 'day', 'alt' and 'title'.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site url so that its dots only match literal dots
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image gets an empty alt instead of an IndexError
        alt = imgs[0]['title'] if imgs else ""
        # Every image is expected to share the same alt/title text
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2955
2956
2957
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the title, the shortlink (and the post number it contains),
        the images with their common alt text, the publication date, the
        tags and the author from soup. The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # cls.url is escaped so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images are expected to carry the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2994
2995
2996
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # Guard against a missing <li> instead of failing on None.find.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the publication date, author, title and images from soup.
        The link argument is not used.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3027
3028
3029
class LittleLifeLines(GenericNavigableComic):
    """Retrieve the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    first_url = 'http://www.littlelifelines.com/comics/well-done'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> leading to the next or previous comic, or None."""
        # The site's labels are inverted: its 'prev' item is our next comic.
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        published = string_to_date(
            soup.find('time', class_='published')['datetime'], "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        content = soup.find('div', class_="body entry-content")
        # Only keep the images that actually have a source.
        pictures = [img for img in content.find_all('img')
                    if img.get('src') is not None]
        first_alt = pictures[0]['alt']
        info = {
            'title': og_title,
            'alt': first_alt,
            'description': og_desc,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in pictures],
        }
        return info
3068
3069
3070
class GenericWordPressInkblot(GenericNavigableComic):
    """Generic base for comics hosted on WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' link found on the page at cls.url."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        og_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        info = {
            'title': og_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in pictures],
        }
        return info
3093
3094
3095
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve the Everything's Stupid comics."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/EverythingsStupid
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupidcomics.tumblr.com
    long_name = "Everything's Stupid"
    name = 'stupid'
    url = 'http://everythingsstupid.net'
3103
3104
3105
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve the comics of The Ism."""
    # Possibly also on https://tapastic.com/series/TheIsm
    long_name = "The Ism"
    name = 'theism'
    url = 'http://www.theism-comics.com'
3111
3112
3113
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve the Wooden Plank Studios comics."""
    long_name = 'Wooden Plank Studios'
    name = 'woodenplank'
    url = 'http://woodenplankstudios.com'
3118
3119
3120
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'First' image on the page at cls.url."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
        return info
3148
3149
3150
class SheldonComics(GenericNavigableComic):
    """Retrieve the Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    long_name = 'Sheldon Comics'
    name = 'sheldon'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'nav-first' link found on the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first navigation link not pointing to the home page.

        None is returned when there is no such link.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=wanted_id)
            if anchor['href'] != 'http://www.sheldoncomics.com')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        pictures = soup.find("div", id="comic-foot").find_all("img")
        # Exactly one image is expected, with identical alt and title.
        assert all(img['alt'] == img['title'] for img in pictures)
        assert len(pictures) == 1
        info = {
            'title': pictures[0]['title'],
            'img': [img['src'] for img in pictures],
        }
        return info
3181
3182
3183 View Code Duplication
class Ubertool(GenericNavigableComic):
    """Retrieve the Ubertool comics."""
    # Other places where the comic is published:
    #  - https://ubertool.tumblr.com
    #  - https://tapastic.com/series/ubertool
    long_name = 'Ubertool'
    name = 'ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_navi_link = get_a_comicnavbase_comicnavnext
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        info = {
            'img': [img['src'] for img in pictures],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
3208
3209
3210
class EarthExplodes(GenericNavigableComic):
    """Retrieve The Earth Explodes comics."""
    long_name = 'The Earth Explodes'
    name = 'earthexplodes'
    url = 'http://www.earthexplodes.com'
    first_url = 'http://www.earthexplodes.com/comics/000/'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with id 'next' or 'prev' depending on next_."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        page_title = soup.find('title').string
        pictures = soup.find('div', id='image').find_all('img')
        # The title attribute of the first image, when present, is the alt text.
        hover_text = pictures[0].get('title', '')
        info = {
            'img': [urljoin_wrapper(cls.url, img['src']) for img in pictures],
            'title': page_title,
            'alt': hover_text,
        }
        return info
3235
3236
3237
class PomComics(GenericNavigableComic):
    """Retrieve the Pom Comics (Piece of Me)."""
    long_name = 'Pom Comics / Piece of Me'
    name = 'pom'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' link found on the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' or 'btn-prev' link depending on next_."""
        button_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        heading = soup.find('h1').string
        og_desc = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        pictures = soup.find('div', class_='comic').find_all('img')
        info = {
            'title': heading,
            'desc': og_desc,
            'tags': keywords,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in pictures],
        }
        return info
3267
3268
3269
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic (parent of the 'backward' glyphicon)."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching chevron glyphicon.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # Guard against a missing chevron instead of failing on None.parent.
        return span.parent if span else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the twitter title/url metadata and the comic images (with
        the title and alt of the first one) from soup. The link argument
        is not used.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3304
3305
3306
class MakeItStoopid(GenericNavigableComic):
    """Retrieve the Make It Stoopid comics."""
    long_name = 'Make it stoopid'
    name = 'stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of soup.

        The elements with class 'cnav' are split after the fifth one and
        both parts are asserted equal. The first five are returned, with
        None in place of any element that has no href.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Order is: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None
                for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the page at cls.url."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        nav = cls.get_nav(last_soup)
        return nav[3] if next_ else nav[1]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic; the title comes from link."""
        link_title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        info = {
            'title': link_title,
            'img': [img['src'] for img in pictures],
        }
        return info
3340
3341
3342
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the entry title and the images of the entry content from
        soup. The link argument is not used.
        """
        # The leftover debug print of link has been removed.
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3364
3365
3366
class MarketoonistComics(GenericNavigableComic):
    """Retrieve the Marketoonist comics."""
    long_name = 'Marketoonist'
    name = 'marketoonist'
    url = 'https://marketoonist.com/cartoons'
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        og_images = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        info = {
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': og_title,
        }
        return info
3389
3390
3391
class ConsoliaComics(GenericNavigableComic):
    """Retrieve the Consolia comics."""
    long_name = 'consolia'
    name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the link with class 'first' found on the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link with class 'next' or 'prev' depending on next_."""
        link_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        og_title = soup.find('meta', property='og:title')['content']
        published = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        return info
3422
3423
3424
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve the Tu Mourras Moins Bete comics."""
    long_name = 'Tu Mourras Moins Bete'
    name = 'mourrasmoinsbete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager link: 'newer' for next, 'older' otherwise."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        page_title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        pictures = body.find_all('img')
        writer = soup.find('span', itemprop='author').string
        info = {
            'img': [img['src'] for img in pictures],
            'author': writer,
            'title': page_title,
        }
        return info
3449
3450
3451
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The 'prev-item' link is used to reach the next comic and vice versa.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the title, description, publication date, author and images
        from soup. The 'alt' entry is the alt text of the first image, or
        an empty string when there is none. The link argument is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Content is in one of two possible containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): if i is a BeautifulSoup Tag, `'title' not in i` presumably
        # tests its children rather than its attributes, which would make this
        # assertion always pass - confirm before switching to an attribute test.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: empty when there is no image (it used to be a list).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3489
3490
3491
class GloryOwlComix(GenericNavigableComic):
    """Retrieve the Glory Owl comics."""
    long_name = 'Glory Owl'
    name = 'gloryowl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager link: 'newer' for next, 'older' otherwise."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic found in soup (link is unused)."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        writer = soup.find('a', rel='author').string
        info = {
            'img': [elt['href'] for elt in image_links],
            'author': writer,
            'title': page_title,
        }
        return info
3516
3517
3518
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the info dict of each post returned by get_posts, skipping
        the posts for which get_comic_info returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post.

        A message is printed when the url does not start with cls.url;
        the url is returned in any case.
        """
        url = post['url']
        if not url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_api_url(cls):
        """Get the url of the API: '/api/read/' joined to cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Get the API url with the '?id=' query for the integer tumblr_id."""
        return cls.get_api_url() + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comic.

        Returns None for posts whose type is not 'photo', otherwise a dict
        with the urls, date, title, tags, images and tumblr id of the post.
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        # fromtimestamp without tz converts to the local timezone.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When last_comic is given, only the posts newer than the one with
        its 'tumblr-id' are returned. Nothing is returned if that post can
        no longer be fetched or is not found among the retrieved posts.
        """
        # Imported explicitly: a bare 'import urllib' does not guarantee
        # that the urllib.error submodule is loaded.
        import urllib.error
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                # posts_acc is still empty here: nothing is returned.
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                tumblr_id = int(p['id'])
                # Stop as soon as the last known post is reached.
                if waiting_for_id and waiting_for_id == tumblr_id:
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_id is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_id)
        return []
3619
3620
3621
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve the Saturday Morning Breakfast Cereal comics from Tumblr."""
    # Other places where the comic is published:
    #  - http://www.gocomics.com/saturday-morning-breakfast-cereal
    #  - http://www.smbc-comics.com
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    name = 'smbc-tumblr'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3629
3630
3631
class IrwinCardozo(GenericTumblrV1):
    """Retrieve the Irwin Cardozo comics from Tumblr."""
    long_name = 'Irwin Cardozo'
    name = 'irwinc'
    url = 'http://irwincardozocomics.tumblr.com'
3636
3637
3638
class AccordingToDevin(GenericTumblrV1):
    """Retrieve the According To Devin comics from Tumblr."""
    long_name = 'According To Devin'
    name = 'devin'
    url = 'http://accordingtodevin.tumblr.com'
3643
3644
3645
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve the It's the tie comics from Tumblr."""
    # Other places where the comic is published:
    #  - http://itsthetie.com
    #  - https://tapastic.com/series/itsthetie
    long_name = "It's the tie (from Tumblr)"
    name = 'tie-tumblr'
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3653
3654
3655
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve the Octopuns comics from Tumblr."""
    # The comic is also published on http://www.octopuns.net
    long_name = 'Octopuns (from Tumblr)'
    name = 'octopuns-tumblr'
    url = 'http://octopuns.tumblr.com'
3661
3662
3663
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve the Pictures In Boxes comics from Tumblr."""
    # The comic is also published on http://www.picturesinboxes.com
    long_name = 'Pictures in Boxes (from Tumblr)'
    name = 'picturesinboxes-tumblr'
    url = 'https://picturesinboxescomic.tumblr.com'
3669
3670
3671
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve the TubeyToons comics from Tumblr."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/Tubey-Toons
    #  - http://tubeytoons.com
    long_name = 'Tubey Toons (from Tumblr)'
    name = 'tubeytoons-tumblr'
    url = 'https://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - check
    # every class sharing this category before renaming it.
    _categories = ('TUNEYTOONS', )
3679
3680
3681
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve the Unearthed comics from Tumblr."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/UnearthedComics
    #  - http://unearthedcomics.com
    long_name = 'Unearthed Comics (from Tumblr)'
    name = 'unearthed-tumblr'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3689
3690
3691
class PieComic(GenericTumblrV1):
    """Retrieve the Pie Comic comics from Tumblr."""
    long_name = 'Pie Comic'
    name = 'pie'
    url = "http://piecomic.tumblr.com"
3696
3697
3698
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve the Mr Ethan Diamond comics from Tumblr."""
    long_name = 'Mr Ethan Diamond'
    name = 'diamond'
    url = 'http://mrethandiamond.tumblr.com'
3703
3704
3705
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics using the GenericTumblrV1 logic."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3710
3711
3712
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics using the GenericTumblrV1 logic."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3718
3719
3720
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics using the GenericTumblrV1 logic."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3725
3726
3727
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics using the GenericTumblrV1 logic."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3734
3735
3736
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear-Shaped Comics using the GenericTumblrV1 logic."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3741
3742
3743
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics using the GenericTumblrV1 logic."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3748
3749
3750
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics using the GenericTumblrV1 logic."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3756
3757
3758
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics using the GenericTumblrV1 logic."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3765
3766
3767
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics using the GenericTumblrV1 logic."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3773
3774
3775
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics using the GenericTumblrV1 logic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
3782
3783
3784
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics using the GenericTumblrV1 logic."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3790
3791
3792
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics using the GenericTumblrV1 logic."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3798
3799
3800
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics using the GenericTumblrV1 logic."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3805
3806
3807
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve Cheer Up Emo Kid comics using the GenericTumblrV1 logic."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
3814
3815
3816
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic using the GenericTumblrV1 logic."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3822
3823
3824
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve Zen Pencils comics using the GenericTumblrV1 logic."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3832
3833
3834
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics using the GenericTumblrV1 logic."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
3840
3841
3842
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics using the GenericTumblrV1 logic."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3848
3849
3850
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics using the GenericTumblrV1 logic."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3856
3857
3858
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics using the GenericTumblrV1 logic."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3864
3865
3866
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve Boulet Corp comics using the GenericTumblrV1 logic."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3873
3874
3875
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics using the GenericTumblrV1 logic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3884
3885
3886
class NellucNhoj(GenericTumblrV1):
    """Retrieve Nelluc Nhoj comics using the GenericTumblrV1 logic."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3891
3892
3893
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics using the GenericTumblrV1 logic."""
    # Also on http://www.downtheupwardspiral.com
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3900
3901
3902
class AsPerUsualTumblr(GenericTumblrV1):
    """Retrieve As Per Usual comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Was spelled `categories`; every other comic class declares its tags
    # through `_categories`, so the un-prefixed name was presumably ignored.
    _categories = ('DAMILEE', )
3909
3910
3911
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Retrieve Hot Comics For Cool People using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Was spelled `categories`; every other comic class declares its tags
    # through `_categories`, so the un-prefixed name was presumably ignored.
    _categories = ('DAMILEE', )
3920
3921
3922
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics using the GenericTumblrV1 logic."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3930
3931
3932
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics using the GenericTumblrV1 logic."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3938
3939
3940
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics using the GenericTumblrV1 logic."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3948
3949
3950
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics using the GenericTumblrV1 logic."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3956
3957
3958
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic using the GenericTumblrV1 logic."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
3964
3965
3966
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Retrieve Chris Hallbeck comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name fixed: it read 'Hallback' although the class name, `name`
    # and `url` all spell it 'Hallbeck'.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): tag keeps its historical 'HALLBACK' spelling; it is
    # presumably shared with the other Hallbeck sources - rename them together.
    _categories = ('HALLBACK', )
3976
3977
3978
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets using the GenericTumblrV1 logic."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3983
3984
3985
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3991
3992
3993
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics using the GenericTumblrV1 logic."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3999
4000
4001
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics using the GenericTumblrV1 logic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
4010
4011
4012
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retrieve The Odd 1s Out comics using the GenericTumblrV1 logic."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
4019
4020
4021
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retrieve The Underfold comics using the GenericTumblrV1 logic."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
4027
4028
4029
class LolNeinTumblr(GenericTumblrV1):
    """Retrieve Lol Nein comics using the GenericTumblrV1 logic."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
4035
4036
4037
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retrieve Fat Awesome comics using the GenericTumblrV1 logic."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
4043
4044
4045
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retrieve The World Is Flat comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
4051
4052
4053
class DorrisMc(GenericTumblrV1):
    """Retrieve Dorris Mc comics using the GenericTumblrV1 logic."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4059
4060
4061
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Leleoz comics using the GenericTumblrV1 logic."""
    # NOTE(review): GenericEmptyComic comes first in the bases; presumably it
    # deactivates retrieval for this comic - confirm against its definition.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
4067
4068
4069
class MoonBeardTumblr(GenericTumblrV1):
    """Retrieve Moon Beard comics using the GenericTumblrV1 logic."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://squireseses.tumblr.com'
    _categories = ('MOONBEARD', )
4077
4078
4079
class AComik(GenericTumblrV1):
    """Retrieve A Comik comics using the GenericTumblrV1 logic."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4084
4085
4086
class ClassicRandy(GenericTumblrV1):
    """Retrieve Classic Randy comics using the GenericTumblrV1 logic."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4091
4092
4093
class DagssonTumblr(GenericTumblrV1):
    """Retrieve Dagsson comics using the GenericTumblrV1 logic."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4099
4100
4101
class LinsEditionsTumblr(GenericTumblrV1):
    """Retrieve L.I.N.S. Editions comics using the GenericTumblrV1 logic."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('LINS', )
4109
4110
4111
class WarAndPeasTumblr(GenericTumblrV1):
    """Retrieve War And Peas comics using the GenericTumblrV1 logic."""
    # Was on https://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
4118
4119
4120
class OrigamiHotDish(GenericTumblrV1):
    """Retrieve Origami Hot Dish comics using the GenericTumblrV1 logic."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4125
4126
4127
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retrieve Hit and Miss Comics using the GenericTumblrV1 logic."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4132
4133
4134
class HMBlanc(GenericTumblrV1):
    """Retrieve HM Blanc comics using the GenericTumblrV1 logic."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4139
4140
4141
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retrieve Tales Of Absurdity comics using the GenericTumblrV1 logic."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4149
4150
4151
class RobbieAndBobby(GenericTumblrV1):
    """Retrieve Robbie And Bobby comics using the GenericTumblrV1 logic."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4157
4158
4159
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retrieve Electric Bunny Comics using the GenericTumblrV1 logic."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4165
4166
4167
class Hoomph(GenericTumblrV1):
    """Retrieve Hoomph comics using the GenericTumblrV1 logic."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4172
4173
4174
class BFGFSTumblr(GenericTumblrV1):
    """Retrieve BFGFS comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4181
4182
4183
class DoodleForFood(GenericTumblrV1):
    """Retrieve Doodle For Food comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4189
4190
4191
class CassandraCalinTumblr(GenericTumblrV1):
    """Retrieve C. Cassandra comics using the GenericTumblrV1 logic."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4198
4199
4200
class DougWasTaken(GenericTumblrV1):
    """Retrieve Doug Was Taken comics using the GenericTumblrV1 logic."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4205
4206
4207
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retrieve Mandatory Roller Coaster comics using the GenericTumblrV1 logic."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4212
4213
4214
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retrieve C'Est Pas En Regardant Ses Pompes (...) comics using the GenericTumblrV1 logic."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://marcoandco.tumblr.com'
4219
4220
4221
class TheGrohlTroll(GenericTumblrV1):
    """Retrieve The Grohl Troll comics using the GenericTumblrV1 logic."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4226
4227
4228
class WebcomicName(GenericTumblrV1):
    """Retrieve Webcomic Name comics using the GenericTumblrV1 logic."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4233
4234
4235
class BooksOfAdam(GenericTumblrV1):
    """Retrieve Books of Adam comics using the GenericTumblrV1 logic."""
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4241
4242
4243
class HarkAVagrant(GenericTumblrV1):
    """Retrieve Hark A Vagrant comics using the GenericTumblrV1 logic."""
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4249
4250
4251
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retrieve Our Super Adventure comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4259
4260
4261
class JakeLikesOnions(GenericTumblrV1):
    """Retrieve Jake Likes Onions comics using the GenericTumblrV1 logic."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4266
4267
4268
class InYourFaceCake(GenericTumblrV1):
    """Retrieve In Your Face Cake comics using the GenericTumblrV1 logic."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
4273
4274
4275
class Robospunk(GenericTumblrV1):
    """Retrieve Robospunk comics using the GenericTumblrV1 logic."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4280
4281
4282
class BananaTwinky(GenericTumblrV1):
    """Retrieve Banana Twinky comics using the GenericTumblrV1 logic."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4287
4288
4289
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retrieve Yesterday's Popcorn comics using the GenericTumblrV1 logic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4296
4297
4298
class TwistedDoodles(GenericTumblrV1):
    """Retrieve Twisted Doodles comics using the GenericTumblrV1 logic."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4303
4304
4305
class UbertoolTumblr(GenericTumblrV1):
    """Retrieve Ubertool comics using the GenericTumblrV1 logic."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4313
4314
4315
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retrieve Little Life Lines comics using the GenericTumblrV1 logic."""
    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4321
4322
4323
class TheyCanTalk(GenericTumblrV1):
    """Retrieve They Can Talk comics using the GenericTumblrV1 logic."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4328
4329
4330
class Will5NeverCome(GenericTumblrV1):
    """Retrieve Will 5:00 Never Come comics using the GenericTumblrV1 logic."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4335
4336
4337
class Sephko(GenericTumblrV1):
    """Retrieve Sephko comics using the GenericTumblrV1 logic."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4343
4344
4345
class BlazersAtDawn(GenericTumblrV1):
    """Retrieve Blazers At Dawn comics using the GenericTumblrV1 logic."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4350
4351
4352
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Retrieve Art By Moga comics using the GenericTumblrV1 logic."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4357
4358
4359
class VerbalVomitTumblr(GenericTumblrV1):
    """Retrieve Verbal Vomit comics using the GenericTumblrV1 logic."""
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4365
4366
4367
class LibraryComic(GenericTumblrV1):
    """Retrieve LibraryComic comics using the GenericTumblrV1 logic."""
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4373
4374
4375
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Retrieve Tizzy Stitch Bird comics using the GenericTumblrV1 logic."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4383
4384
4385
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Retrieve Victims Of Circumsolar comics using the GenericTumblrV1 logic."""
    # Also on http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4391
4392
4393
class RockPaperCynicTumblr(GenericTumblrV1):
    """Retrieve Rock Paper Cynic comics using the GenericTumblrV1 logic."""
    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4400
4401
4402
class DeadlyPanelTumblr(GenericTumblrV1):
    """Retrieve Deadly Panel comics using the GenericTumblrV1 logic."""
    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4409
4410
4411
class CatanaComics(GenericTumblrV1):
    """Retrieve Catana comics using the GenericTumblrV1 logic."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4416
4417
4418
class AngryAtNothingTumblr(GenericTumblrV1):
    """Retrieve Angry At Nothing comics using the GenericTumblrV1 logic."""
    # Also on http://www.angryatnothing.net
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4425
4426
4427
class ShanghaiTango(GenericTumblrV1):
    """Retrieve Shanghai Tango comics using the GenericTumblrV1 logic."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4432
4433
4434
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Retrieve Off The Leash Dog comics using the GenericTumblrV1 logic."""
    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4442
4443
4444
class ImogenQuestTumblr(GenericTumblrV1):
    """Retrieve Imogen Quest comics using the GenericTumblrV1 logic."""
    # Also on http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4450
4451
4452
class Shitfest(GenericTumblrV1):
    """Retrieve Shitfest comics using the GenericTumblrV1 logic."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4457
4458
4459
class IceCreamSandwichComics(GenericTumblrV1):
    """Retrieve Ice Cream Sandwich Comics using the GenericTumblrV1 logic."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4464
4465
4466
class Dustinteractive(GenericTumblrV1):
    """Retrieve Dustinteractive comics using the GenericTumblrV1 logic."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4471
4472
4473
class StickyCinemaFloor(GenericTumblrV1):
    """Retrieve Sticky Cinema Floor comics using the GenericTumblrV1 logic."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4478
4479
4480
class IncidentalComicsTumblr(GenericTumblrV1):
    """Retrieve Incidental Comics using the GenericTumblrV1 logic."""
    # Also on http://www.incidentalcomics.com
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
    url = 'http://incidentalcomics.tumblr.com'
4486
4487
4488
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """Retrieve A Pleasant Waste Of Time comics using the GenericTumblrV1 logic."""
    # Also on https://tapas.io/series/A-Pleasant-
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    url = 'https://artjcf.tumblr.com'
    _categories = ('WASTE', )
4495
4496
4497
class HorovitzComicsTumblr(GenericTumblrV1):
    """Retrieve Horovitz comics (Tumblr source) using the GenericTumblrV1 logic."""
    # Also on http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ', )
4504
4505
4506
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Retrieve Deep Dark Fears comics using the GenericTumblrV1 logic."""
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4511
4512
4513
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """Retrieve James Of No Trades comics using the GenericTumblrV1 logic."""
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES', )
4522
4523
4524
class HorovitzComics(GenericEmptyComic, GenericListableComic):
    """Base class holding what the comics from horovitzcomics.com share.

    Subclasses set `link_re`, whose first group captures the comic number
    from the href of an archive link.
    """
    # Also on https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Captures three numeric path components, read below as year/month/day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic from its page and archive link."""
        number = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        # Exactly one <img id="comic"> is expected on a comic page.
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4556
4557
4558
class HorovitzNew(HorovitzComics):
    """Retrieve the 'new' Horovitz comics (links under /comics/new/)."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
4563
4564
4565
class HorovitzClassic(HorovitzComics):
    """Retrieve the 'classic' Horovitz comics (links under /comics/classic/)."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
4570
4571
4572
class GenericGoComic(GenericNavigableComic):
    """Shared scraping logic for the comics hosted on gocomics.com.

    `cls.url` is read by the methods below but not defined here: each
    subclass provides the address of its comic on gocomics.com.
    """
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, searched on the page at cls.url."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='fa btn btn-outline-default btn-circle fa-backward sm ')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next comic if next_ is true, else to the previous one."""
        if next_:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=css_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the url of a comic: the link's href joined to the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, images, author and tags from a comic page.

        Everything but the images comes from the 'article:*' <meta> elements.
        """
        def meta_content(prop):
            """Return the content of the first <meta> with the given property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        imgs = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
            'author': author,
            'tags': tags,
        }
4609
4610
4611
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve Pearls Before Swine comics from GoComics."""
    url = 'http://www.gocomics.com/pearlsbeforeswine'
    name = 'pearls'
    long_name = 'Pearls Before Swine'
4616
4617
4618
class Peanuts(GenericGoComic):
    """Retrieve Peanuts comics from GoComics."""
    url = 'http://www.gocomics.com/peanuts'
    name = 'peanuts'
    long_name = 'Peanuts'
4623
4624
4625
class MattWuerker(GenericGoComic):
    """Retrieve Matt Wuerker comics from GoComics."""
    url = 'http://www.gocomics.com/mattwuerker'
    name = 'wuerker'
    long_name = 'Matt Wuerker'
4630
4631
4632
class TomToles(GenericGoComic):
    """Retrieve Tom Toles comics from GoComics."""
    url = 'http://www.gocomics.com/tomtoles'
    name = 'toles'
    long_name = 'Tom Toles'
4637
4638
4639
class BreakOfDay(GenericGoComic):
    """Retrieve Break Of Day comics from GoComics."""
    url = 'http://www.gocomics.com/break-of-day'
    name = 'breakofday'
    long_name = 'Break Of Day'
4644
4645
4646
class Brevity(GenericGoComic):
    """Retrieve Brevity comics from GoComics."""
    url = 'http://www.gocomics.com/brevitypanel'
    name = 'brevity'
    long_name = 'Brevity'
4651
4652
4653
class MichaelRamirez(GenericGoComic):
    """Retrieve Michael Ramirez comics from GoComics."""
    url = 'http://www.gocomics.com/michaelramirez'
    name = 'ramirez'
    long_name = 'Michael Ramirez'
4658
4659
4660
class MikeLuckovich(GenericGoComic):
    """Retrieve Mike Luckovich comics from GoComics."""
    url = 'http://www.gocomics.com/mikeluckovich'
    name = 'luckovich'
    long_name = 'Mike Luckovich'
4665
4666
4667
class JimBenton(GenericGoComic):
    """Retrieve Jim Benton comics from GoComics."""
    # Also on http://jimbenton.tumblr.com
    url = 'http://www.gocomics.com/jim-benton-cartoons'
    name = 'benton'
    long_name = 'Jim Benton'
4673
4674
4675
class TheArgyleSweater(GenericGoComic):
    """Retrieve The Argyle Sweater comics from GoComics."""
    url = 'http://www.gocomics.com/theargylesweater'
    name = 'argyle'
    long_name = 'Argyle Sweater'
4680
4681
4682
class SunnyStreet(GenericGoComic):
    """Retrieve Sunny Street comics from GoComics."""
    # Also on http://www.sunnystreetcomics.com
    url = 'http://www.gocomics.com/sunny-street'
    name = 'sunny'
    long_name = 'Sunny Street'
4688
4689
4690
class OffTheMark(GenericGoComic):
    """Retrieve Off The Mark comics from GoComics."""
    # Also on https://www.offthemark.com
    url = 'http://www.gocomics.com/offthemark'
    name = 'offthemark'
    long_name = 'Off The Mark'
4696
4697
4698
class WuMo(GenericGoComic):
    """Retrieve WuMo comics from GoComics."""
    # Also on http://wumo.com
    url = 'http://www.gocomics.com/wumo'
    name = 'wumo'
    long_name = 'WuMo'
4704
4705
4706
class LunarBaboon(GenericGoComic):
    """Retrieve Lunar Baboon comics from GoComics."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    url = 'http://www.gocomics.com/lunarbaboon'
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
4713
4714
4715
class SandersenGocomic(GenericGoComic):
    """Retrieve Sarah Andersen comics from GoComics."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    url = 'http://www.gocomics.com/sarahs-scribbles'
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
4722
4723
4724
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retrieve Saturday Morning Breakfast Cereal comics from GoComics."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    _categories = ('SMBC', )
4732
4733
4734
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retrieve Calvin and Hobbes comics from GoComics."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    url = 'http://www.gocomics.com/calvinandhobbes'
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
4740
4741
4742
class RallGoComic(GenericGoComic):
    """Retrieve Ted Rall comics from GoComics."""
    # Also on http://rall.com/comic
    url = "http://www.gocomics.com/ted-rall"
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    _categories = ('RALL', )
4749
4750
4751
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retrieve The Awkward Yeti comics from GoComics."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    url = 'http://www.gocomics.com/the-awkward-yeti'
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    _categories = ('YETI', )
4760
4761
4762
class BerkeleyMewsGoComics(GenericGoComic):
    """Retrieve Berkeley Mews comics from GoComics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    url = 'http://www.gocomics.com/berkeley-mews'
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    _categories = ('BERKELEY', )
4770
4771
4772
class SheldonGoComics(GenericGoComic):
    """Retrieve Sheldon comics from GoComics."""
    # Also on http://www.sheldoncomics.com
    url = 'http://www.gocomics.com/sheldon'
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
4778
4779
4780
class FowlLanguageGoComics(GenericGoComic):
    """Retrieve Fowl Language comics from GoComics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    url = 'http://www.gocomics.com/fowl-language'
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    _categories = ('FOWLLANGUAGE', )
4789
4790
4791
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics from GoComics."""
    url = 'http://www.gocomics.com/nickanderson'
    name = 'nickanderson'
    long_name = 'Nick Anderson'
4796
4797
4798
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics from GoComics."""
    # Also on http://garfield.com
    url = 'http://www.gocomics.com/garfield'
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    _categories = ('GARFIELD', )
4805
4806
4807
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc comics from GoComics."""
    # Also on http://dorrismccomics.com
    url = 'http://www.gocomics.com/dorris-mccomics'
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
4813
4814
4815
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from GoComics."""
    url = 'http://www.gocomics.com/foxtrot'
    name = 'foxtrot'
    long_name = 'FoxTrot'
4820
4821
4822
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from GoComics."""
    url = 'http://www.gocomics.com/foxtrotclassics'
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
4827
4828
4829
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retrieve Mister & Me comics from GoComics."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    url = 'http://www.gocomics.com/mister-and-me'
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
4836
4837
4838
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (Wiley Miller) comics from GoComics."""
    url = 'http://www.gocomics.com/nonsequitur'
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
4843
4844
4845
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Archive elements are the items decoded from the 'episodeList' JSON value
    embedded in the javascript of the series page (cls.url); their 'id',
    'title' and 'publishDate' entries are the ones read by this class.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Returns None, after printing a message, when the episode page does
        not contain any 'art-image' image.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably it is expressed in milliseconds - TODO confirm).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the url of the episode described by an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list decoded from the series page.

        Raises IndexError when no 'episodeList' line is found.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way:
        # take the first line looking like "episodeList : <json>,".
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4878
4879
4880
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics from Tapastic."""
    # Also on http://vegetablesfordessert.tumblr.com
    url = 'http://tapastic.com/series/vegetablesfordessert'
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
4886
4887
4888
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics from Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    _categories = ('FOWLLANGUAGE', )
4897
4898
4899
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics from Tapastic."""
    url = 'http://tapastic.com/series/oscillatingprofundities'
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
4904
4905
4906
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics from Tapastic."""
    url = 'http://tapastic.com/series/Znoflats-Comics'
    name = 'znoflats'
    long_name = 'Znoflats Comics'
4911
4912
4913
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics from Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    url = 'http://tapastic.com/series/Doodle-Time'
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
4920
4921
4922
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve Tubey Toons comics from Tapastic."""
    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    url = 'http://tapastic.com/series/Tubey-Toons'
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; left
    # untouched as the key is presumably shared with other classes - confirm.
    _categories = ('TUNEYTOONS', )
4930
4931
4932
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic comics from Tapastic."""
    # Also on http://www.anythingcomic.com
    url = 'http://tapastic.com/series/anything'
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
4938
4939
4940
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed comics from Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    url = 'http://tapastic.com/series/UnearthedComics'
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    _categories = ('UNEARTHED', )
4948
4949
4950
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    url = 'http://tapastic.com/series/EverythingsStupid'
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
4957
4958
4959
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from Tapastic."""
    # Also on http://www.justsayeh.com
    url = 'http://tapastic.com/series/Just-Say-Eh'
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
4965
4966
4967
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from Tapastic."""
    # Also on http://www.thorsthundershack.com
    url = 'http://tapastic.com/series/Thors-Thundershac'
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    _categories = ('THOR', )
4974
4975
4976
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from Tapastic."""
    # Also on http://owlturd.com
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    _categories = ('OWLTURD', )
4983
4984
4985
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from Tapastic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://goneintorapture.com
    url = 'http://tapastic.com/series/Goneintorapture'
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
4992
4993
4994
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know comics from Tapastic."""
    # Also on http://heckifiknowcomics.com
    url = 'http://tapastic.com/series/Regular'
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
5000
5001
5002
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from Tapastic."""
    # Also on http://www.cheerupemokid.com
    # Also on https://enzocomics.tumblr.com
    url = 'http://tapastic.com/series/CUEK'
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
5009
5010
5011
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from Tapastic."""
    # Also on http://bigfootjustice.com
    url = 'http://tapastic.com/series/bigfoot-justice'
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
5017
5018
5019
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from Tapastic."""
    # Also on http://upandoutcomic.tumblr.com
    url = 'http://tapastic.com/series/UP-and-OUT'
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
5025
5026
5027
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from Tapastic."""
    # Also on http://www.toonhole.com
    url = 'http://tapastic.com/series/TOONHOLE'
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
5033
5034
5035
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from Tapastic."""
    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
5042
5043
5044
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from Tapastic."""
    # Also on http://leleozcomics.tumblr.com
    url = 'https://tapastic.com/series/Leleoz'
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
5050
5051
5052
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from Tapastic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    _categories = ('YETI', )
5061
5062
5063
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like in every other comic
    # class: the previous `categories` attribute did not match that name.
    _categories = ('DAMILEE', )
5070
5071
5072
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like in every other comic
    # class: the previous `categories` attribute did not match that name.
    _categories = ('DAMILEE', )
5081
5082
5083
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from Tapastic."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    url = 'https://tapastic.com/series/1111-Comics'
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    _categories = ('ONEONEONEONE', )
5091
5092
5093
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it used to read 'Tumblr Dry'.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5099
5100
5101
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from Tapastic."""
    # Also on http://www.deadlypanel.com
    # Also on https://deadlypanel.tumblr.com
    url = 'https://tapastic.com/series/deadlypanel'
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
5108
5109
5110
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    # NOTE(review): 'Hallback' below looks like a misspelling of 'Hallbeck';
    # kept as is, the spelling is presumably shared with other classes.
    url = 'https://tapastic.com/series/Maximumble'
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    _categories = ('HALLBACK', )
5118
5119
5120
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    # NOTE(review): 'Hallback' below looks like a misspelling of 'Hallbeck';
    # kept as is, the spelling is presumably shared with other classes.
    url = 'https://tapastic.com/series/Minimumble'
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    _categories = ('HALLBACK', )
5128
5129
5130
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    # NOTE(review): 'Hallback' below looks like a misspelling of 'Hallbeck';
    # kept as is, the spelling is presumably shared with other classes.
    url = 'https://tapastic.com/series/Biff'
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    _categories = ('HALLBACK', )
5138
5139
5140
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from Tapastic."""
    # Also on https://randowis.com
    url = 'https://tapastic.com/series/RandoWis'
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
5146
5147
5148
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from Tapastic."""
    # Also on http://thepigeongazette.tumblr.com
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
5154
5155
5156
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from Tapastic."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    url = 'https://tapastic.com/series/Theodd1sout'
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
5163
5164
5165
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from Tapastic."""
    # Also on http://theworldisflatcomics.tumblr.com
    url = 'https://tapastic.com/series/The-World-is-Flat'
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
5171
5172
5173
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from Tapastic."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    url = 'https://tapastic.com/series/Mister-and-Me'
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
5180
5181
5182
class TalesOfAbsurdityTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from Tapastic."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    _categories = ('ABSURDITY', )
5190
5191
5192
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from Tapastic."""
    # Also on http://bfgfs.com
    # Also on https://bfgfs.tumblr.com
    url = 'https://tapastic.com/series/BFGFS'
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
5199
5200
5201
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from Tapastic."""
    # Also on http://www.doodleforfood.com
    url = 'https://tapastic.com/series/Doodle-for-Food'
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
5207
5208
5209
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from Tapastic."""
    # Also on https://tapastic.com/series/MrLovenstein
    # NOTE(review): the link above is this class's own url; it was presumably
    # meant to point to the comic's other website - confirm.
    url = 'https://tapastic.com/series/MrLovenstein'
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
5215
5216
5217
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra comics from Tapastic."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    url = 'https://tapastic.com/series/C-Cassandra-comics'
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
5224
5225
5226
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from Tapastic."""
    # Also on http://wandpcomic.com
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
5232
5233
5234
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from Tapastic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
    name = 'popcorn-tapa'
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
5241
5242
5243
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Our Super Adventure comics from Tapastic."""
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahssketchbook.tumblr.com
    # Also on http://sarahgraley.com
    url = 'https://tapastic.com/series/Our-Super-Adventure'
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
5251
5252
5253
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from Tapastic."""
    # Also on http://namelesspcs.com
    url = 'https://tapastic.com/series/NamelessPC'
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
5259
5260
5261
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve Down The Upward Spiral comics from Tapastic."""
    # Also on http://www.downtheupwardspiral.com
    # Also on http://downtheupwardspiral.tumblr.com
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
5268
5269
5270
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from Tapastic."""
    # Also on http://ubertoolcomic.com
    # Also on https://ubertool.tumblr.com
    url = 'https://tapastic.com/series/ubertool'
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    _categories = ('UBERTOOL', )
5278
5279
5280
class BarteNerdsTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve BarteNerds comics from Tapastic."""
    # Also on http://www.bartenerds.com
    url = 'https://tapastic.com/series/BarteNERDS'
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
5286
5287
5288
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from Tapastic."""
    # Also on http://www.smallblueyonder.com
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
5294
5295
5296
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve Tizzy Stitch Bird comics from Tapastic."""
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    url = 'https://tapastic.com/series/TizzyStitchbird'
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
5304
5305
5306
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve Rock Paper Cynic comics from Tapastic."""
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    url = 'https://tapastic.com/series/rockpapercynic'
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
5313
5314
5315
class IsItCanonTapa(GenericTapasticComic):
    """Retrieve Is It Canon comics from Tapastic."""
    # Also on http://www.isitcanon.com
    url = 'http://tapastic.com/series/isitcanon'
    name = 'canon-tapa'
    long_name = 'Is It Canon (from Tapastic)'
5321
5322
5323
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve It's The Tie comics from Tapastic."""
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    url = "https://tapastic.com/series/itsthetie"
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    _categories = ('TIE', )
5331
5332
5333
class JamesOfNoTradesTapa(GenericTapasticComic):
    """Retrieve James Of No Trades comics from Tapastic."""
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on http://jamesfregan.tumblr.com
    url = 'https://tapas.io/series/James-of-No-Trades'
    name = 'jamesofnotrades-tapa'
    long_name = 'James Of No Trades (from Tapastic)'
    _categories = ('JAMESOFNOTRADES', )
5342
5343
5344
class MomentumTapa(GenericTapasticComic):
    """Retrieve Momentum comics from Tapastic."""
    # Also on http://www.momentumcomic.com
    url = 'https://tapastic.com/series/momentum'
    name = 'momentum-tapa'
    long_name = 'Momentum (from Tapastic)'
5350
5351
5352
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """Retrieve A Pleasant Waste Of Time comics from Tapastic."""
    # Also on https://artjcf.tumblr.com
    url = 'https://tapas.io/series/A-Pleasant-'
    name = 'pleasant-waste-tapa'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    _categories = ('WASTE', )
5359
5360
5361
def get_subclasses(klass):
    """Return the direct and indirect subclasses of a class, as a list.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several paths appears once per
    path.
    """
    children = klass.__subclasses__()
    found = list(children)
    for child in children:
        found += get_subclasses(child)
    return found
5367
5368
5369
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only the 'st', 'nd', 'rd' and 'th' suffixes directly following a digit
    are removed, so that words merely containing these letters ('August',
    'Monday', 'Saturday', ...) are left untouched.

    Example: 'Monday 2nd August 2015' becomes 'Monday 2 August 2015'.
    """
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)\b', '', string)
5377
5378
5379
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.
    Wrapper around datetime.datetime.strptime.

    The locale `local` is set for the duration of the parsing (when it
    differs from the current one) and the previous locale is always restored
    afterwards, even when the parsing fails.
    Raises ValueError if `string` does not match `date_format`.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Do not leave the locale changed if strptime raised.
        locale.setlocale(locale.LC_ALL, prev_locale)
5390
5391
5392
# All the direct and indirect subclasses of GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are left out of the registries below.
VALID_COMICS = [c for c in COMICS if c.name is not None]
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
# Two classes sharing a name would silently overwrite each other in the dict.
assert len(VALID_COMICS) == len(COMIC_NAMES), \
    "Comic names must be unique, name already taken for: %s" % sorted(
        c.__name__ for c in VALID_COMICS if COMIC_NAMES[c.name] is not c)
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), \
    "Comic class names must be unique"
5398