Completed
Push — master ( ce2402...48aeb4 )
by De
01:29
created

comics.py (53 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
import datetime
import json
import locale
import re
import urllib
import urllib.error
from datetime import date, timedelta

from comic_abstract import GenericComic, get_date_for_comic
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
14
15
# Default locale name. NOTE(review): presumably meant for date parsing, but it
# is not referenced in the visible part of this file - confirm against callers.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Iteration starts right after `last_comic['num']` (or at 1 when
        `last_comic` is falsy) and stops at the 'num' value reported by
        the site's 'info.0.json'.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is never fetched (presumably because that
                # URL is an error page and not an actual comic).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # Comics are expected to carry a list of images.
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # Date fields are converted to integers.
            comic['day'] = int(comic['day'])
            comic['month'] = int(comic['month'])
            comic['year'] = int(comic['year'])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` unchanged. Meant to be assigned as a
    class attribute (it is already wrapped in `classmethod`).
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` joined to `cls.url` with
    `urljoin_wrapper`. Meant to be assigned as a class attribute
    (it is already wrapped in `classmethod`).
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True to get the next link, False to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The returned value is either None (comic is skipped) or a dict
        which must not contain a 'url' key: `get_next_comic` sets it.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Start from the comic following `last_comic` (or from the first
        comic when `last_comic` is falsy) and follow the "next" links
        until there is none or until a link leads to the current URL.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return True when a first link exists and its URL can be retrieved
        without an HTTP error, False otherwise.
        """
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) is
        expected to lead back to `url`. Return True when it does.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both checks are always performed; the result is True only when
        both of them succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The returned value is either None (comic is skipped) or a dict
        which must not contain a 'url' key: `get_next_comic` sets it.
        """
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the URL of `last_comic` is
        found; comics are yielded for the elements coming after it.
        When `last_comic` is falsy, all elements are considered.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        # A list is needed because its length is reported below.
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last retrieved comic found: next elements are new comics.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of searching `last_soup` for a 'link' tag with
    rel 'next' (when `next_` is true) or rel 'prev' (otherwise).
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of searching `last_soup` for an 'a' tag with
    rel 'next' (when `next_` is true) or rel 'prev' (otherwise).
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of searching `last_soup` for an 'a' tag with class
    'navi navi-next' (when `next_` is true) or 'navi navi-prev' (otherwise).
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of searching `last_soup` for an 'a' tag with class
    'navi comic-nav-next navi-next' (when `next_` is true) or
    'navi comic-nav-previous navi-prev' (otherwise).
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of searching `last_soup` for an 'a' tag with class
    'comic-nav-base comic-nav-next' (when `next_` is true) or
    'comic-nav-base comic-nav-previous' (otherwise).
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the result of searching it
    for an 'a' tag with class 'navi navi-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the result of searching for
    an 'a' tag inside the 'div' tag with class "nav-first". Return None
    when there is no such 'div' instead of failing on the chained lookup.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the result of searching it
    for an 'a' tag with class 'comic-nav-base comic-nav-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    a URL provided by the class.

    The class is expected to define a `first_url` attribute; the returned
    value is a dict with that URL under the 'href' key.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comic. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.

    The starting URL is `cls.first_url` when the class defines it;
    otherwise it is asked for interactively. Every visited URL is printed.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    # Go backward until a page without "previous" link is reached.
    while comic:
        url = cls.get_url_from_link(comic)
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
358
359
360 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose source starts with the upload
        directory of the site; title and date come from 'meta' tags.
        """
        # The URL is escaped so that its '.' only matches a literal dot.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
384
385
386 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to override `first_url`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # Dates are written in French, hence the explicit locale.
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
409
410
411
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
417
418
419
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
426
427
428
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
434
435
436
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
442
443
444
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
450
451
452
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
458
459
460
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
466
467
468
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
474
475
476 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
507
508
509
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return None when the corresponding 'li' tag is not found.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
543
544
545
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded link-like dict)."""
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the link; the date is extracted from
        the year/month/day components of the URL.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
573
574
575
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on what is expected from the page.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
608
609
610
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        # Bonus images already listed are not added a second time.
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
644
645
646
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        date_str = soup.find('h2', class_='date-header').string
        # Dates are written in French, hence the explicit locale.
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
670
671
672
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
697
698
699
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
723
724
725
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the 'p' parameter of the
        short link; exactly one image is expected in the 'comic' div.
        """
        # The URL is escaped so that its '.' only matches a literal dot.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
751
752
753
class Garfield(GenericNavigableComic):
    """Scraper for the Garfield comic strip."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the arrow anchor leading to the next (or previous) comic."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        The publication date is parsed from the year/month/day components
        of the comic URL.
        """
        comic_url = cls.get_url_from_link(link)
        url_date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, url_date_re.match(comic_url).groups())
        display = soup.find('div', class_='comic-display')
        imgs = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [tag['src'] for tag in imgs],
        }
781
782 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
783
class Dilbert(GenericNavigableComic):
    """Scraper for the Dilbert comic strip."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, or None."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page, read from its meta tags."""
        def meta_content(prop):
            """Content of the first meta tag with the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        imgs = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [tag['content'] for tag in imgs],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
819
820
821
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Scraper for the Victims Of Circumsolar webcomic."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        No date is extracted here: it is only available on the archive page.
        """
        # The last og:title / og:description meta tag on the page is used.
        title = soup.find_all('meta', property='og:title')[-1]['content']
        desc = soup.find_all('meta', property='og:description')[-1]['content']
        imgs = soup.find('div', id='comic').find_all('img')
        # Every image is expected to carry the page title as title and alt.
        assert all(tag['title'] == tag['alt'] == title for tag in imgs)
        return {
            'title': title,
            'description': desc,
            'img': [tag['src'] for tag in imgs],
        }
844
845
846
class ThreeWordPhrase(GenericNavigableComic):
    """Scraper for the Three Word Phrase webcomic."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' navigation image."""
        home = get_soup_at_url(cls.url)
        return home.find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous image, or None.

        None is returned when that wrapping element has no href.
        """
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        Every image on the page is kept except those whose source ends with
        one of the known navigation/advert file names.
        """
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        imgs = []
        for tag in soup.find_all('img'):
            if not tag['src'].endswith(ignored_suffixes):
                imgs.append(tag)
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(tag.get('alt') for tag in imgs if tag.get('alt')),
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in imgs],
        }
878
879
880
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Scraper for the Deadly Panel webcomic."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict (image URLs only) for a comic page."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # Each image is expected to have identical alt and title text.
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        return {
            'img': [tag['src'] for tag in imgs],
        }
898
899
900
class TheGentlemanArmchair(GenericNavigableComic):
    """Scraper for The Gentleman Armchair webcomic."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        Title, author and date come from the post header elements; images
        come from the 'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
924
925
926
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the post title, author and date, the image
        URLs found in the 'comicpane' div, and 'title2', the title text of
        the first image (an empty string when the pane has no image).
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        # Each image is expected to have identical alt and title text.
        assert all(i['alt'] == i['title'] for i in imgs)
        # The assert above passes on an empty list, so guard the index to
        # avoid an IndexError on a page without any comic image.
        title2 = imgs[0]['title'] if imgs else ''
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'title2': title2,
            'author': author,
        }
954
955
956
class MyExtraLife(GenericNavigableComic):
    """Scraper for the My Extra Life webcomic."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page."""
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(
            soup.find("span", class_="comic_date").string, "%B %d, %Y")
        imgs = soup.find_all("img", class_="comic")
        # Alt and title text of every comic image must equal the page title.
        assert all(tag['alt'] == tag['title'] == title for tag in imgs)
        return {
            'title': title,
            # Images with an empty src are left out.
            'img': [tag['src'] for tag in imgs if tag["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
983
984
985
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Scraper for the Saturday Morning Breakfast Cereal webcomic."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        The image list holds the main comic image and, when an 'aftercomic'
        div is present with a non-empty image source, that extra image too.
        """
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            extra_src = after_div.find('img')['src']
            if extra_src:
                sources.append(extra_src)
        # The first child of the publish-time div is the date text.
        published = string_to_date(
            soup.find('div', class_='cc-publishtime').contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1017
1018
1019
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Scraper for the Perry Bible Fellowship webcomic."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the thumbnail anchors of the home page, in reverse order."""
        home = get_soup_at_url(cls.url)
        anchors = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page, read from its og: meta tags."""
        comic_name = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', property='og:image')
        # Exactly one og:image tag is expected per page.
        assert len(imgs) == 1
        return {
            'name': comic_name,
            'img': [tag['content'] for tag in imgs],
        }
1042
1043
1044
class Mercworks(GenericNavigableComic):
    """Scraper for the Mercworks webcomic."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page, read from its meta tags.

        The description is an empty string when the page has no
        og:description tag.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_tag = soup.find('meta', property='og:description')
        if desc_tag:
            desc = desc_tag['content']
        else:
            desc = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first ten characters are parsed, as a %Y-%m-%d date.
        posted = string_to_date(published_time[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'img': [tag['content'] for tag in imgs],
            'title': title,
            'desc': desc,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1070
1071
1072
class BerkeleyMews(GenericListableComic):
    """Scraper for the Berkeley Mews webcomic."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the numeric post id.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors that point at comics, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        The number comes from the comic URL, the date from the image file
        name and the main title from the archive link text.
        """
        img_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        img = soup.find('div', id='comic').find('img')
        # Alt and title text of the image are expected to be identical.
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = map(int, img_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1108
1109
1110
class GenericBouletCorp(GenericNavigableComic):
    """Shared scraper logic for the BouletCorp comics in each language."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's 'centered_nav' div."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        The date is parsed from the comic URL; images are those inside the
        'storycontent' div of the 'notes' div.
        """
        comic_url = cls.get_url_from_link(link)
        url_date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, url_date_re.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        imgs = story.find_all('img')
        # Non-empty image titles, joined with two spaces.
        image_titles = (tag.get('title') for tag in imgs)
        texts = '  '.join(text for text in image_titles if text)
        title = soup.find('title').string
        return {
            # Images without a src attribute are skipped.
            'img': [convert_iri_to_plain_ascii_uri(tag['src'])
                    for tag in imgs if tag.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1138
1139
1140
class BouletCorp(GenericBouletCorp):
    """Scraper for the BouletCorp comics at www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # Replaces the _categories value set on GenericBouletCorp.
    _categories = ('FRANCAIS', )
1146
1147
1148
class BouletCorpEn(GenericBouletCorp):
    """Scraper for the English BouletCorp comics at english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1153
1154
1155
class AmazingSuperPowers(GenericNavigableComic):
    """Scraper for the Amazing Super Powers webcomic."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        The title is built by joining the title text of every image in the
        'comic' div with single spaces.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join(tag['title'] for tag in imgs)
        # Each image is expected to have identical alt and title text.
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        return {
            'title': title,
            'author': author,
            'img': [tag['src'] for tag in imgs],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1180
1181
1182
class ToonHole(GenericNavigableComic):
    """Scraper for the Toon Hole webcomic."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        The title is the alt text of the first image in the 'comic' div,
        or an empty string when that div holds no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        # The first child of the entry-meta div is the date text.
        posted = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if imgs:
            first_img = imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(tag['src']) for tag in imgs],
        }
1212
1213
1214
class Channelate(GenericNavigableComic):
    """Scraper for the Channelate webcomic."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched as well and its extra-panel images are appended to the
        image list.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img') if comic_div else []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            # Second request: the extra panel lives on a separate page.
            extra_page = get_soup_at_url(extra_url)
            imgs.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in imgs],
        }
1248
1249
1250
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when no matching anchor exists on the page or when the
        anchor has no href.
        """
        # NOTE(review): 'previous-comic ' ends with a space; kept as is
        # because it may mirror the site's markup - confirm.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches; treat that as "no link"
        # instead of failing with AttributeError on link.get.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last '/'-separated component of
        the og:url meta tag, and the author is the credit text with its
        leading 'by ' removed.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1289
1290
1291
class MrLovenstein(GenericComic):
    """Scraper for the Mr. Lovenstein webcomic."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield an info dict for each comic after `last_comic`.

        The range of comic numbers is taken from the '/comic/<n>' links on
        the home page; every number from the start of the range to the
        highest one is then fetched in turn.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_href_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_href_re)
        nums = [int(comic_href_re.match(anchor['href']).group(1))
                for anchor in home_links]
        lowest, highest = min(nums), max(nums)
        # Resume right after the last comic already retrieved, if any.
        start = last_comic['num'] + 1 if last_comic else lowest
        for num in range(start, highest + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Comic images are kept in reverse page order.
            imgs = soup.find_all('img', src=re.compile('^/images/comics/'))[::-1]
            description = soup.find('meta', attrs={'name': 'description'})['content']
            image_titles = (tag.get('title') for tag in imgs)
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in image_titles if text),
                'img': [urljoin_wrapper(url, tag['src']) for tag in imgs],
                'description': description,
            }
1321
1322
1323
class DinosaurComics(GenericListableComic):
    """Scraper for Dinosaur Comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        The date is the archive link text and the 'text' entry is the
        string of the node that follows the link in the archive.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        text = link.next_sibling.string
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1356
1357
1358
class ButterSafe(GenericListableComic):
    """Scraper for the ButterSafe webcomic."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the year, month and day components.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page.

        The title is the archive link text and the date comes from the
        comic URL.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        img = soup.find('div', id='comic').find('img')
        # The image alt text is expected to equal the archive link text.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1386
1387
1388
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page, opens every
        month page that may hold comics newer than the last retrieved one
        and yields an info dict for each image dated after it.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        # '+' rather than '*': an empty group (e.g. an href starting with
        # '//') would otherwise match and then crash in int('').
        link_re = re.compile('^([0-9]+)/([0-9]+)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Kept as strings too: they are reused verbatim in the image
            # pattern and in the image URL.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # Skip months that start more than 31 days before last_date.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]+)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1422
1423
1424
class AbstruseGoose(GenericListableComic):
    """Scraper for the Abstruse Goose webcomic."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the source of strip images.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the info dict for a comic page.

        The number comes from the comic URL and the title from the archive
        link text.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1447
1448
1449
class PhDComics(GenericNavigableComic):
    """Scraper for PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'first' button image, or None if absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, or None if absent."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for a comic page, read from its meta tags."""
        title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_tag['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'img': [tag['content'] for tag in imgs],
            'title': title,
        }
1478
1479
1480
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the 'First' button image, or None
        when no such image is found (instead of failing on `None.parent`).
        """
        img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the 'Next'/'Back' button image.
        None is returned when the image is missing or when its parent
        carries no href attribute.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            return None
        link = img.parent
        # A parent without href is not a usable link.
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, the title and the publication
        date (day/month/year) parsed from the date header.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1512
1513
1514
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its id)."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Image sources are joined to the class url.
        """
        title = soup.find('meta', property='og:title')['content']
        article = soup.find('div', class_='single-article')
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in article.find_all('img')]
        return {
            'title': title,
            'img': img_urls,
        }
1538
1539
1540
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor whose href ends with 'comic=1')."""
        first_href_re = re.compile('comic=1$')
        return get_soup_at_url(cls.url).find('a', href=first_href_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its title)."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the 'comic=<num>' suffix of the url.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        img_url = urljoin_wrapper(comic_url, img['src'])
        return {
            'num': num,
            'img': [img_url],
            'title': img.get('title')
        }
1570
1571
1572
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the div with id 'st', or None when that div
        is not found (same handling as get_navi_link).
        """
        div = get_soup_at_url(cls.url).find("div", id="st")
        return div.parent if div else None

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the div with id 'nx' (next) or 'pvs'
        (previous), or None when that div is not found.
        """
        div = last_soup.find("div", id="nx" if next_ else "pvs")
        return div.parent if div else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image (inside div 'tt') and one strip image
        (id 'strip') are expected; the description is made of the title
        attributes of both images.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        imgs = title_imgs + strip_imgs
        desc = ' '.join(i['title'] for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'description': desc,
        }
1606
1607
1608
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its accesskey)."""
        access_key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=access_key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description are read from meta tags, images from the
        <img> tags having itemprop="image".
        """
        title = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        description = soup.find('meta', property='og:description')['content']
        img_urls = [img['src'] for img in soup.find_all('img', itemprop="image")]
        return {
            'title': title,
            'description': description,
            'img': img_urls,
        }
1632
1633
1634
class SomethingOfThatIlk(GenericEmptyComic):
    """Class to retrieve the Something Of That Ilk comics.

    The site does not exist anymore.
    """
    url = 'http://www.somethingofthatilk.com'
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
1639
1640
1641
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are all the <img> tags inside the div with id 'comic'.
        """
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_urls = [img['src'] for img in comic_div.find_all('img')]
        return {
            'title': title,
            'img': img_urls,
        }
1659
1660
1661
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links (rel='bookmark') in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Pages without a div with id 'comic' yield an empty image list and
        empty title/alt strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Values used when the page holds no comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1698
1699
1700
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns image urls, title and the publication date parsed from the
        'post-date' span.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_urls = [img['src'] for img in comic_div.find_all('img')]
        return {
            'img': img_urls,
            'title': title,
            'day': post_date.day,
            'month': post_date.month,
            'year': post_date.year,
        }
1722
1723
1724
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns image urls, title and the alt text of the first image.
        A page whose 'comic' div holds no image yields an empty image list
        and an empty alt text instead of raising IndexError.
        """
        title = soup.find('h2', class_='post-title').string
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        # Guard against pages without any comic image.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1745
1746
1747
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every comic image is expected to have alt and title attributes
        equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        post_date = string_to_date(
            soup.find("span", class_="post-date").string, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        for img in comic_imgs:
            assert img['alt'] == img['title'] == title
        return {
            'day': post_date.day,
            'month': post_date.month,
            'year': post_date.year,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
        }
1773
1774
1775
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is built by joining the title attributes of the images.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        for img in comic_imgs:
            assert img['title'] == img['alt']
        img_titles = [img['title'] for img in comic_imgs]
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': ' '.join(img_titles),
        }
1794
1795
1796
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: a stray space would make the url invalid.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from meta tags. The
        images are the 'og:image' meta tags minus a fixed set of site
        images.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as a YYYY-MM-DD date.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # Images listed as 'og:image' that are excluded from the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1828
1829
1830
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns the publication date, image urls, title and the alt text
        of the first image. A page whose 'comic' div holds no image yields
        an empty image list and an empty alt text instead of raising
        IndexError.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against pages without any comic image.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1857
1858
1859
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected in the 'comicpane' div, each with
        title and alt attributes equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        for img in pane_imgs:
            assert img['title'] == img['alt'] == title
        return {
            'day': post_date.day,
            'month': post_date.month,
            'year': post_date.year,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'author': author,
        }
1887
1888
1889
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the first 10 characters of the datetime
        attribute of the <time> tag.
        """
        title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        iso_date = soup.find('time')['datetime'][:10]
        day = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [img['src'] for img in content_imgs],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
1916
1917
1918
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor with id 'firstlink')."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its id)."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the last '/'-separated component of the url.
        """
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = img.get('title')
        # The src attribute is stripped before being joined to the url.
        img_url = urljoin_wrapper(comic_url, img['src'].strip())
        num = int(comic_url.rsplit('/', 1)[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': num,
        }
1947
1948
1949
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archives/'))
        return reversed(archive_soup.find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get the comic url from the anchor inside an archive cell."""
        anchor = td.find('a')
        return anchor['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        Month and day come from the sibling preceding the archive cell,
        the year from the first numeric path component of the comic url.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        year = re.compile('^%s/([0-9]+)/' % cls.url).match(url).group(1)
        day = string_to_date(month_and_day + ' ' + year, '%b %d %Y')
        # Single image: the first <img> of the 'comic' div.
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1985
1986
1987
class DiscoBleach(GenericEmptyComic):
    """Class to retrieve Disco Bleach Comics.

    Retrieval does not work anymore.
    """
    url = 'http://discobleach.com'
    name = 'discobleach'
    long_name = 'Disco Bleach'
1992
1993
1994
class TubeyToons(GenericEmptyComic):
    """Class to retrieve TubeyToons comics.

    Retrieval does not work anymore.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    url = 'http://tubeytoons.com'
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - check
    # the other Tubey Toons classes before renaming the category.
    _categories = ('TUNEYTOONS', )
2002
2003
2004
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected in the 'comicpane' div; all images
        must share the same title/alt text, which is returned as 'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        # The author is the second child of the 'post-author' span.
        author = soup.find('span', class_='post-author').contents[1].string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt = pane_imgs[0]['title']
        for img in pane_imgs:
            assert img['title'] == img['alt'] == alt
        return {
            'month': post_date.month,
            'year': post_date.year,
            'day': post_date.day,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
2032
2033
2034
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive anchors pointing to '<url>/comic/...', reversed."""
        comic_href_re = re.compile('^%s/comic/.' % cls.url)
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return reversed(archive_soup.find_all('a', href=comic_href_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; without image the
        title is an empty string.
        """
        post_imgs = soup.find('div', class_='post').find_all('img')
        assert len(post_imgs) <= 1
        if post_imgs:
            title = post_imgs[0].get('title', "")
        else:
            title = ""
        return {
            'img': [img['src'] for img in post_imgs],
            'title': title,
        }
2058
2059
2060
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor titled "First")."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its title)."""
        anchor_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only the images of the 'comic' div with empty alt and title
        attributes are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        img_urls = [img['src'] for img in comic_div.find_all('img', alt='', title='')]
        return {
            'title': title,
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2090
2091
2092
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        A page without 'comic' div or without image yields an empty image
        list and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        # The title is taken from the first image, when there is one.
        title = comic_imgs[0]['title'] if comic_imgs else ""
        for img in comic_imgs:
            assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
        }
2118
2119
2120
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the image named 'beginArrow', or None when
        that image is not found (instead of failing on `None.parent`).
        """
        img = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous), or None when that image is not found.
        """
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        img = last_soup.find('img', attrs={'name': arrow_name})
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title comes from the 'twitter:title' meta tag, images from every
        'og:image' meta tag.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2146
2147
2148
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Get the 'post-link' anchors of the 'post-list' list, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        posts = archive_soup.find('ul', class_='post-list')
        post_links = posts.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Title and images are read from the 'og:title'/'og:image' meta tags.
        """
        title = soup.find('meta', property='og:title')['content']
        img_urls = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        return {
            'title': title,
            'img': img_urls,
        }
2170
2171
2172
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the archive table body, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get the comic url from the anchor in the second cell of a row."""
        _, link_cell, __ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        Each archive row must hold exactly three cells: the second one
        carries the link (and title), the third one the date.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description meta tag is optional.
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(t['content'] for t in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        img_urls = [img['src'] for img in content_imgs]
        alt = ' '.join(img['alt'] for img in content_imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': img_urls,
            'alt': alt,
        }
2214
2215
2216
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea webcomic."""
    name = "happletea"
    long_name = "Happle Tea"
    url = "http://www.happletea.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict (title, images, alt, date, author) for a page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        images = soup.find("div", id="comic").find_all("img")
        post_div = soup.find("div", class_="post-content")
        post_title = post_div.find("h2", class_="post-title").string
        post_author = post_div.find("a", rel="author").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity check: each image carries the same text as alt and title.
        for image in images:
            assert image["alt"] == image["title"]
        sources = [image["src"] for image in images]
        alt_text = "".join(image["alt"] for image in images)
        return {
            "title": post_title,
            "img": sources,
            "alt": alt_text,
            "month": published.month,
            "year": published.year,
            "day": published.day,
            "author": post_author,
        }
2243
2244
2245
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors webcomic."""
    name = "rps"
    long_name = "Rock Paper Scissors"
    url = "http://rps-comics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, transcript, short url and image urls for a page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        page_title = soup.find("title").string
        og_images = soup.find_all("meta", property="og:image")
        shortlink = soup.find("link", rel="shortlink")["href"]
        transcript_text = soup.find("div", id="transcript-content").string
        image_urls = [meta["content"] for meta in og_images]
        return {
            "title": page_title,
            "transcript": transcript_text,
            "short_url": shortlink,
            "img": image_urls,
        }
2266
2267
2268
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome webcomic."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = "fatawesome"
    long_name = "Fat Awesome"
    url = "http://fatawesome.com/comics"
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://fatawesome.com/shortbus/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict built from the page's meta tags and image.

        `soup` is the parsed comic page; `link` is not used here.
        """
        twitter_title = soup.find("meta", attrs={"name": "twitter:title"})["content"]
        meta_description = soup.find("meta", attrs={"name": "description"})["content"]
        tag_meta = soup.find("meta", property="article:tag")
        # The tag meta element is optional.
        if tag_meta:
            tags = tag_meta["content"]
        else:
            tags = ""
        published_meta = soup.find("meta", property="article:published_time")
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(published_meta["content"][:10], "%Y-%m-%d")
        images = soup.find_all("img", attrs={"data-recalc-dims": "1"})
        assert len(images) == 1
        alt_text = "".join(image["alt"] for image in images)
        # Drop the query string that follows the last '?' in the image url.
        sources = [image["src"].rsplit("?", 1)[0] for image in images]
        return {
            "title": twitter_title,
            "description": meta_description,
            "tags": tags,
            "alt": alt_text,
            "img": sources,
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2299
2300 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2301
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic webcomic."""
    # Also on http://tapastic.com/series/anything
    name = "anythingcomic"
    long_name = "Anything Comic"
    url = "http://www.anythingcomic.com"

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive chapter table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "archive/"))
        rows = archive_soup.find("table", id="chapter_table").find_all("tr")
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic linked from archive row `tr`."""
        _, comic_cell, _, _ = tr.find_all("td")
        anchor = comic_cell.find("a")
        return urljoin_wrapper(cls.url, anchor["href"])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the info dict for one comic.

        `soup` is the parsed comic page. `tr` is a four-cell archive row:
        number, comic link, date, and a last cell that is ignored.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all("td")
        number = int(num_cell.string)
        anchor = comic_cell.find("a")
        comic_title = anchor.string
        images = soup.find_all("img", id="comic_image")
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(images) == 1
        for image in images:
            assert image.get("alt") == image.get("title")
        first_alt = images[0].get("alt", "")
        sources = [image["src"] for image in images]
        return {
            "num": number,
            "title": comic_title,
            "alt": first_alt,
            "img": sources,
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2342
2343
2344
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = "millsap"
    long_name = "Lonnie Millsap"
    url = "http://www.lonniemillsap.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.lonniemillsap.com/?p=42"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        post_title = soup.find("h2", class_="post-title").string
        post_div = soup.find("div", class_="post-content")
        author_span = post_div.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = post_div.find("div", class_="entry").find_all("img")
        sources = [image["src"] for image in images]
        return {
            "title": post_title,
            "author": post_author,
            "img": sources,
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2370
2371
2372
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions webcomic."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = "lins"
    long_name = "L.I.N.S. Editions"
    url = "https://linsedition.com"
    _categories = ("LINS", )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "https://linsedition.com/2011/09/07/l-i-n-s/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date read from the page's meta tags.

        `soup` is the parsed comic page; `link` is not used here.
        """
        og_title = soup.find("meta", property="og:title")["content"]
        og_images = soup.find_all("meta", property="og:image")
        published_meta = soup.find("meta", property="article:published_time")
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(published_meta["content"][:10], "%Y-%m-%d")
        image_urls = [meta["content"] for meta in og_images]
        return {
            "title": og_title,
            "img": image_urls,
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2398
2399
2400
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack webcomic."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = "thor"
    long_name = "Thor's Thundershack"
    url = "http://www.thorsthundershack.com"
    _categories = ("THOR", )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find("a", class_="first navlink")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous, when `next_` is false) comic link.

        Anchors whose href is exactly '/comic' are skipped; None is returned
        when no other anchor with the wanted rel exists.
        """
        wanted_rel = "next" if next_ else "prev"
        candidates = last_soup.find_all("a", rel=wanted_rel)
        return next(
            (anchor for anchor in candidates if anchor["href"] != "/comic"),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict built from the page's microdata attributes.

        `soup` is the parsed comic page; `link` is not used here.
        """
        meta_title = soup.find("meta", attrs={"name": "description"})["content"]
        body_text = soup.find("div", itemprop="articleBody").text
        author_name = soup.find("span", itemprop="author copyrightHolder").string
        images = soup.find_all("img", itemprop="image")
        for image in images:
            assert image["title"] == image["alt"]
        # Only the first image's alt text is kept, if there is any image.
        if images:
            alt_text = images[0]["alt"]
        else:
            alt_text = ""
        raw_date = soup.find("time", itemprop="datePublished")["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        # Image sources are resolved against the site url.
        sources = [urljoin_wrapper(cls.url, image["src"]) for image in images]
        return {
            "img": sources,
            "month": published.month,
            "year": published.year,
            "day": published.day,
            "author": author_name,
            "title": meta_title,
            "alt": alt_text,
            "description": body_text,
        }
2443
2444
2445
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack webcomic."""
    name = "gerbil"
    long_name = "Gerbil With A Jetpack"
    url = "http://gerbilwithajetpack.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        post_title = soup.find("h2", class_="post-title").string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # The first image's alt text is the reference for all images.
        alt_text = images[0]["alt"]
        for image in images:
            assert image["alt"] == image["title"] == alt_text
        sources = [image["src"] for image in images]
        return {
            "img": sources,
            "title": post_title,
            "alt": alt_text,
            "author": post_author,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2472
2473
2474
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Every Day Blues webcomic."""
    name = 'blues'
    long_name = 'Every Day Blues'
    url = 'http://everydayblues.net'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here. The date is
        parsed with the "de_DE.utf8" locale argument.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%d. %B %Y', 'de_DE.utf8')
        images = soup.find('div', id='comic').find_all('img')
        # Each image is expected to use the post title as alt and title.
        for image in images:
            assert image['alt'] == image['title'] == post_title
        assert len(images) <= 1
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': post_title,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2500
2501
2502
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics webcomic."""
    name = 'biter'
    long_name = 'Biter Comics'
    url = 'http://www.bitercomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        entry_title = soup.find('h1', class_='entry-title').string
        author_span = soup.find('span', class_='author vcard')
        entry_author = author_span.find('a').string
        raw_date = soup.find('span', class_='entry-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find('div', id='comic').find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        # Exactly one image is expected per page.
        assert len(images) == 1
        alt_text = images[0]['alt']
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': entry_title,
            'alt': alt_text,
            'author': entry_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2530
2531
2532
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti webcomic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti"
    long_name = "The Awkward Yeti"
    url = "http://theawkwardyeti.com"
    _categories = ("YETI", )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        post_title = soup.find("h2", class_="post-title").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Only the first image (if any) must have matching alt and title.
        assert not images or images[0]["alt"] == images[0]["title"]
        sources = [image["src"] for image in images]
        return {
            "img": sources,
            "title": post_title,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2559
2560
2561
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts webcomic."""
    name = "pleasant"
    long_name = "Pleasant Thoughts"
    url = "http://pleasant-thoughts.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image urls found in the post content.

        `soup` is the parsed comic page; `link` is not used here.
        """
        post_div = soup.find("div", class_="post-content")
        post_title = post_div.find("h2", class_="post-title").string
        entry_div = post_div.find("div", class_="entry")
        sources = [image["src"] for image in entry_div.find_all("img")]
        return {
            "title": post_title,
            "img": sources,
        }
2579
2580
2581
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me webcomic."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = "mister"
    long_name = "Mister & Me"
    url = "http://www.mister-and-me.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        post_title = soup.find("h2", class_="post-title").string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image["alt"] == image["title"]
        # A page holds at most one image; alt is empty when there is none.
        assert len(images) <= 1
        if images:
            alt_text = images[0]["alt"]
        else:
            alt_text = ""
        sources = [image["src"] for image in images]
        return {
            "img": sources,
            "title": post_title,
            "alt": alt_text,
            "author": post_author,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2611
2612
2613
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics webcomic."""
    name = "lastplace"
    long_name = "Last Place Comics"
    url = 'http://lastplacecomics.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find('span', class_='post-author').find('a').string
        date_text = soup.find('span', class_='post-date').string
        when = string_to_date(date_text, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        # Zero or one image is expected on a page.
        assert len(pictures) <= 1
        alt_text = pictures[0]['alt'] if pictures else ''
        picture_urls = [picture['src'] for picture in pictures]
        return {
            'img': picture_urls,
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2641
2642
2643
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity webcomic."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = "absurdity"
    long_name = "Tales of Absurdity"
    url = "http://talesofabsurdity.com"
    _categories = ("ABSURDITY", )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        post_title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image["alt"] == image["title"]
        # Alt text comes from the first image; empty when the page has none.
        if images:
            alt_text = images[0]["alt"]
        else:
            alt_text = ""
        sources = [image["src"] for image in images]
        return {
            "img": sources,
            "title": post_title,
            "alt": alt_text,
            "author": post_author,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2673
2674
2675
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami webcomic."""
    name = 'origami'
    long_name = 'Endless Origami'
    url = 'http://endlessorigami.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find('span', class_='post-author').find('a').string
        date_text = soup.find('span', class_='post-date').string
        when = string_to_date(date_text, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        # Alt text is taken from the first image when one exists.
        alt_text = pictures[0]['alt'] if pictures else ''
        picture_urls = [picture['src'] for picture in pictures]
        return {
            'img': picture_urls,
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2702
2703
2704
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C webcomic."""
    name = "planc"
    long_name = "Plan C"
    url = "http://www.plancomic.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        post_title = soup.find("h2", class_="post-title").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        sources = [image["src"] for image in comic_div.find_all("img")]
        return {
            "title": post_title,
            "img": sources,
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2726
2727
2728
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni webcomic."""
    name = "buni"
    long_name = "BuniComics"
    url = "http://www.bunicomic.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image url and title of a comic page.

        `soup` is the parsed comic page; `link` is not used here. The title
        is read from the image's own title attribute.
        """
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image["alt"] == image["title"]
        # Exactly one image is expected per page.
        assert len(images) == 1
        sources = [image["src"] for image in images]
        return {
            "img": sources,
            "title": images[0]["title"],
        }
2746
2747
2748
class GenericCommitStrip(GenericNavigableComic):
    """Shared retriever logic for Commit Strip in its different languages."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Subclasses provide the url of the first strip.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, description and image urls for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        og_description = soup.find("meta", property="og:description")["content"]
        og_title = soup.find("meta", property="og:title")["content"]
        images = soup.find("div", class_="entry-content").find_all("img")
        # Images without a title attribute contribute an empty string.
        image_titles = " ".join(image.get("title", "") for image in images)
        # Sources are converted to plain ASCII and resolved against cls.url.
        sources = []
        for image in images:
            ascii_src = convert_iri_to_plain_ascii_uri(image["src"])
            sources.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            "title": og_title,
            "title2": image_titles,
            "description": og_description,
            "img": sources,
        }
2767
2768
2769
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    # Url of the first strip, from which navigation starts.
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2776
2777
2778
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    # Url of the first strip, from which navigation starts.
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2784
2785
2786
class GenericBoumerie(GenericNavigableComic):
    """Shared retriever logic for Boumeries in its different languages."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Subclasses provide the date format and the locale used to parse dates.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, images, title, author and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here. The date is
        parsed with the class's `date_format` and `lang` attributes.
        """
        post_title = soup.find("h2", class_="post-title").string
        shortlink = soup.find("link", rel="shortlink")["href"]
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image["alt"] == image["title"]
        sources = [image["src"] for image in images]
        return {
            "short_url": shortlink,
            "img": sources,
            "title": post_title,
            "author": post_author,
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2812
2813
2814
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Format and locale of the post dates on the English site.
    date_format = '%B %d, %Y'
    lang = "en_GB.UTF-8"
2821
2822
2823
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    # Format and locale of the post dates on the French site.
    date_format = '%A, %d %B %Y'
    lang = 'fr_FR.utf8'
2831
2832
2833
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.

        Returns a dict with the title (empty string when the page has neither
        an <h1> nor an <h2>), the og:description text (None when the meta tag
        is missing), the short url, the image urls and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc = soup.find('meta', property='og:description')
        # Keep the text of the description rather than the parsed <meta>
        # element itself. None is kept for a missing tag, which is what was
        # stored in that case before.
        description = desc.get('content') if desc else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': description,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2865
2866
2867
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess webcomic."""
    name = "optipess"
    long_name = "Optipess"
    url = "http://www.optipess.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, author, images and date for a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        post_title = soup.find("h2", class_="post-title").string
        post_author = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find("div", id="comic")
        # A page without a comic div simply has no image.
        if comic_div:
            images = comic_div.find_all("img")
        else:
            images = []
        # The first image's title attribute serves as the alt text.
        if images:
            alt_text = images[0]["title"]
        else:
            alt_text = ""
        for image in images:
            assert image["alt"] == image["title"] == alt_text
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        sources = [image["src"] for image in images]
        return {
            "title": post_title,
            "alt": alt_text,
            "author": post_author,
            "img": sources,
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2895
2896
2897
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.

        The comic number is the `p` query value of the page's shortlink,
        which must start with `cls.url` followed by '/?p='.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its '.' only matches a literal dot instead
        # of acting as a regex wildcard.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image's title attribute is the reference alt text.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2927
2928
2929
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        soup is the parsed page of the comic; link is not used here.
        Returns a dict with the short url, the post number extracted from
        it, the image urls, the publication date, the title, the tags
        (space-separated), the alt text and the author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The base url is escaped so that its dots are matched literally
        # instead of as "any character".
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images are expected to share the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2965
2966
2967
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no corresponding navigation item
        instead of failing on the missing element.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        soup is the parsed page of the comic; link is not used here.
        Returns a dict with the image urls, the title, the author and the
        publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2998
2999 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3000
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next/previous comic, None if absent."""
        # The site labels are swapped: its 'prev' item leads to the next comic.
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        def og_content(prop):
            """Return the content of the meta tag with given property."""
            return soup.find('meta', property=prop)['content']

        headline = og_content('og:title')
        summary = og_content('og:description')
        published = soup.find('time', class_='published')['datetime']
        when = string_to_date(published, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Only keep the images which actually have a source.
        pictures = [pic for pic in body.find_all('img') if pic.get('src') is not None]
        first_alt = pictures[0]['alt']
        return {
            'title': headline,
            'alt': first_alt,
            'description': summary,
            'author': writer,
            'day': when.day,
            'month': when.month,
            'year': when.year,
            'img': [pic['src'] for pic in pictures],
        }
3039
3040
3041
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics hosted on WordPress with the Inkblot theme."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up on the home page."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        headline = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        # Only the first 10 characters of the timestamp are parsed as a date.
        published = soup.find('meta', property='article:published_time')['content']
        when = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': headline,
            'day': when.day,
            'month': when.month,
            'year': when.year,
            'img': [pic['src'] for pic in pictures],
        }
3064
3065
3066
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
3074
3075
3076
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = 'The Ism'
    url = 'http://www.theism-comics.com'
3082
3083
3084
class WoodenPlankStudios(GenericEmptyComic, GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
3089
3090
3091
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        page = get_soup_at_url(cls.url)
        first_button = page.find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, None if absent."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        headline = soup.find('meta', property='og:title')['content']
        pictures = soup.find_all('meta', property='og:image')
        return {
            'title': headline,
            'img': [pic['content'] for pic in pictures],
        }
3119
3120
3121
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first matching navigation link which does not merely
        point to the site root, or None when there is no such link.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        for link in last_soup.find_all("a", id=nav_id):
            # Links to the site root are skipped; compare against cls.url
            # rather than a second hard-coded copy of the address.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (taken from the image title) and the
        image urls. Exactly one image is expected on the page.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3152
3153
3154
class Ubertool(GenericNavigableComic):
    """Retriever for the Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        headline = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        when = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'month': when.month,
            'year': when.year,
            'day': when.day,
        }
3179
3180
3181
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' or 'prev' as requested."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        headline = soup.find('title').string
        pictures = soup.find('div', id='image').find_all('img')
        # The title attribute of the first image, when present, is the alt text.
        hover_text = pictures[0].get('title', '')
        absolute_srcs = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': absolute_srcs,
            'title': headline,
            'alt': hover_text,
        }
3206
3207
3208
class PomComics(GenericNavigableComic):
    """Retriever for the Pom Comics (Piece of Me)."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn_first' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn_next' or 'btn_prev' anchor as requested."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        headline = soup.find('h1', id="comic-name").string
        summary = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        pictures = soup.find('div', class_='comic').find_all('img')
        absolute_srcs = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': headline,
            'desc': summary,
            'tags': keywords,
            'img': absolute_srcs,
        }
3238
3239
3240
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the chevron icon, or None when the
        page has no such icon (instead of failing on the missing element).
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the url and title taken from the twitter meta
        tags, the title and alt attributes of the first image and the
        image urls.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # Date retrieval is currently disabled:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3275
3276
3277
class MakeItStoopid(GenericNavigableComic):
    """Retriever for the Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of the page.

        The elements without href are replaced by None in the result.
        """
        nav_elts = soup.find_all(class_='cnav')
        # The first 5 elements are expected to be repeated by the remaining ones.
        first_bar = nav_elts[:5]
        second_bar = nav_elts[5:]
        assert first_bar == second_bar
        # Positions are: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the home page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' navigation element as requested."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup.

        The title comes from the link which led to the page.
        """
        pictures = soup.find_all('img', id='comicimg')
        return {
            'title': link['title'],
            'img': [pic['src'] for pic in pictures],
        }
3311
3312
3313
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        soup is the parsed page of the comic; link is not used here.
        Returns a dict with the title and the image urls.
        """
        # The leftover debug print of the link has been removed.
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3335
3336
3337
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        pictures = soup.find_all('meta', property='og:image')
        # Only the first 10 characters of the timestamp are parsed as a date.
        published = soup.find('meta', property='article:published_time')['content']
        when = string_to_date(published[:10], "%Y-%m-%d")
        headline = soup.find('meta', property='og:title')['content']
        return {
            'img': [pic['content'] for pic in pictures],
            'day': when.day,
            'month': when.month,
            'year': when.year,
            'title': headline,
        }
3360
3361
3362
class ConsoliaComics(GenericNavigableComic):
    """Retriever for the Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' anchor as requested."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        headline = soup.find('meta', property='og:title')['content']
        published = soup.find('time')["datetime"]
        when = string_to_date(published, "%Y-%m-%d")
        pictures = soup.find_all('meta', property='og:image')
        return {
            'title': headline,
            'img': [pic['content'] for pic in pictures],
            'day': when.day,
            'month': when.month,
            'year': when.year,
        }
3393
3394
3395
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retriever for the Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor to the newer/older post as requested."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        headline = soup.find('title').string
        post_body = soup.find('div', itemprop='description articleBody')
        pictures = post_body.find_all('img')
        writer = soup.find('span', itemprop='author').string
        return {
            'img': [pic['src'] for pic in pictures],
            'author': writer,
            'title': headline,
        }
3420
3421
3422
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # prev is next / next is prev
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        soup is the parsed page of the comic; link is not used here.
        Returns a dict with the title, the alt text of the first image (an
        empty string when there is no image), the description, the author,
        the publication date and the absolute image urls.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The content is in one of two possible containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `in` on a BeautifulSoup tag appears to test its
        # children rather than its attributes, which would make this check
        # always pass - confirm before relying on it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # 'alt' is always a string: it used to be a list when no image was found.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3460
3461
3462
class GloryOwlComix(GenericNavigableComic):
    """Retriever for the Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor to the newer/older post as requested."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing the comic found in soup."""
        headline = soup.find('title').string
        picture_links = soup.find_all('link', rel='image_src')
        writer = soup.find('a', rel='author').string
        return {
            'img': [pic['href'] for pic in picture_links],
            'author': writer,
            'title': headline,
        }
3487
3488
3489
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the comic information of every post following last_comic,
        skipping the posts for which get_comic_info returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post.

        A message is printed when the url does not start with cls.url but
        the url is returned in any case.
        """
        url = post['url']
        if not url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_api_url(cls):
        """Get the url of the V1 API ('/api/read/') for the blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Get the API url for the post with the given (integer) id."""
        return cls.get_api_url() + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        post is a 'post' element as found by get_posts. Returns None for
        the posts whose type is not 'photo', otherwise a dict with the
        urls, the date, the title (photo caption), the tags, the image
        urls, the tumblr id and the API url of the post.
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        # NOTE(review): fromtimestamp uses the local timezone, so the day
        # may presumably differ between machines - confirm it is acceptable.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Only the posts newer than last_comic (identified by its
        'tumblr-id') are returned; all posts are returned when there is no
        last comic. An empty iterable is returned when the last comic
        cannot be found anymore."""
        # Imported here to make sure the submodule is loaded: a bare
        # 'import urllib' does not guarantee that 'urllib.error' is available.
        import urllib.error
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        posts_acc = []
        # Test the id rather than last_comic so that both checks agree:
        # an empty last_comic has no id to look for.
        if waiting_for_id is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                # Nothing has been accumulated at this point: empty result.
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                tumblr_id = int(p['id'])
                if waiting_for_id and waiting_for_id == tumblr_id:
                    # Reached the last known post: everything newer is collected.
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_id is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_id)
        return []
3590
3591
3592
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retriever for Saturday Morning Breakfast Cereal (Tumblr version)."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = "smbc-tumblr"
    long_name = "Saturday Morning Breakfast Cereal (from Tumblr)"
    url = "http://smbc-comics.tumblr.com"
    _categories = ("SMBC", )
3600
3601
3602
class IrwinCardozo(GenericTumblrV1):
    """Retriever for the Irwin Cardozo comics."""
    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3607
3608
3609
class AccordingToDevin(GenericTumblrV1):
    """Retriever for the According To Devin comics."""
    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3614
3615
3616
class ItsTheTieTumblr(GenericTumblrV1):
    """Retriever for the It's the tie comics (Tumblr version)."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ("TIE", )
3624
3625
3626
class OctopunsTumblr(GenericTumblrV1):
    """Retriever for the Octopuns comics (Tumblr version)."""
    # Also on http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3632
3633
3634
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retriever for the Pictures In Boxes comics (Tumblr version)."""
    # Also on http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "https://picturesinboxescomic.tumblr.com"
3640
3641
3642
class TubeyToonsTumblr(GenericTumblrV1):
    """Retriever for the Tubey Toons comics (Tumblr version)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "https://tubeytoons.tumblr.com"
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # against the category used by the other Tubey Toons classes.
    _categories = ("TUNEYTOONS", )
3650
3651
3652
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retriever for the Unearthed comics (Tumblr version)."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "https://unearthedcomics.tumblr.com"
    _categories = ("UNEARTHED", )
3660
3661
3662
class PieComic(GenericTumblrV1):
    """Retriever for the Pie Comic comics."""
    name = "pie"
    long_name = "Pie Comic"
    url = 'http://piecomic.tumblr.com'
3667
3668
3669
class MrEthanDiamond(GenericTumblrV1):
    """Retriever for the Mr Ethan Diamond comics."""
    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3674
3675
3676
class Flocci(GenericTumblrV1):
3677
    """Class to retrieve floccinaucinihilipilification comics."""
3678
    name = 'flocci'
3679
    long_name = 'floccinaucinihilipilification'
3680
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3681
3682
3683
class UpAndOut(GenericTumblrV1):
3684
    """Class to retrieve Up & Out comics."""
3685
    # Also on http://tapastic.com/series/UP-and-OUT
3686
    name = 'upandout'
3687
    long_name = 'Up And Out (from Tumblr)'
3688
    url = 'http://upandoutcomic.tumblr.com'
3689
3690
3691
class Pundemonium(GenericTumblrV1):
3692
    """Class to retrieve Pundemonium comics."""
3693
    name = 'pundemonium'
3694
    long_name = 'Pundemonium'
3695
    url = 'http://monstika.tumblr.com'
3696
3697
3698
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
3699
    """Class to retrieve Poorly Drawn Lines comics."""
3700
    # Also on http://poorlydrawnlines.com
3701
    name = 'poorlydrawn-tumblr'
3702
    long_name = 'Poorly Drawn Lines (from Tumblr)'
3703
    url = 'http://pdlcomics.tumblr.com'
3704
    _categories = ('POORLYDRAWN', )
3705
3706
3707
class PearShapedComics(GenericTumblrV1):
3708
    """Class to retrieve Pear Shaped Comics."""
3709
    name = 'pearshaped'
3710
    long_name = 'Pear-Shaped Comics'
3711
    url = 'http://pearshapedcomics.com'
3712
3713
3714
class PondScumComics(GenericTumblrV1):
3715
    """Class to retrieve Pond Scum Comics."""
3716
    name = 'pond'
3717
    long_name = 'Pond Scum'
3718
    url = 'http://pondscumcomic.tumblr.com'
3719
3720
3721
class MercworksTumblr(GenericTumblrV1):
3722
    """Class to retrieve Mercworks comics."""
3723
    # Also on http://mercworks.net
3724
    name = 'mercworks-tumblr'
3725
    long_name = 'Mercworks (from Tumblr)'
3726
    url = 'http://mercworks.tumblr.com'
3727
3728
3729
class OwlTurdTumblr(GenericTumblrV1):
3730
    """Class to retrieve Owl Turd comics."""
3731
    # Also on http://tapastic.com/series/Owl-Turd-Comix
3732
    name = 'owlturd-tumblr'
3733
    long_name = 'Owl Turd (from Tumblr)'
3734
    url = 'http://owlturd.com'
3735
    _categories = ('OWLTURD', )
3736
3737
3738
class VectorBelly(GenericTumblrV1):
3739
    """Class to retrieve Vector Belly comics."""
3740
    # Also on http://vectorbelly.com
3741
    name = 'vector'
3742
    long_name = 'Vector Belly'
3743
    url = 'http://vectorbelly.tumblr.com'
3744
3745
3746
class GoneIntoRapture(GenericTumblrV1):
3747
    """Class to retrieve Gone Into Rapture comics."""
3748
    # Also on http://goneintorapture.tumblr.com
3749
    # Also on http://tapastic.com/series/Goneintorapture
3750
    name = 'rapture'
3751
    long_name = 'Gone Into Rapture'
3752
    url = 'http://goneintorapture.com'
3753
3754
3755
class TheOatmealTumblr(GenericTumblrV1):
3756
    """Class to retrieve The Oatmeal comics."""
3757
    # Also on http://theoatmeal.com
3758
    name = 'oatmeal-tumblr'
3759
    long_name = 'The Oatmeal (from Tumblr)'
3760
    url = 'http://oatmeal.tumblr.com'
3761
3762
3763
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
3764
    """Class to retrieve Heck If I Know Comics."""
3765
    # Also on http://tapastic.com/series/Regular
3766
    name = 'heck-tumblr'
3767
    long_name = 'Heck if I Know comics (from Tumblr)'
3768
    url = 'http://heckifiknowcomics.com'
3769
3770
3771
class MyJetPack(GenericTumblrV1):
3772
    """Class to retrieve My Jet Pack comics."""
3773
    name = 'jetpack'
3774
    long_name = 'My Jet Pack'
3775
    url = 'http://myjetpack.tumblr.com'
3776
3777
3778
class CheerUpEmoKidTumblr(GenericTumblrV1):
3779
    """Class to retrieve CheerUpEmoKid comics."""
3780
    # Also on http://www.cheerupemokid.com
3781
    # Also on http://tapastic.com/series/CUEK
3782
    name = 'cuek-tumblr'
3783
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
3784
    url = 'https://enzocomics.tumblr.com'
3785
3786
3787
class ForLackOfABetterComic(GenericTumblrV1):
3788
    """Class to retrieve For Lack Of A Better Comics."""
3789
    # Also on http://forlackofabettercomic.com
3790
    name = 'lack'
3791
    long_name = 'For Lack Of A Better Comic'
3792
    url = 'http://forlackofabettercomic.tumblr.com'
3793
3794
3795
class ZenPencilsTumblr(GenericTumblrV1):
3796
    """Class to retrieve ZenPencils comics."""
3797
    # Also on http://zenpencils.com
3798
    # Also on http://www.gocomics.com/zen-pencils
3799
    name = 'zenpencils-tumblr'
3800
    long_name = 'Zen Pencils (from Tumblr)'
3801
    url = 'http://zenpencils.tumblr.com'
3802
    _categories = ('ZENPENCILS', )
3803
3804
3805
class ThreeWordPhraseTumblr(GenericTumblrV1):
3806
    """Class to retrieve Three Word Phrase comics."""
3807
    # Also on http://threewordphrase.com
3808
    name = 'threeword-tumblr'
3809
    long_name = 'Three Word Phrase (from Tumblr)'
3810
    url = 'http://threewordphrase.tumblr.com'
3811
3812
3813
class TimeTrabbleTumblr(GenericTumblrV1):
3814
    """Class to retrieve Time Trabble comics."""
3815
    # Also on http://timetrabble.com
3816
    name = 'timetrabble-tumblr'
3817
    long_name = 'Time Trabble (from Tumblr)'
3818
    url = 'http://timetrabble.tumblr.com'
3819
3820
3821
class SafelyEndangeredTumblr(GenericTumblrV1):
3822
    """Class to retrieve Safely Endangered comics."""
3823
    # Also on http://www.safelyendangered.com
3824
    name = 'endangered-tumblr'
3825
    long_name = 'Safely Endangered (from Tumblr)'
3826
    url = 'http://tumblr.safelyendangered.com'
3827
3828
3829
class MouseBearComedyTumblr(GenericTumblrV1):
3830
    """Class to retrieve Mouse Bear Comedy comics."""
3831
    # Also on http://www.mousebearcomedy.com
3832
    name = 'mousebear-tumblr'
3833
    long_name = 'Mouse Bear Comedy (from Tumblr)'
3834
    url = 'http://mousebearcomedy.tumblr.com'
3835
3836
3837
class BouletCorpTumblr(GenericTumblrV1):
3838
    """Class to retrieve BouletCorp comics."""
3839
    # Also on http://www.bouletcorp.com
3840
    name = 'boulet-tumblr'
3841
    long_name = 'Boulet Corp (from Tumblr)'
3842
    url = 'https://bouletcorp.tumblr.com'
3843
    _categories = ('BOULET', )
3844
3845
3846
class TheAwkwardYetiTumblr(GenericTumblrV1):
3847
    """Class to retrieve The Awkward Yeti comics."""
3848
    # Also on http://www.gocomics.com/the-awkward-yeti
3849
    # Also on http://theawkwardyeti.com
3850
    # Also on https://tapastic.com/series/TheAwkwardYeti
3851
    name = 'yeti-tumblr'
3852
    long_name = 'The Awkward Yeti (from Tumblr)'
3853
    url = 'http://larstheyeti.tumblr.com'
3854
    _categories = ('YETI', )
3855
3856
3857
class NellucNhoj(GenericTumblrV1):
3858
    """Class to retrieve NellucNhoj comics."""
3859
    name = 'nhoj'
3860
    long_name = 'Nelluc Nhoj'
3861
    url = 'http://nellucnhoj.com'
3862
3863
3864
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
3865
    """Class to retrieve Down The Upward Spiral comics."""
3866
    # Also on http://www.downtheupwardspiral.com
3867
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
3868
    name = 'spiral-tumblr'
3869
    long_name = 'Down the Upward Spiral (from Tumblr)'
3870
    url = 'http://downtheupwardspiral.tumblr.com'
3871
3872
3873
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Declared as `_categories` (with the leading underscore), which is the
    # attribute name the other comic classes of this module use for their
    # category tags; a plain `categories` attribute is a different name.
    _categories = ('DAMILEE', )
3880
3881
3882
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Declared as `_categories` (with the leading underscore), which is the
    # attribute name the other comic classes of this module use for their
    # category tags; a plain `categories` attribute is a different name.
    _categories = ('DAMILEE', )
3891
3892
3893
class OneOneOneOneComicTumblr(GenericTumblrV1):
3894
    """Class to retrieve 1111 Comics."""
3895
    # Also on http://www.1111comics.me
3896
    # Also on https://tapastic.com/series/1111-Comics
3897
    name = '1111-tumblr'
3898
    long_name = '1111 Comics (from Tumblr)'
3899
    url = 'http://comics1111.tumblr.com'
3900
    _categories = ('ONEONEONEONE', )
3901
3902
3903
class JhallComicsTumblr(GenericTumblrV1):
3904
    """Class to retrieve Jhall Comics."""
3905
    # Also on http://jhallcomics.com
3906
    name = 'jhall-tumblr'
3907
    long_name = 'Jhall Comics (from Tumblr)'
3908
    url = 'http://jhallcomics.tumblr.com'
3909
3910
3911
class BerkeleyMewsTumblr(GenericTumblrV1):
3912
    """Class to retrieve Berkeley Mews comics."""
3913
    # Also on http://www.gocomics.com/berkeley-mews
3914
    # Also on http://www.berkeleymews.com
3915
    name = 'berkeley-tumblr'
3916
    long_name = 'Berkeley Mews (from Tumblr)'
3917
    url = 'http://mews.tumblr.com'
3918
    _categories = ('BERKELEY', )
3919
3920
3921
class JoanCornellaTumblr(GenericTumblrV1):
3922
    """Class to retrieve Joan Cornella comics."""
3923
    # Also on http://joancornella.net
3924
    name = 'cornella-tumblr'
3925
    long_name = 'Joan Cornella (from Tumblr)'
3926
    url = 'http://cornellajoan.tumblr.com'
3927
3928
3929
class RespawnComicTumblr(GenericTumblrV1):
3930
    """Class to retrieve Respawn Comic."""
3931
    # Also on http://respawncomic.com
3932
    name = 'respawn-tumblr'
3933
    long_name = 'Respawn Comic (from Tumblr)'
3934
    url = 'https://respawncomic.tumblr.com'
3935
3936
3937
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelled "Hallbeck", consistently with the class name and the blog URL.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because it
    # is presumably shared with the other Hallbeck classes - confirm and
    # rename them together.
    _categories = ('HALLBACK', )
3947
3948
3949
class ComicNuggets(GenericTumblrV1):
3950
    """Class to retrieve Comic Nuggets."""
3951
    name = 'nuggets'
3952
    long_name = 'Comic Nuggets'
3953
    url = 'http://comicnuggets.com'
3954
3955
3956
class PigeonGazetteTumblr(GenericTumblrV1):
3957
    """Class to retrieve The Pigeon Gazette comics."""
3958
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
3959
    name = 'pigeon-tumblr'
3960
    long_name = 'The Pigeon Gazette (from Tumblr)'
3961
    url = 'http://thepigeongazette.tumblr.com'
3962
3963
3964
class CancerOwl(GenericTumblrV1):
3965
    """Class to retrieve Cancer Owl comics."""
3966
    # Also on http://cancerowl.com
3967
    name = 'cancerowl-tumblr'
3968
    long_name = 'Cancer Owl (from Tumblr)'
3969
    url = 'http://cancerowl.tumblr.com'
3970
3971
3972
class FowlLanguageTumblr(GenericTumblrV1):
3973
    """Class to retrieve Fowl Language comics."""
3974
    # Also on http://www.fowllanguagecomics.com
3975
    # Also on http://tapastic.com/series/Fowl-Language-Comics
3976
    # Also on http://www.gocomics.com/fowl-language
3977
    name = 'fowllanguage-tumblr'
3978
    long_name = 'Fowl Language Comics (from Tumblr)'
3979
    url = 'http://fowllanguagecomics.tumblr.com'
3980
    _categories = ('FOWLLANGUAGE', )
3981
3982
3983
class TheOdd1sOutTumblr(GenericTumblrV1):
3984
    """Class to retrieve The Odd 1s Out comics."""
3985
    # Also on http://theodd1sout.com
3986
    # Also on https://tapastic.com/series/Theodd1sout
3987
    name = 'theodd-tumblr'
3988
    long_name = 'The Odd 1s Out (from Tumblr)'
3989
    url = 'http://theodd1sout.tumblr.com'
3990
3991
3992
class TheUnderfoldTumblr(GenericTumblrV1):
3993
    """Class to retrieve The Underfold comics."""
3994
    # Also on http://theunderfold.com
3995
    name = 'underfold-tumblr'
3996
    long_name = 'The Underfold (from Tumblr)'
3997
    url = 'http://theunderfold.tumblr.com'
3998
3999
4000
class LolNeinTumblr(GenericTumblrV1):
4001
    """Class to retrieve Lol Nein comics."""
4002
    # Also on http://lolnein.com
4003
    name = 'lolnein-tumblr'
4004
    long_name = 'Lol Nein (from Tumblr)'
4005
    url = 'http://lolneincom.tumblr.com'
4006
4007
4008
class FatAwesomeComicsTumblr(GenericTumblrV1):
4009
    """Class to retrieve Fat Awesome Comics."""
4010
    # Also on http://fatawesome.com/comics
4011
    name = 'fatawesome-tumblr'
4012
    long_name = 'Fat Awesome (from Tumblr)'
4013
    url = 'http://fatawesomecomedy.tumblr.com'
4014
4015
4016
class TheWorldIsFlatTumblr(GenericTumblrV1):
4017
    """Class to retrieve The World Is Flat Comics."""
4018
    # Also on https://tapastic.com/series/The-World-is-Flat
4019
    name = 'flatworld-tumblr'
4020
    long_name = 'The World Is Flat (from Tumblr)'
4021
    url = 'http://theworldisflatcomics.com'
4022
4023
4024
class DorrisMc(GenericTumblrV1):
    """Class to retrieve Dorris Mc comics."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4030
4031
4032
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Leleoz comics."""
    # NOTE(review): GenericEmptyComic is listed first among the bases, which
    # presumably deactivates the retrieval for this comic; the reason is not
    # recorded here - confirm whether it is still needed.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
4038
4039
4040
class MoonBeardTumblr(GenericTumblrV1):
4041
    """Class to retrieve MoonBeard comics."""
4042
    # Also on http://moonbeard.com
4043
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
4044
    name = 'moonbeard-tumblr'
4045
    long_name = 'Moon Beard (from Tumblr)'
4046
    url = 'http://blog.squiresjam.es'
4047
4048
4049
class AComik(GenericTumblrV1):
    """Class to retrieve A Comik comics."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4054
4055
4056
class ClassicRandy(GenericTumblrV1):
4057
    """Class to retrieve Classic Randy comics."""
4058
    name = 'randy'
4059
    long_name = 'Classic Randy'
4060
    url = 'http://classicrandy.tumblr.com'
4061
4062
4063
class DagssonTumblr(GenericTumblrV1):
4064
    """Class to retrieve Dagsson comics."""
4065
    # Also on http://www.dagsson.com
4066
    name = 'dagsson-tumblr'
4067
    long_name = 'Dagsson Hugleikur (from Tumblr)'
4068
    url = 'https://hugleikurdagsson.tumblr.com'
4069
4070
4071
class LinsEditionsTumblr(GenericTumblrV1):
4072
    """Class to retrieve L.I.N.S. Editions comics."""
4073
    # Also on https://linsedition.com
4074
    # Now on http://warandpeas.tumblr.com
4075
    name = 'lins-tumblr'
4076
    long_name = 'L.I.N.S. Editions (from Tumblr)'
4077
    url = 'https://linscomics.tumblr.com'
4078
    _categories = ('LINS', )
4079
4080
4081
class WarAndPeasTumblr(GenericTumblrV1):
4082
    """Class to retrieve War And Peas comics."""
4083
    # Was on https://linscomics.tumblr.com
4084
    name = 'warandpeas-tumblr'
4085
    long_name = 'War And Peas (from Tumblr)'
4086
    url = 'http://warandpeas.tumblr.com'
4087
    _categories = ('WARANDPEAS', )
4088
4089
4090
class OrigamiHotDish(GenericTumblrV1):
4091
    """Class to retrieve Origami Hot Dish comics."""
4092
    name = 'origamihotdish'
4093
    long_name = 'Origami Hot Dish'
4094
    url = 'http://origamihotdish.com'
4095
4096
4097
class HitAndMissComicsTumblr(GenericTumblrV1):
4098
    """Class to retrieve Hit and Miss Comics."""
4099
    name = 'hitandmiss'
4100
    long_name = 'Hit and Miss Comics'
4101
    url = 'https://hitandmisscomics.tumblr.com'
4102
4103
4104
class HMBlanc(GenericTumblrV1):
4105
    """Class to retrieve HM Blanc comics."""
4106
    name = 'hmblanc'
4107
    long_name = 'HM Blanc'
4108
    url = 'http://hmblanc.tumblr.com'
4109
4110
4111
class TalesOfAbsurdityTumblr(GenericTumblrV1):
4112
    """Class to retrieve Tales Of Absurdity comics."""
4113
    # Also on http://talesofabsurdity.com
4114
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
4115
    name = 'absurdity-tumblr'
4116
    long_name = 'Tales of Absurdity (from Tumblr)'
4117
    url = 'http://talesofabsurdity.tumblr.com'
4118
    _categories = ('ABSURDITY', )
4119
4120
4121
class RobbieAndBobby(GenericTumblrV1):
4122
    """Class to retrieve Robbie And Bobby comics."""
4123
    # Also on http://robbieandbobby.com
4124
    name = 'robbie-tumblr'
4125
    long_name = 'Robbie And Bobby (from Tumblr)'
4126
    url = 'http://robbieandbobby.tumblr.com'
4127
4128
4129
class ElectricBunnyComicTumblr(GenericTumblrV1):
4130
    """Class to retrieve Electric Bunny Comics."""
4131
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
4132
    name = 'bunny-tumblr'
4133
    long_name = 'Electric Bunny Comic (from Tumblr)'
4134
    url = 'http://electricbunnycomics.tumblr.com'
4135
4136
4137
class Hoomph(GenericTumblrV1):
4138
    """Class to retrieve Hoomph comics."""
4139
    name = 'hoomph'
4140
    long_name = 'Hoomph'
4141
    url = 'http://hoom.ph'
4142
4143
4144
class BFGFSTumblr(GenericTumblrV1):
4145
    """Class to retrieve BFGFS comics."""
4146
    # Also on https://tapastic.com/series/BFGFS
4147
    # Also on http://bfgfs.com
4148
    name = 'bfgfs-tumblr'
4149
    long_name = 'BFGFS (from Tumblr)'
4150
    url = 'https://bfgfs.tumblr.com'
4151
4152
4153
class DoodleForFood(GenericTumblrV1):
4154
    """Class to retrieve Doodle For Food comics."""
4155
    # Also on https://tapastic.com/series/Doodle-for-Food
4156
    name = 'doodle'
4157
    long_name = 'Doodle For Food'
4158
    url = 'http://www.doodleforfood.com'
4159
4160
4161
class CassandraCalinTumblr(GenericTumblrV1):
4162
    """Class to retrieve C. Cassandra comics."""
4163
    # Also on http://cassandracalin.com
4164
    # Also on https://tapastic.com/series/C-Cassandra-comics
4165
    name = 'cassandra-tumblr'
4166
    long_name = 'Cassandra Calin (from Tumblr)'
4167
    url = 'http://c-cassandra.tumblr.com'
4168
4169
4170
class DougWasTaken(GenericTumblrV1):
4171
    """Class to retrieve Doug Was Taken comics."""
4172
    name = 'doug'
4173
    long_name = 'Doug Was Taken'
4174
    url = 'https://dougwastaken.tumblr.com'
4175
4176
4177
class MandatoryRollerCoaster(GenericTumblrV1):
4178
    """Class to retrieve Mandatory Roller Coaster comics."""
4179
    name = 'rollercoaster'
4180
    long_name = 'Mandatory Roller Coaster'
4181
    url = 'http://mandatoryrollercoaster.com'
4182
4183
4184
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Class to retrieve C'Est Pas En Regardant Ses Pompes (...) comics."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://marcoandco.tumblr.com'
4189
4190
4191
class TheGrohlTroll(GenericTumblrV1):
4192
    """Class to retrieve The Grohl Troll comics."""
4193
    name = 'grohltroll'
4194
    long_name = 'The Grohl Troll'
4195
    url = 'http://thegrohltroll.com'
4196
4197
4198
class WebcomicName(GenericTumblrV1):
4199
    """Class to retrieve Webcomic Name comics."""
4200
    name = 'webcomicname'
4201
    long_name = 'Webcomic Name'
4202
    url = 'http://webcomicname.com'
4203
4204
4205
class BooksOfAdam(GenericTumblrV1):
4206
    """Class to retrieve Books of Adam comics."""
4207
    # Also on http://www.booksofadam.com
4208
    name = 'booksofadam'
4209
    long_name = 'Books of Adam'
4210
    url = 'http://booksofadam.tumblr.com'
4211
4212
4213
class HarkAVagrant(GenericTumblrV1):
4214
    """Class to retrieve Hark A Vagrant comics."""
4215
    # Also on http://www.harkavagrant.com
4216
    name = 'hark-tumblr'
4217
    long_name = 'Hark A Vagrant (from Tumblr)'
4218
    url = 'http://beatonna.tumblr.com'
4219
4220
4221
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Class to retrieve Our Super Adventure comics."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # See also http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4229
4230
4231
class JakeLikesOnions(GenericTumblrV1):
4232
    """Class to retrieve Jake Likes Onions comics."""
4233
    name = 'jake'
4234
    long_name = 'Jake Likes Onions'
4235
    url = 'http://jakelikesonions.com'
4236
4237
4238
class InYourFaceCake(GenericTumblrV1):
4239
    """Class to retrieve In Your Face Cake comics."""
4240
    name = 'inyourfacecake-tumblr'
4241
    long_name = 'In Your Face Cake (from Tumblr)'
4242
    url = 'https://in-your-face-cake.tumblr.com'
4243
4244
4245
class Robospunk(GenericTumblrV1):
4246
    """Class to retrieve Robospunk comics."""
4247
    name = 'robospunk'
4248
    long_name = 'Robospunk'
4249
    url = 'http://robospunk.com'
4250
4251
4252
class BananaTwinky(GenericTumblrV1):
4253
    """Class to retrieve Banana Twinky comics."""
4254
    name = 'banana'
4255
    long_name = 'Banana Twinky'
4256
    url = 'https://bananatwinky.tumblr.com'
4257
4258
4259
class YesterdaysPopcornTumblr(GenericTumblrV1):
4260
    """Class to retrieve Yesterday's Popcorn comics."""
4261
    # Also on http://www.yesterdayspopcorn.com
4262
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
4263
    name = 'popcorn-tumblr'
4264
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
4265
    url = 'http://yesterdayspopcorn.tumblr.com'
4266
4267
4268
class TwistedDoodles(GenericTumblrV1):
4269
    """Class to retrieve Twisted Doodles comics."""
4270
    name = 'twisted'
4271
    long_name = 'Twisted Doodles'
4272
    url = 'http://www.twisteddoodles.com'
4273
4274
4275
class UbertoolTumblr(GenericTumblrV1):
4276
    """Class to retrieve Ubertool comics."""
4277
    # Also on http://ubertoolcomic.com
4278
    # Also on https://tapastic.com/series/ubertool
4279
    name = 'ubertool-tumblr'
4280
    long_name = 'Ubertool (from Tumblr)'
4281
    url = 'https://ubertool.tumblr.com'
4282
    _categories = ('UBERTOOL', )
4283
4284
4285
class LittleLifeLinesTumblr(GenericTumblrV1):
4286
    """Class to retrieve Little Life Lines comics."""
4287
    # Also on http://www.littlelifelines.com
4288
    name = 'life-tumblr'
4289
    long_name = 'Little Life Lines (from Tumblr)'
4290
    url = 'https://little-life-lines.tumblr.com'
4291
4292
4293
class TheyCanTalk(GenericTumblrV1):
4294
    """Class to retrieve They Can Talk comics."""
4295
    name = 'theycantalk'
4296
    long_name = 'They Can Talk'
4297
    url = 'http://theycantalk.com'
4298
4299
4300
class Will5NeverCome(GenericTumblrV1):
4301
    """Class to retrieve Will 5:00 Never Come comics."""
4302
    name = 'will5'
4303
    long_name = 'Will 5:00 Never Come ?'
4304
    url = 'http://will5nevercome.com'
4305
4306
4307
class Sephko(GenericTumblrV1):
    """Class to retrieve Sephko comics."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
4313
4314
4315
class BlazersAtDawn(GenericTumblrV1):
4316
    """Class to retrieve Blazers At Dawn Comics."""
4317
    name = 'blazers'
4318
    long_name = 'Blazers At Dawn'
4319
    url = 'http://blazersatdawn.tumblr.com'
4320
4321
4322
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Art By Moga comics."""
    # Deactivated because it downloads too many things
    # (deactivation presumably comes from the GenericEmptyComic base).
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4327
4328
4329
class VerbalVomitTumblr(GenericTumblrV1):
4330
    """Class to retrieve Verbal Vomit comics."""
4331
    # Also on http://www.verbal-vomit.com
4332
    name = 'vomit-tumblr'
4333
    long_name = 'Verbal Vomit (from Tumblr)'
4334
    url = 'http://verbalvomits.tumblr.com'
4335
4336
4337
class LibraryComic(GenericTumblrV1):
4338
    """Class to retrieve LibraryComic."""
4339
    # Also on http://librarycomic.com
4340
    name = 'library-tumblr'
4341
    long_name = 'LibraryComic (from Tumblr)'
4342
    url = 'https://librarycomic.tumblr.com'
4343
4344
4345
class TizzyStitchBirdTumblr(GenericTumblrV1):
4346
    """Class to retrieve Tizzy Stitch Bird comics."""
4347
    # Also on http://tizzystitchbird.com
4348
    # Also on https://tapastic.com/series/TizzyStitchbird
4349
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
4350
    name = 'tizzy-tumblr'
4351
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
4352
    url = 'http://tizzystitchbird.tumblr.com'
4353
4354
4355
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
4356
    """Class to retrieve VictimsOfCircumsolar comics."""
4357
    # Also on http://www.victimsofcircumsolar.com
4358
    name = 'circumsolar-tumblr'
4359
    long_name = 'Victims Of Circumsolar (from Tumblr)'
4360
    url = 'https://victimsofcomics.tumblr.com'
4361
4362
4363
class RockPaperCynicTumblr(GenericTumblrV1):
4364
    """Class to retrieve RockPaperCynic comics."""
4365
    # Also on http://www.rockpapercynic.com
4366
    # Also on https://tapastic.com/series/rockpapercynic
4367
    name = 'rpc-tumblr'
4368
    long_name = 'Rock Paper Cynic (from Tumblr)'
4369
    url = 'http://rockpapercynic.tumblr.com'
4370
4371
4372
class DeadlyPanelTumblr(GenericTumblrV1):
4373
    """Class to retrieve Deadly Panel comics."""
4374
    # Also on http://www.deadlypanel.com
4375
    # Also on https://tapastic.com/series/deadlypanel
4376
    name = 'deadly-tumblr'
4377
    long_name = 'Deadly Panel (from Tumblr)'
4378
    url = 'https://deadlypanel.tumblr.com'
4379
4380
4381
class CatanaComics(GenericTumblrV1):
4382
    """Class to retrieve Catana comics."""
4383
    name = 'catana'
4384
    long_name = 'Catana'
4385
    url = 'http://www.catanacomics.com'
4386
4387
4388
class AngryAtNothingTumblr(GenericTumblrV1):
4389
    """Class to retrieve Angry at Nothing comics."""
4390
    # Also on http://www.angryatnothing.net
4391
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
4392
    name = 'angry-tumblr'
4393
    long_name = 'Angry At Nothing (from Tumblr)'
4394
    url = 'http://angryatnothing.tumblr.com'
4395
4396
4397
class ShanghaiTango(GenericTumblrV1):
4398
    """Class to retrieve Shanghai Tango comic."""
4399
    name = 'tango'
4400
    long_name = 'Shanghai Tango'
4401
    url = 'http://tango2010weibo.tumblr.com'
4402
4403
4404
class OffTheLeashDogTumblr(GenericTumblrV1):
4405
    """Class to retrieve Off The Leash Dog comics."""
4406
    # Also on http://offtheleashdogcartoons.com
4407
    # Also on http://www.rupertfawcettcartoons.com
4408
    name = 'offtheleash-tumblr'
4409
    long_name = 'Off The Leash Dog (from Tumblr)'
4410
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
4411
    _categories = ('FAWCETT', )
4412
4413
4414
class ImogenQuestTumblr(GenericTumblrV1):
4415
    """Class to retrieve Imogen Quest comics."""
4416
    # Also on http://imogenquest.net
4417
    name = 'imogen-tumblr'
4418
    long_name = 'Imogen Quest (from Tumblr)'
4419
    url = 'http://imoquest.tumblr.com'
4420
4421
4422
class Shitfest(GenericTumblrV1):
4423
    """Class to retrieve Shitfest comics."""
4424
    name = 'shitfest'
4425
    long_name = 'Shitfest'
4426
    url = 'http://shitfestcomic.com'
4427
4428
4429
class IceCreamSandwichComics(GenericTumblrV1):
4430
    """Class to retrieve Ice Cream Sandwich Comics."""
4431
    name = 'icecream'
4432
    long_name = 'Ice Cream Sandwich Comics'
4433
    url = 'http://icecreamsandwichcomics.com'
4434
4435
4436
class Dustinteractive(GenericTumblrV1):
4437
    """Class to retrieve Dustinteractive comics."""
4438
    name = 'dustinteractive'
4439
    long_name = 'Dustinteractive'
4440
    url = 'http://dustinteractive.com'
4441
4442
4443
class StickyCinemaFloor(GenericTumblrV1):
4444
    """Class to retrieve Sticky Cinema Floor comics."""
4445
    name = 'stickycinema'
4446
    long_name = 'Sticky Cinema Floor'
4447
    url = 'https://stickycinemafloor.tumblr.com'
4448
4449
4450
class IncidentalComicsTumblr(GenericTumblrV1):
4451
    """Class to retrieve Incidental Comics."""
4452
    # Also on http://www.incidentalcomics.com
4453
    name = 'incidental-tumblr'
4454
    long_name = 'Incidental Comics (from Tumblr)'
4455
    url = 'http://incidentalcomics.tumblr.com'
4456
4457
4458
class HorovitzComics(GenericListableComic):
    """Base class gathering what the different Horovitz comics have in common."""
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # The three numeric groups captured after 'comics/' in the image address
    # are read by get_comic_info as year, month and day (in that order).
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: the methods below use cls.link_re as a compiled pattern
    # whose first group is the comic number.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for a single comic.

        soup is the parsed page of the comic and link is the archive element
        pointing to it. The comic number comes from the link's href (first
        group of cls.link_re), the title from the link's text and the date
        from the address of the image (groups of cls.img_re).
        """
        comic_num = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        # Exactly one image with id 'comic' is expected on the page.
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = [int(part) for part in date_parts]
        img_urls = [img['src'] for img in comic_imgs]
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': img_urls,
            'num': comic_num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links whose href matches cls.link_re.

        The links are returned in the reverse of the order in which find_all
        reports them.
        """
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4489
4490
4491
class HorovitzNew(HorovitzComics):
    """Class to retrieve the comics of the 'new' Horovitz series."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # Matches hrefs of the form /comics/new/<digits>, capturing the digits.
    link_re = re.compile('^/comics/new/([0-9]+)$')
4496
4497
4498
class HorovitzClassic(HorovitzComics):
    """Class to retrieve the comics of the 'classic' Horovitz series."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # Matches hrefs of the form /comics/classic/<digits>, capturing the digits.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4503
4504
4505
class GenericGoComic(GenericNavigableComic):
    """Base class gathering the logic shared by the comics from gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # The class string is matched as written, trailing space included.
        first_button_class = 'fa btn btn-outline-default btn-circle fa-backward sm '
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_=first_button_class)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The class strings are matched as written, trailing space included.
        if next_:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to link (its href joined to the gocomics root)."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for a single comic.

        Everything is read from soup (the parsed page of the comic): the
        publication date, author and tags from 'article:*' meta elements and
        the images from the comic's picture element. link is not used.
        """
        def meta_content(prop):
            """Return the content of the first meta element with that property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        img_tags = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in img_tags],
            'author': author,
            'tags': tags,
        }
4542
4543
4544
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve Pearls Before Swine comics from gocomics.com."""
    name = 'pearls'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
    long_name = 'Pearls Before Swine'
4549
4550
4551
class Peanuts(GenericGoComic):
    """Retrieve Peanuts comics from gocomics.com."""
    name = 'peanuts'
    url = 'http://www.gocomics.com/peanuts'
    long_name = 'Peanuts'
4556
4557
4558
class MattWuerker(GenericGoComic):
    """Retrieve Matt Wuerker comics from gocomics.com."""
    name = 'wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
    long_name = 'Matt Wuerker'
4563
4564
4565
class TomToles(GenericGoComic):
    """Retrieve Tom Toles comics from gocomics.com."""
    name = 'toles'
    url = 'http://www.gocomics.com/tomtoles'
    long_name = 'Tom Toles'
4570
4571
4572
class BreakOfDay(GenericGoComic):
    """Retrieve Break Of Day comics from gocomics.com."""
    name = 'breakofday'
    url = 'http://www.gocomics.com/break-of-day'
    long_name = 'Break Of Day'
4577
4578
4579
class Brevity(GenericGoComic):
    """Retrieve Brevity comics from gocomics.com."""
    name = 'brevity'
    url = 'http://www.gocomics.com/brevitypanel'
    long_name = 'Brevity'
4584
4585
4586
class MichaelRamirez(GenericGoComic):
    """Retrieve Michael Ramirez comics from gocomics.com."""
    name = 'ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
    long_name = 'Michael Ramirez'
4591
4592
4593
class MikeLuckovich(GenericGoComic):
    """Retrieve Mike Luckovich comics from gocomics.com."""
    name = 'luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
    long_name = 'Mike Luckovich'
4598
4599
4600
class JimBenton(GenericGoComic):
    """Retrieve Jim Benton comics from gocomics.com."""
    # Other source for the same comic: http://jimbenton.tumblr.com
    name = 'benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
    long_name = 'Jim Benton'
4606
4607
4608
class TheArgyleSweater(GenericGoComic):
    """Retrieve The Argyle Sweater comics from gocomics.com."""
    name = 'argyle'
    url = 'http://www.gocomics.com/theargylesweater'
    long_name = 'Argyle Sweater'
4613
4614
4615
class SunnyStreet(GenericGoComic):
    """Retrieve Sunny Street comics from gocomics.com."""
    # Other source for the same comic: http://www.sunnystreetcomics.com
    name = 'sunny'
    url = 'http://www.gocomics.com/sunny-street'
    long_name = 'Sunny Street'
4621
4622
4623
class OffTheMark(GenericGoComic):
    """Retrieve Off The Mark comics from gocomics.com."""
    # Other source for the same comic: https://www.offthemark.com
    name = 'offthemark'
    url = 'http://www.gocomics.com/offthemark'
    long_name = 'Off The Mark'
4629
4630
4631
class WuMo(GenericGoComic):
    """Retrieve WuMo comics from gocomics.com."""
    # Other source for the same comic: http://wumo.com
    name = 'wumo'
    url = 'http://www.gocomics.com/wumo'
    long_name = 'WuMo'
4637
4638
4639
class LunarBaboon(GenericGoComic):
    """Retrieve Lunar Baboon comics from gocomics.com."""
    # Other sources for the same comic:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    url = 'http://www.gocomics.com/lunarbaboon'
    long_name = 'Lunar Baboon'
4646
4647
4648
class SandersenGocomic(GenericGoComic):
    """Retrieve Sarah Andersen comics from gocomics.com."""
    # Other sources for the same comic:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    url = 'http://www.gocomics.com/sarahs-scribbles'
    long_name = 'Sarah Andersen (from GoComics)'
4655
4656
4657
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retrieve Saturday Morning Breakfast Cereal comics from gocomics.com."""
    # Other sources for the same comic:
    #  - http://smbc-comics.tumblr.com
    #  - http://www.smbc-comics.com
    name = 'smbc-goc'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    _categories = ('SMBC', )
4665
4666
4667
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retrieve Calvin and Hobbes comics from gocomics.com."""
    # This is the gocomics source, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    url = 'http://www.gocomics.com/calvinandhobbes'
    long_name = 'Calvin and Hobbes (from GoComics)'
4673
4674
4675
class RallGoComic(GenericGoComic):
    """Retrieve Ted Rall comics from gocomics.com."""
    # Other source for the same comic: http://rall.com/comic
    name = 'rall-goc'
    url = 'http://www.gocomics.com/ted-rall'
    long_name = 'Ted Rall (from GoComics)'
    _categories = ('RALL', )
4682
4683
4684
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retrieve The Awkward Yeti comics from gocomics.com."""
    # Other sources for the same comic:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    long_name = 'The Awkward Yeti (from GoComics)'
    _categories = ('YETI', )
4693
4694
4695
class BerkeleyMewsGoComics(GenericGoComic):
    """Retrieve Berkeley Mews comics from gocomics.com."""
    # Other sources for the same comic:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = 'berkeley-goc'
    url = 'http://www.gocomics.com/berkeley-mews'
    long_name = 'Berkeley Mews (from GoComics)'
    _categories = ('BERKELEY', )
4703
4704
4705
class SheldonGoComics(GenericGoComic):
    """Retrieve Sheldon comics from gocomics.com."""
    # Other source for the same comic: http://www.sheldoncomics.com
    name = 'sheldon-goc'
    url = 'http://www.gocomics.com/sheldon'
    long_name = 'Sheldon Comics (from GoComics)'
4711
4712
4713
class FowlLanguageGoComics(GenericGoComic):
    """Retrieve Fowl Language comics from gocomics.com."""
    # Other sources for the same comic:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    url = 'http://www.gocomics.com/fowl-language'
    long_name = 'Fowl Language Comics (from GoComics)'
    _categories = ('FOWLLANGUAGE', )
4722
4723
4724
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics from gocomics.com."""
    name = 'nickanderson'
    url = 'http://www.gocomics.com/nickanderson'
    long_name = 'Nick Anderson'
4729
4730
4731
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics from gocomics.com."""
    # Other source for the same comic: http://garfield.com
    name = 'garfield-goc'
    url = 'http://www.gocomics.com/garfield'
    long_name = 'Garfield (from GoComics)'
    _categories = ('GARFIELD', )
4738
4739
4740
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc comics from gocomics.com."""
    # Other source for the same comic: http://dorrismccomics.com
    name = 'dorrismc-goc'
    url = 'http://www.gocomics.com/dorris-mccomics'
    long_name = 'Dorris Mc (from GoComics)'
4746
4747
4748
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from gocomics.com."""
    name = 'foxtrot'
    url = 'http://www.gocomics.com/foxtrot'
    long_name = 'FoxTrot'
4753
4754
4755
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from gocomics.com."""
    name = 'foxtrot-classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
    long_name = 'FoxTrot Classics'
4760
4761
4762
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retrieve Mister & Me comics from gocomics.com."""
    # NOTE(review): GenericEmptyComic comes first in the bases; presumably it
    # disables retrieval for this (possibly removed) source - confirm.
    # Other sources for the same comic:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    url = 'http://www.gocomics.com/mister-and-me'
    long_name = 'Mister & Me (from GoComics)'
4769
4770
4771
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (Wiley Miller) comics from gocomics.com."""
    name = 'nonsequitur'
    url = 'http://www.gocomics.com/nonsequitur'
    long_name = 'Non Sequitur'
4776
4777
4778
class GenericTapasticComic(GenericListableComic):
    """Shared scraping logic for the comics hosted on tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return a dict describing one episode, or None if it has no image yet.

        The date comes from the archive element's 'publishDate', the title from
        its 'title', and the images from the 'art-image' <img> elements of `soup`.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably milliseconds since the epoch - confirm).
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        images = soup.find_all('img', class_='art-image')
        if not images:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        info = {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [image['src'] for image in images],
            'title': archive_elt['title'],
        }
        return info

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the archive element's 'id'."""
        return ''.join(('http://tapastic.com/episode/', str(archive_elt['id'])))

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the javascript of the page at `cls.url`.

        The list is taken from the first line that, once stripped, starts with
        'episodeList : ' and ends with ','; the text in between is parsed as JSON.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part; there is no cleaner
        # known way to get it than scanning the raw lines.
        lines = [raw.decode('utf-8').strip()
                 for raw in urlopen_wrapper(cls.url).readlines()]
        candidates = [line[len(pref):-len(suff)]
                      for line in lines
                      if line.startswith(pref) and line.endswith(suff)]
        return json.loads(candidates[0])
4811
4812
4813
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics from tapastic.com."""
    # Other source for the same comic: http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    url = 'http://tapastic.com/series/vegetablesfordessert'
    long_name = 'Vegetables For Dessert'
4819
4820
4821
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    long_name = 'Fowl Language Comics (from Tapastic)'
    _categories = ('FOWLLANGUAGE', )
4830
4831
4832
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics from tapastic.com."""
    name = 'oscillating'
    url = 'http://tapastic.com/series/oscillatingprofundities'
    long_name = 'Oscillating Profundities'
4837
4838
4839
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics from tapastic.com."""
    name = 'znoflats'
    url = 'http://tapastic.com/series/Znoflats-Comics'
    long_name = 'Znoflats Comics'
4844
4845
4846
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    url = 'http://tapastic.com/series/Doodle-Time'
    long_name = 'Sarah Andersen (from Tapastic)'
4853
4854
4855
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve Tubey Toons comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://tubeytoons.com
    #  - https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    url = 'http://tapastic.com/series/Tubey-Toons'
    long_name = 'Tubey Toons (from Tapastic)'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept as is
    # because other Tubey Toons sources may share this key - confirm.
    _categories = ('TUNEYTOONS', )
4863
4864
4865
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic comics from tapastic.com."""
    # Other source for the same comic: http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    url = 'http://tapastic.com/series/anything'
    long_name = 'Anything Comic (from Tapastic)'
4871
4872
4873
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed Comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://unearthedcomics.com
    #  - https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    url = 'http://tapastic.com/series/UnearthedComics'
    long_name = 'Unearthed Comics (from Tapastic)'
    _categories = ('UNEARTHED', )
4881
4882
4883
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = 'stupid-tapa'
    url = 'http://tapastic.com/series/EverythingsStupid'
    long_name = "Everything's Stupid (from Tapastic)"
4890
4891
4892
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from tapastic.com."""
    # Other source for the same comic: http://www.justsayeh.com
    name = 'justsayeh-tapa'
    url = 'http://tapastic.com/series/Just-Say-Eh'
    long_name = 'Just Say Eh (from Tapastic)'
4898
4899
4900
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from tapastic.com."""
    # Other source for the same comic: http://www.thorsthundershack.com
    name = 'thor-tapa'
    url = 'http://tapastic.com/series/Thors-Thundershac'
    long_name = "Thor's Thundershack (from Tapastic)"
    _categories = ('THOR', )
4907
4908
4909
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from tapastic.com."""
    # Other source for the same comic: http://owlturd.com
    name = 'owlturd-tapa'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    long_name = 'Owl Turd (from Tapastic)'
    _categories = ('OWLTURD', )
4916
4917
4918
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://goneintorapture.tumblr.com
    #  - http://goneintorapture.com
    name = 'rapture-tapa'
    url = 'http://tapastic.com/series/Goneintorapture'
    long_name = 'Gone Into Rapture (from Tapastic)'
4925
4926
4927
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know comics from tapastic.com."""
    # Other source for the same comic: http://heckifiknowcomics.com
    name = 'heck-tapa'
    url = 'http://tapastic.com/series/Regular'
    long_name = 'Heck if I Know comics (from Tapastic)'
4933
4934
4935
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.cheerupemokid.com
    #  - https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    url = 'http://tapastic.com/series/CUEK'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
4942
4943
4944
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from tapastic.com."""
    # Other source for the same comic: http://bigfootjustice.com
    name = 'bigfoot-tapa'
    url = 'http://tapastic.com/series/bigfoot-justice'
    long_name = 'Big Foot Justice (from Tapastic)'
4950
4951
4952
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from tapastic.com."""
    # Other source for the same comic: http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    url = 'http://tapastic.com/series/UP-and-OUT'
    long_name = 'Up And Out (from Tapastic)'
4958
4959
4960
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from tapastic.com."""
    # Other source for the same comic: http://www.toonhole.com
    name = 'toonhole-tapa'
    url = 'http://tapastic.com/series/TOONHOLE'
    long_name = 'Toon Hole (from Tapastic)'
4966
4967
4968
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.angryatnothing.net
    #  - http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
    long_name = 'Angry At Nothing (from Tapastic)'
4975
4976
4977
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from tapastic.com."""
    # Other source for the same comic: http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    url = 'https://tapastic.com/series/Leleoz'
    long_name = 'Leleoz (from Tapastic)'
4983
4984
4985
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    long_name = 'The Awkward Yeti (from Tapastic)'
    _categories = ('YETI', )
4994
4995
4996
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Extra categories go in `_categories`, the attribute name used by the
    # sibling comic classes; this one was spelt `categories` (no underscore).
    _categories = ('DAMILEE', )
5003
5004
5005
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Extra categories go in `_categories`, the attribute name used by the
    # sibling comic classes; this one was spelt `categories` (no underscore).
    _categories = ('DAMILEE', )
5014
5015
5016
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = '1111-tapa'
    url = 'https://tapastic.com/series/1111-Comics'
    long_name = '1111 Comics (from Tapastic)'
    _categories = ('ONEONEONEONE', )
5024
5025
5026
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it read 'Tumblr Dry', but the comic is "Tumble Dry"
    # (see the series URL below).
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5032
5033
5034
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.deadlypanel.com
    #  - https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    url = 'https://tapastic.com/series/deadlypanel'
    long_name = 'Deadly Panel (from Tapastic)'
5041
5042
5043
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from tapastic.com."""
    # Other sources for the same comic:
    #  - https://chrishallbeck.tumblr.com
    #  - http://maximumble.com
    # NOTE(review): the strings below spell the author 'Hallback' (not
    # 'Hallbeck'); kept byte-for-byte - confirm the intended spelling.
    name = 'hallbeckmaxi-tapa'
    url = 'https://tapastic.com/series/Maximumble'
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    _categories = ('HALLBACK', )
5051
5052
5053
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from tapastic.com."""
    # NOTE(review): GenericEmptyComic comes first in the bases; presumably it
    # disables retrieval for this source - confirm.
    # Other sources for the same comic:
    #  - https://chrishallbeck.tumblr.com
    #  - http://minimumble.com
    # NOTE(review): the strings below spell the author 'Hallback' (not
    # 'Hallbeck'); kept byte-for-byte - confirm the intended spelling.
    name = 'hallbeckmini-tapa'
    url = 'https://tapastic.com/series/Minimumble'
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    _categories = ('HALLBACK', )
5061
5062
5063
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from tapastic.com."""
    # NOTE(review): GenericEmptyComic comes first in the bases; presumably it
    # disables retrieval for this source - confirm.
    # Other sources for the same comic:
    #  - https://chrishallbeck.tumblr.com
    #  - http://thebookofbiff.com
    # NOTE(review): the strings below spell the author 'Hallback' (not
    # 'Hallbeck'); kept byte-for-byte - confirm the intended spelling.
    name = 'hallbeckbiff-tapa'
    url = 'https://tapastic.com/series/Biff'
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    _categories = ('HALLBACK', )
5071
5072
5073
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from tapastic.com."""
    # Other source for the same comic: https://randowis.com
    name = 'randowis-tapa'
    url = 'https://tapastic.com/series/RandoWis'
    long_name = 'RandoWis (from Tapastic)'
5079
5080
5081
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from tapastic.com."""
    # Other source for the same comic: http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
    long_name = 'The Pigeon Gazette (from Tapastic)'
5087
5088
5089
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    url = 'https://tapastic.com/series/Theodd1sout'
    long_name = 'The Odd 1s Out (from Tapastic)'
5096
5097
5098
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from tapastic.com."""
    # Other source for the same comic: http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    url = 'https://tapastic.com/series/The-World-is-Flat'
    long_name = 'The World Is Flat (from Tapastic)'
5104
5105
5106
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    url = 'https://tapastic.com/series/Mister-and-Me'
    long_name = 'Mister & Me (from Tapastic)'
5113
5114
5115
class TalesOfAbsurdityTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from tapastic.com."""
    # NOTE(review): GenericEmptyComic comes first in the bases; presumably it
    # disables retrieval for this source - confirm.
    # Other sources for the same comic:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    long_name = 'Tales of Absurdity (from Tapastic)'
    _categories = ('ABSURDITY', )
5123
5124
5125
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://bfgfs.com
    #  - https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    url = 'https://tapastic.com/series/BFGFS'
    long_name = 'BFGFS (from Tapastic)'
5132
5133
5134
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from tapastic.com."""
    # Other source for the same comic: http://www.doodleforfood.com
    name = 'doodle-tapa'
    url = 'https://tapastic.com/series/Doodle-for-Food'
    long_name = 'Doodle For Food (from Tapastic)'
5140
5141
5142
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from tapastic.com."""
    # NOTE(review): the previous "Also on" comment pointed at this very
    # Tapastic URL (https://tapastic.com/series/MrLovenstein); it was probably
    # meant to name another source for the comic - confirm.
    name = 'mrlovenstein-tapa'
    url = 'https://tapastic.com/series/MrLovenstein'
    long_name = 'Mr. Lovenstein (from Tapastic)'
5148
5149
5150
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra (Cassandra Calin) comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
    long_name = 'Cassandra Calin (from Tapastic)'
5157
5158
5159
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from tapastic.com."""
    # Other source for the same comic: http://wandpcomic.com
    name = 'waffles'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
    long_name = 'Waffles And Pancakes'
5165
5166
5167
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.yesterdayspopcorn.com
    #  - http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
    long_name = "Yesterday's Popcorn (from Tapastic)"
5174
5175
5176
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Our Super Adventure comics from tapastic.com."""
    # NOTE(review): GenericEmptyComic comes first in the bases; presumably it
    # disables retrieval for this source - confirm.
    # Other source for the same comic: http://www.oursuperadventure.com
    # Related links:
    #  - http://sarahssketchbook.tumblr.com
    #  - http://sarahgraley.com
    name = 'superadventure-tapastic'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
    long_name = 'Our Super Adventure (from Tapastic)'
5184
5185
5186
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from tapastic.com."""
    # Other source for the same comic: http://namelesspcs.com
    name = 'namelesspcs-tapa'
    url = 'https://tapastic.com/series/NamelessPC'
    long_name = 'NamelessPCs (from Tapastic)'
5192
5193
5194
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve Down The Upward Spiral comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.downtheupwardspiral.com
    #  - http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
    long_name = 'Down the Upward Spiral (from Tapastic)'
5201
5202
5203
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://ubertoolcomic.com
    #  - https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    url = 'https://tapastic.com/series/ubertool'
    long_name = 'Ubertool (from Tapastic)'
    _categories = ('UBERTOOL', )
5211
5212
5213
class BarteNerdsTapa(GenericTapasticComic):
    """Retrieve BarteNerds comics from tapastic.com."""
    # Other source for the same comic: http://www.bartenerds.com
    name = 'bartenerds-tapa'
    url = 'https://tapastic.com/series/BarteNERDS'
    long_name = 'BarteNerds (from Tapastic)'
5219
5220
5221
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from tapastic.com."""
    # Other source for the same comic: http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
    long_name = 'Small Blue Yonder (from Tapastic)'
5227
5228
5229
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve Tizzy Stitch Bird comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://tizzystitchbird.com
    #  - http://tizzystitchbird.tumblr.com
    #  - http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    url = 'https://tapastic.com/series/TizzyStitchbird'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
5237
5238
5239
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve Rock Paper Cynic comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://www.rockpapercynic.com
    #  - http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    url = 'https://tapastic.com/series/rockpapercynic'
    long_name = 'Rock Paper Cynic (from Tapastic)'
5246
5247
5248
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve It's The Tie comics from tapastic.com."""
    # Other sources for the same comic:
    #  - http://itsthetie.com
    #  - http://itsthetie.tumblr.com
    name = 'tie-tapa'
    url = 'https://tapastic.com/series/itsthetie'
    long_name = "It's the tie (from Tapastic)"
    _categories = ('TIE', )
5256
5257
5258
class MomentumTapa(GenericTapasticComic):
    """Retrieve Momentum comics from tapastic.com."""
    # Other source for the same comic: http://www.momentumcomic.com
    name = 'momentum-tapa'
    url = 'https://tapastic.com/series/momentum'
    long_name = 'Momentum (from Tapastic)'
5264
5265
5266
def get_subclasses(klass):
    """Return the direct and indirect subclasses of `klass` as a list.

    Direct subclasses come first, followed by the descendants of each of them
    in turn. A class reachable through several parents is listed once per
    path, so the result may contain duplicates.
    """
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
5272
5273
5274
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes so that a date string can be parsed.

    "1st"/"2nd"/"3rd"/"4th" become "1"/"2"/"3"/"4". Only a suffix directly
    following a digit is removed, so the letters "st", "nd", "rd" and "th"
    occurring inside words ("August", "Monday", "Saturday", "month", ...)
    are left untouched.

    Args:
        string: text containing a date such as "August 21st, 2016".

    Returns:
        The text without ordinal suffixes, e.g. "August 21, 2016".
    """
    # The look-behind keeps the digit and drops only the two-letter suffix.
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)', '', string)
5282
5283
5284
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which parses under the locale
    `local`. The locale is switched for the duration of the call only and the
    previous one is restored afterwards, even when parsing fails.

    Args:
        string: text to parse.
        date_format: strptime format, as described in
            https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
        local: locale name to parse under.

    Returns:
        The parsed datetime.date.

    Raises:
        ValueError: if `string` does not match `date_format`.
        locale.Error: if the locale cannot be set.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is process-wide state: always put the previous one back.
        locale.setlocale(locale.LC_ALL, prev_locale)
5295
5296
5297
# Registry of the comic classes defined in this module.
# get_subclasses() may list a class several times (once per inheritance path),
# hence the set.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are left out of the usable comics.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup from comic name to class; the assert checks that names are unique.
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Class names must be unique as well.
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(CLASS_NAMES)
5303