Completed
Push — master ( 10eb24...c0fe6c )
by De
01:05
created

BerkeleyMews.get_archive_elements()   A

Complexity

Conditions 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
c 0
b 0
f 0
dl 0
loc 4
rs 10
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON info pages."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic` (GenericComic's abstract method).

        `last_comic` is the previously retrieved comic (a dict with a 'num'
        key) or a falsy value to start from the very first comic.
        """
        start = (last_comic['num'] if last_comic else 0) + 1
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        stop = latest_info['num'] + 1

        for num in range(start, stop):
            if num == 404:
                # Number 404 is explicitly excluded from retrieval.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
            })
            # Date parts are converted to integers.
            for key in ('day', 'month', 'year'):
                comic[key] = int(comic[key])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' value of a link (for get_url_from_link/get_url_from_archive_element)."""
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Join the class url with the link's 'href' (for get_url_from_link/get_url_from_archive_element)."""
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where the previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic` (or from the first comic
        link when `last_comic` is falsy) and yields comic dicts, each with its
        'url' key set, until no next link is found or the next link leads
        back to the current url.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # For development, navigation can be validated here with:
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page: stop here.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                # The 'url' key is set here, not by get_comic_info.
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its url can be fetched
        without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded, so import it explicitly before referring to it.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when the checks pass.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: comics exposing a list (aka 'archive').

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable of archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about a particular comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has been
        seen; every element after it is fetched and yielded. When
        `last_comic` is falsy, every element is fetched.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is not None:
                # Still looking for the last retrieved comic.
                if waiting_for_url == url:
                    waiting_for_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            comic = cls.get_comic_info(get_soup_at_url(url), archive_elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
254
255
# Helper functions corresponding to get_first_comic_link/get_navi_link
256
257
258
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link looking for a 'link' tag with rel 'next' (or 'prev')."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel_value)
262
263
264
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link looking for an 'a' tag with rel 'next' (or 'prev')."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel_value)
268
269
270
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link looking for an 'a' tag by its 'navi navi-*' class."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link looking for an 'a' tag by its 'navi comic-nav-* navi-*' class."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link looking for an 'a' tag by its 'comic-nav-base comic-nav-*' class."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
286
287
288
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: 'a' tag of class 'navi navi-first' on the class url page."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
292
293
294
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Looks on the page at the class url for the first 'a' tag inside the
    'div' of class "nav-first". Returns that tag, or None when the div
    (or the link inside it) cannot be found.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    # Without the div there is no first link: report it as None (which the
    # navigation code handles) instead of failing with an AttributeError.
    return div.find('a') if div is not None else None
298
299
300
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: 'a' tag of class 'comic-nav-base comic-nav-first' on the class url page."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
304
305
306
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict from
    the `first_url` attribute provided by the class.

    Note: the first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    first = cls.first_url
    return dict(href=first)
315
316
317
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a
    user-provided URL until the first comic is reached.

    Sometimes the first comic cannot be reached directly, so one has to
    follow the "previous" links until there is none left. Once the URL
    is known it is better to hardcode it, but for development purposes
    it is convenient to have an automatic way to find it.

    The URL found can then easily be used via `simulate_first_link`.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            # No previous comic: `url` is the first one.
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
338
339
340
class GenericEmptyComic(GenericComic):
    """Generic class for comics for which nothing is retrieved.

    It can be used to temporarily deactivate a comic that does not work
    properly, by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics (an empty list)."""
        message = "comic is considered as empty - returning no comic"
        cls.log(message)
        return []
353
354
355 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the image urls, the publication
        date (day/month/year) and a file prefix built from the title.
        """
        # The site url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
381
class GenericLeMondeBlog(GenericNavigableComic):
    """Base class for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    # Placeholder: concrete blog classes provide the actual starting url.
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, images and date found on a comic page."""
        shortlink = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # The date is parsed with the French locale.
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in soup.find_all('meta', property='og:image')
        ]
        return {
            'title': og_title,
            'url2': shortlink,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
406
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
412
413
414
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Not the actual first comic: no efficient way to retrieve it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
421
422
423
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
429
430
431
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
437
438
439
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
445
446
447
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
453
454
455
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
461
462
463
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
469
470
471 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the actual first comic: no efficient way to retrieve it was found.
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date, description and images of a comic page."""
        og_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        description = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: they are social media buttons.
        comic_imgs = content_imgs[:-7]
        return {
            'title': og_title,
            'author': author_name,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
502
503
504
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic (None if absent)."""
        # On this site the classes are swapped: 'prev' leads to the next
        # comic and 'next' leads to the previous one.
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, images and date found on a comic page."""
        shortlink = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        raw_date = soup.find('span', property='dc:date')['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'short_url': shortlink,
            'title': twitter_title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
538
539
540
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        return {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date (taken from the url) and images of a comic."""
        comic_title = link['title']
        comic_url = cls.get_url_from_link(link)
        # The url contains the date as /year/month/day/.
        date_match = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$').match(comic_url)
        year, month, day = map(int, date_match.groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
568
569
570
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, author, date and images of a comic page."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post_content = soup.find('div', class_='post-content')
        author_name = post_content.find("span", class_="post-author").find("a").string
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = post_content.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity checks on the images found in the 'comic' div.
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (og_title, "") for img in comic_imgs)
        description = soup.find('meta', property='og:description')['content']
        image_urls = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'title': og_title,
            'description': description,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_urls,
        }
605
606
607
class ItsTheTie(GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date, images (bonus panels included) and tags of a comic page."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [img['data-oversrc']
                      for img in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(main_srcs)
        for src in bonus_srcs:
            if src not in main_srcs:
                candidates.append(src)
        # The generic bonus-panel image is filtered out.
        kept_srcs = [src for src in candidates
                     if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept_srcs,
            'tags': tags,
        }
641
642
643 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date found on a blog entry page."""
        raw_date = soup.find('h2', class_='date-header').string
        # The date is parsed with the French locale.
        published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        return {
            'title': entry_title,
            'img': [img['src'] for img in body_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
667
668
669 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images found on a comic page."""
        title_heading = soup.find('h1', class_='comic-title')
        comic_title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
694
695
696 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images found on a comic page."""
        title_heading = soup.find('h1', class_='comic-title')
        comic_title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
719
720
721
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short url, the comic number extracted from
        it, the image url and the titles taken from the image's 'alt' and
        'title' attributes.
        """
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # Exactly one image is expected in the 'comic' div.
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
747
748
749
class Garfield(GenericNavigableComic):
    """Retriever for the Garfield comic strip."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the arrow anchor pointing to the next or previous comic."""
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the date (from the comic URL) and the image URLs."""
        comic_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        # The URL path carries year/month/day, in that order.
        year, month, day = (int(part) for part in date_pattern.match(comic_url).groups())
        display_div = soup.find('div', class_='comic-display')
        img_tags = display_div.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [tag['src'] for tag in img_tags],
        }
777
778
779
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comic strip."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the right/left navigation div, or None."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the comic metadata exposed through the page's meta tags."""
        def meta_content(prop):
            """Return the content of the first meta tag with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
815
816
817
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar webcomic."""
    # Also on http://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description and images from a comic page.

        No date is returned: it is only available on the archive page.
        """
        # When several og: tags are present, the last one is used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        return {
            'title': title,
            'description': description,
            'img': [tag['src'] for tag in comic_imgs],
        }
840
841
842
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the element wrapping the "first" button image, or None when
        the home page has no such image.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the element wrapping the next/previous button image, or None
        when the image is absent or its parent carries no ``href``.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # No navigation button on the page: nothing to follow.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except those whose source ends with
        one of the known navigation/advertising file names.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
874
875
876
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel webcomic."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on http://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs found in the page's ``comic`` div."""
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        # Every image is expected to use the same text for alt and title.
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        sources = [tag['src'] for tag in img_tags]
        return {
            'img': sources,
        }
894
895
896 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman's Armchair webcomic."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, publication date and images from a page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        info = {'img': [tag['src'] for tag in comic_imgs]}
        info['title'] = title
        info['author'] = author
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
920
921
922 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest webcomic."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract titles, author, publication date and images from a page.

        The secondary title is the tooltip of the first image in the
        ``comicpane`` div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert all(tag['alt'] == tag['title'] for tag in pane_imgs)
        first_img = pane_imgs[0]
        title2 = first_img['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'title2': title2,
            'author': author,
        }
950
951
952
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life webcomic."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation anchor of the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and images from a comic page."""
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # alt text and tooltip of every comic image must equal the page title.
        assert all(tag['alt'] == tag['title'] == title for tag in comic_imgs)
        # Images with an empty source are dropped.
        sources = [tag['src'] for tag in comic_imgs if tag["src"]]
        return {
            'title': title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
979
980
981
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the ``cc-comic`` img; when the ``aftercomic`` div
        holds an image with a non-empty source, it is appended as a second
        image.
        """
        image1 = soup.find('img', id='cc-comic')
        imgs = [image1['src']]
        aftercomic = soup.find('div', id='aftercomic')
        # The div may be missing, or present without a usable image:
        # in both cases only the main image is returned.
        extra_img = aftercomic.find('img') if aftercomic else None
        extra_src = extra_img.get('src') if extra_img is not None else None
        if extra_src:
            imgs.append(extra_src)
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1013
1014
1015
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship webcomic."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page's comic anchors, in reversed page order."""
        numbered_href = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=numbered_href)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract number, name and image from a comic page.

        The archive anchor must carry the comic number both in its ``name``
        attribute and in its ``/<num>/`` href, and the page must contain
        exactly one comic image whose alt text equals the anchor text.
        """
        comic_url = cls.get_url_from_archive_element(link)
        img_src_pattern = re.compile('^/archive_b/PBF.*')
        comic_name = link.string
        num = int(link['name'])
        assert link['href'] == '/%d/' % num
        comic_imgs = soup.find_all('img', src=img_src_pattern)
        assert len(comic_imgs) == 1
        assert comic_imgs[0]['alt'] == comic_name
        return {
            'num': num,
            'name': comic_name,
            'img': [urljoin_wrapper(comic_url, tag['src']) for tag in comic_imgs],
            'prefix': '%d-' % num,
        }
1045
1046
1047 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks webcomic."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description, date and images from the meta tags."""
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description tag is optional; fall back to an empty string.
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1073
1074
1075
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews webcomic."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic permalinks of the form <url>/?p=<num>.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract number, titles, image and date from a comic page.

        The number comes from the permalink and the date from the
        ``<year>-<month>-<day>-`` part of the image URL.
        """
        img_date_pattern = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == comic_img['title']
        title2 = comic_img['title']
        img_url = comic_img['src']
        year, month, day = (int(part) for part in img_date_pattern.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1111
1112
1113
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever logic for the BouletCorp sites (one per language)."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's ``centered_nav`` div."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date (from the URL), title, image tooltips and images."""
        comic_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = (int(part) for part in date_pattern.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        img_tags = story.find_all('img')
        # Non-empty image tooltips, joined in page order.
        tooltips = [tag.get('title') for tag in img_tags]
        texts = '  '.join(tip for tip in tooltips if tip)
        title = soup.find('title').string
        # Images without a source attribute are ignored.
        sources = [convert_iri_to_plain_ascii_uri(tag['src'])
                   for tag in img_tags if tag.get('src') is not None]
        return {
            'img': sources,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1141
1142
1143
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp webcomic (www.bouletcorp.com)."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # This site is tagged as French-language content.
    _categories = ('FRANCAIS', )
1149
1150
1151
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the English BouletCorp site (english.bouletcorp.com)."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1156
1157
1158 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers webcomic."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, publication date and images from a page.

        The title is the space-joined tooltips of the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join(tag['title'] for tag in comic_imgs)
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        info = {'title': title}
        info['author'] = author
        info['img'] = [tag['src'] for tag in comic_imgs]
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
1183
1184
1185
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole webcomic."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract shortlink, title, publication date and images from a page.

        The title is the alt text of the first comic image, or an empty
        string when the page has no comic image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        date_text = meta_div.contents[0].strip()
        published = string_to_date(date_text, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if comic_imgs:
            first_img = comic_imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(tag['src']) for tag in comic_imgs],
        }
1215
1216
1217
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate webcomic."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from a comic page.

        When the page has an ``extrapanelbutton`` div, the page it links to
        is fetched and its ``extrapanelimage`` images are appended to the
        comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img_tags = comic_div.find_all('img')
        else:
            img_tags = []
        extra_url = None
        button_div = soup.find('div', id='extrapanelbutton')
        if button_div:
            extra_url = button_div.find('a')['href']
            bonus_soup = get_soup_at_url(extra_url)
            img_tags.extend(bonus_soup.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags],
        }
1251
1252
1253
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the ``next-comic``/``previous-comic`` anchor, or None when
        the anchor is absent or has no ``href``.
        """
        # The class name must not carry surrounding whitespace: class
        # attribute values are split on whitespace, so a padded name such as
        # 'previous-comic ' would never match any element.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number is the second-to-last ``/``-separated component of the
        page's og:url; the author credit must start with ``'by '``, which is
        stripped.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1292
1293
1294
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the ``/comic/<num>`` links
        of the home page; iteration starts right after ``last_comic['num']``
        when a last comic is given, from the smallest linked number otherwise.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        # Compiled once: the image pattern is the same for every comic page.
        comic_img_re = re.compile('^/images/comics/')
        nums = [int(comic_num_re.match(link['href']).groups()[0])
                for link in get_soup_at_url(cls.url).find_all('a', href=comic_num_re)]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are returned in the reverse of their order in the page.
            imgs = list(reversed(soup.find_all('img', src=comic_img_re)))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in (i.get('title') for i in imgs) if t),
                'img': [urljoin_wrapper(url, i['src']) for i in imgs],
                'description': description,
            }
1324
1325
1326
class DinosaurComics(GenericListableComic):
    """Retriever for Dinosaur Comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic permalinks; the captured group is the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract number, date, text, title and image for one comic.

        The date is the archive anchor's text and the text is taken from the
        node following the anchor; the image is the first one whose source
        is under ``<cls.url>/comics/``.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_text = link.string
        text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(date_text), "%B %d, %Y")
        img_src_pattern = re.compile('^%s/comics/' % cls.url)
        comic_img = soup.find('img', src=img_src_pattern)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [comic_img.get('src')],
            'title': comic_img.get('title'),
            'text': text,
            'num': num,
        }
1359
1360
1361 View Code Duplication
class ButterSafe(GenericListableComic):
    """Retriever for the ButterSafe webcomic."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs; the captured groups are year, month and day.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date (from the URL) and image for one comic.

        The comic image's alt text must equal the archive anchor's text.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = (int(part) for part in cls.comic_link_re.match(comic_url).groups())
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [comic_img['src']],
        }
1389
1390
1391
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the ``<year>/<month>/`` links of the index page and, for each
        month page, yields the images dated strictly after the last
        retrieved comic (or after 1985-11-01 when there is none).
        """
        last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the textual year/month: they are reused verbatim to build
            # the image pattern and the image URL.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # Skip month pages that cannot contain anything new: first day
            # of the month plus 31 days is still before the last date.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1425
1426
1427 View Code Duplication
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose webcomic."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Comic permalink (captures the comic number) and strip image patterns.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Extract number, title (the anchor text) and image for one comic."""
        permalink = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(permalink).group(1))
        info = {'num': num}
        info['title'] = archive_elt.string
        strip_img = soup.find('img', src=cls.comic_img_re)
        info['img'] = [strip_img['src']]
        return info
1450
1451
1452
class PhDComics(GenericNavigableComic):
    """Retriever for PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the "first" button image, or None."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/prev button image, or None."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and image URLs from the page's meta tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1481
1482
1483 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the "First" navigation image of the home page."""
        home = get_soup_at_url(cls.url)
        first_img = home.find('img', src=re.compile('.*/First.png'))
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the Next/Back image, or None if it has no href."""
        pattern = '.*/Next.png' if next_ else '.*/Back.png'
        candidate = last_soup.find('img', src=re.compile(pattern)).parent
        if candidate.get('href') is None:
            return None
        return candidate

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, publication date and image URLs from the page."""
        title = soup.find('h3', class_='post-title entry-title').string
        header = soup.find('h2', class_='date-header')
        published = string_to_date(header.string, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [tag['href'] for tag in image_links],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1515
1516
1517
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'article-next' or 'article-prev'."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the images of the article; image URLs are joined to cls.url."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        pictures = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1541
1542
1543
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'next comic' or 'go back already'."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the comic number from the link URL and the strip image from the page."""
        strip_src_re = re.compile('^/oc/comics/.*')
        number_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(number_re.match(page_url).group(1))
        strip = soup.find('img', src=strip_src_re)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, strip['src'])],
            'title': strip.get('title'),
        }
1573
1574
1575
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the <div id="st"> element of the home page, or
        None when that element cannot be found (same convention as
        get_navi_link).
        """
        div = get_soup_at_url(cls.url).find("div", id="st")
        return None if div is None else div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the <div id="nx"> (next) or <div id="pvs">
        (previous) element, or None when that element is missing.
        """
        div = last_soup.find("div", id="nx" if next_ else "pvs")
        return None if div is None else div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The page is expected to hold exactly one title image (inside
        <div id="tt">) and exactly one strip image (<img id="strip">); both
        are returned, title image first, and their title attributes are
        joined to build the description.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        imgs = title_imgs + strip_imgs
        desc = ' '.join(i['title'] for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'description': desc,
        }
1609
1610
1611
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title and description from meta tags and the images marked itemprop="image"."""
        title_tag = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_tag['content']
        description = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
1635
1636
1637
class SomethingOfThatIlk(GenericEmptyComic):
    """Class to retrieve the Something Of That Ilk comics."""
    # Does not exist anymore
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1642
1643
1644
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the og:title and the images found inside <div id="comic">."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
1662
1663
1664
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel="bookmark" anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read date, image, title, alt text and tags from a comic page.

        When the page has no <div id="comic">, the image list is empty and
        title/alt are empty strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            img_src = []
            alt = ''
            title = ''
        else:
            strip = comic_div.find('img')
            img_src = [strip['src']]
            alt = strip['alt']
            assert alt == strip['title']
            title = soup.find('meta', property='og:title')['content']
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(
                tag.string
                for tag in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }
1701
1702
1703
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the post title, the post date and the images of <div id="comic">."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1725
1726
1727
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the post title and the comic images.

        Every image must carry identical alt and title attributes; the alt
        text reported is the one of the first image.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1748
1749
1750 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, author, date and images of a comic page.

        Every image must have alt and title attributes equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1776
1777
1778 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the comic images; the title is built by joining the image titles."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1797
1798
1799
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from the page's meta
        tags. Images come from the og:image meta tags, minus the two site
        images listed in skip_imgs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1831
1832
1833
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, date, images and alt text of a comic page.

        The alt text is taken from the first image; all images must have
        matching alt and title attributes.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1860
1861
1862 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, author, date and images of a comic page.

        At least one image is required, and every image must have title and
        alt attributes equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1890
1891
1892
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, short URL, images, tags and date of a comic page."""
        title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        pictures = content_div.find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_anchors = soup.find_all('a', rel='tag')
        tags = ' '.join(anchor.string for anchor in tag_anchors)
        # Only the leading YYYY-MM-DD part of the datetime attribute is parsed.
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1919
1920
1921
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read titles, alt text, image URL and number of a comic.

        The number is the last path component of the comic URL.
        """
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': strip.get('title'),
            # The src attribute is stripped of surrounding whitespace before joining.
            'img': [urljoin_wrapper(page_url, strip['src'].strip())],
            'num': int(page_url.split('/')[-1]),
        }
1950
1951
1952
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics.

    Archive elements are the <td class="archive-title"> cells of the
    'archives/' page; each one contains the link to a comic.
    """
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive title cells, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the first anchor found in the archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The date is rebuilt from two sources: month and day come from the
        sibling preceding the archive cell, the year is the numeric path
        component that directly follows the base URL in the comic URL.
        The first image of <div id="comic"> must have title and alt
        attributes equal to the archive title.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base URL so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        day = string_to_date(month_and_day + ' ' + year, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1988
1989
1990
class DiscoBleach(GenericEmptyComic):
    """Class to retrieve Disco Bleach Comics."""
    # Does not work anymore
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1995
1996
1997
class TubeyToons(GenericEmptyComic):
    """Class to retrieve TubeyToons comics."""
    # Does not work anymore
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # before renaming, the value may be relied upon elsewhere.
    _categories = ('TUNEYTOONS', )
2005
2006
2007
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, author, date, images and alt text of a comic page.

        At least one image is required; the alt text is the title attribute
        of the first image and every image must share it as title and alt.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is the second child node of the span.
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        shared_alt = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == shared_alt for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': shared_alt,
            'author': author,
        }
2035
2036
2037 View Code Duplication
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics.

    Archive elements are the anchors of the 'archive' page whose href starts
    with '<url>/comic/' followed by at least one character.
    """
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected inside <div class="post">. The title is
        the title attribute of that image, or an empty string when there is
        no image or no such attribute.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base URL so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2061
2062
2063 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at cls.url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, date and images of a comic page.

        Only images with empty alt and title attributes inside
        <div class="comic"> are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2093
2094
2095
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read date, author, images and title of a comic page.

        A page without <div id="comic"> yields no image and an empty title;
        otherwise the title is the title attribute of the first image, which
        every image must share as title and alt.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
2121
2122
2123
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named 'beginArrow' on the home page."""
        home = get_soup_at_url(cls.url)
        arrow = home.find('img', attrs={'name': 'beginArrow'})
        return arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the image URLs from the page's meta tags."""
        title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_tag['content']
        image_tags = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [tag['content'] for tag in image_tags],
        }
2149
2150
2151
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Read the information of a comic from its archive row and its page.

        The archive row must hold exactly three cells: the second one
        contains the link (whose text is the title), the third one the date.
        """
        _, title_cell, date_cell = tr.find_all('td')
        anchor = title_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_tag = soup.find('meta', property='og:description')
        if desc_tag:
            description = desc_tag['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held by the second cell of the row."""
        _, title_cell, _ = tr.find_all('td')
        anchor = title_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the first <tbody> of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2193
2194
2195
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read images, title, author and date of a comic page.

        Title, author and date are looked up inside <div class="post-content">;
        the alt text is the concatenation of the image alt attributes.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        title = post_div.find('h2', class_='post-title').string
        author = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2222
2223
2224
class RockPaperScissors(GenericNavigableComic):
    """Class to retrieve Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, transcript, short URL and og:image URLs of a comic page."""
        title = soup.find('title').string
        image_tags = soup.find_all('meta', property='og:image')
        short_url = soup.find('link', rel='shortlink')['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript = transcript_div.string
        return {
            'title': title,
            'transcript': transcript,
            'short_url': short_url,
            'img': [tag['content'] for tag in image_tags],
        }
2245
2246
2247
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        Title, description, tags and publication date come from <meta> tags;
        exactly one image flagged with data-recalc-dims="1" is expected.
        """
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        summary = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tag_text = tag_meta['content']
        else:
            tag_text = ""
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1
        return {
            'title': comic_title,
            'description': summary,
            'tags': tag_text,
            'alt': "".join(img['alt'] for img in comic_imgs),
            # Drop the query string appended to the image URL.
            'img': [img['src'].rsplit('?', 1)[0] for img in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2278
2279
2280
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the archive table, one per comic."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Build the comic url from the link found in an archive row."""
        # A row is expected to hold exactly four cells; only the second is used.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Extract the data describing one comic.

        Number, title and date come from the archive row `tr`; the image comes
        from the comic page `soup`.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        comic_num = int(num_cell.string)
        comic_title = comic_cell.find('a').string
        comic_imgs = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(comic_imgs) == 1
        assert all(img.get('alt') == img.get('title') for img in comic_imgs)
        return {
            'num': comic_num,
            'title': comic_title,
            'alt': comic_imgs[0].get('alt', ''),
            'img': [img['src'] for img in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2321
2322
2323 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        Author, date and images are looked up inside the 'post-content' div;
        the date is parsed with "%B %d, %Y".
        """
        post_title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author_span = post_div.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry_imgs = post_div.find("div", class_="entry").find_all("img")
        return {
            'title': post_title,
            'author': post_author,
            'img': [img['src'] for img in entry_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2349
2350
2351 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        Everything is read from <meta> tags: 'og:title', 'og:image' and
        'article:published_time'.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2377
2378
2379
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, or None.

        Anchors whose href is '/comic' are skipped.
        """
        rel_value = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=rel_value)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        Values are located through their itemprop attributes, except the
        title which is read from the 'description' meta tag.
        """
        comic_title = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        creator = soup.find('span', itemprop='author copyrightHolder').string
        comic_imgs = soup.find_all('img', itemprop='image')
        assert all(img['title'] == img['alt'] for img in comic_imgs)
        if comic_imgs:
            alt_text = comic_imgs[0]['alt']
        else:
            alt_text = ""
        raw_datetime = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_datetime, "%Y-%m-%d %H:%M:%S")
        return {
            # Image sources are joined onto the site url.
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': creator,
            'title': comic_title,
            'alt': alt_text,
            'description': body_text,
        }
2422
2423
2424
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        At least one image is required in the 'comic' div, and every image
        must share the same 'alt' and 'title' text.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        alt_text = comic_imgs[0]['alt']
        assert all(img['alt'] == img['title'] == alt_text for img in comic_imgs)
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2451
2452
2453 View Code Duplication
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        The date is parsed with "%d. %B %Y" and the "de_DE.utf8" locale name
        passed on to string_to_date.
        """
        post_title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Image 'alt' and 'title' are both expected to equal the post title.
        assert all(img['alt'] == img['title'] == post_title for img in comic_imgs)
        assert len(comic_imgs) <= 1
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2479
2480
2481
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        Exactly one image is expected in the 'comic' div.
        """
        entry_title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        entry_author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert len(comic_imgs) == 1
        alt_text = comic_imgs[0]['alt']
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': entry_title,
            'alt': alt_text,
            'author': entry_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2509
2510
2511 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        Title and date come from the post header, images from the 'comic' div.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Only the first image (if any) is checked for matching 'alt'/'title'.
        assert all(img['alt'] == img['title'] for img in comic_imgs[:1])
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2538
2539
2540
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and images of one comic from its page.

        Both are looked up inside the 'post-content' div.
        """
        post_div = soup.find('div', class_='post-content')
        post_title = post_div.find('h2', class_='post-title').string
        entry_div = post_div.find("div", class_="entry")
        entry_imgs = entry_div.find_all("img")
        return {
            'title': post_title,
            'img': [img['src'] for img in entry_imgs],
        }
2558
2559
2560
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        At most one image is expected; 'alt' is empty when there is none.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert len(comic_imgs) <= 1
        if comic_imgs:
            alt_text = comic_imgs[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2590
2591
2592
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        At most one image is expected; 'alt' is empty when there is none.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find("span", class_="post-date").string
        when = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        alt_text = pictures[0]['alt'] if pictures else ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2620
2621
2622
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        'alt' is taken from the first image, or is empty when the 'comic' div
        holds no image.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        if comic_imgs:
            alt_text = comic_imgs[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2652
2653
2654
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        'alt' is taken from the first image, or is empty when the 'comic' div
        holds no image.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find("span", class_="post-date").string
        when = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt_text = pictures[0]['alt'] if pictures else ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2681
2682
2683
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title, date and images of one comic from its page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        return {
            'title': post_title,
            'img': [img['src'] for img in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2705
2706
2707 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the image and title of one comic from its page.

        Exactly one image is expected in the 'comic' div; its 'title'
        attribute is used as the comic title.
        """
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert len(comic_imgs) == 1
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': comic_imgs[0]['title'],
        }
2725
2726
2727
class GenericCommitStrip(GenericNavigableComic):
    """Base retriever for Commit Strip, shared by the language variants."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each subclass in this file sets its own first_url.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        Title and description come from the Open Graph meta tags; images come
        from the 'entry-content' div.
        """
        og_description = soup.find('meta', property='og:description')['content']
        og_title = soup.find('meta', property='og:title')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # Images without a 'title' attribute contribute an empty string.
        img_titles = ' '.join(img.get('title', '') for img in content_imgs)
        return {
            'title': og_title,
            'title2': img_titles,
            'description': og_description,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(img['src']))
                for img in content_imgs
            ],
        }
2746
2747
2748
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    # Url handed to simulate_first_link as the first comic.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2755
2756
2757
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Url handed to simulate_first_link as the first comic.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2763
2764
2765 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Base retriever for Boumeries, shared by the language variants."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: each subclass in this file sets its own format and locale.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        The date is parsed with the class-level `date_format` and `lang`.
        """
        post_title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'short_url': shortlink,
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'author': post_author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2791
2792
2793
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Format and locale used by the inherited get_comic_info to parse dates.
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2800
2801
2802
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Format and locale used by the inherited get_comic_info to parse dates.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2810
2811
2812 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title (empty when no <h1>/<h2> is found), the
        'og:description' text (empty when the meta tag is missing), the
        shortlink, the image sources and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the meta tag, not the Tag object itself.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2844
2845
2846
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        A page without a 'comic' div yields no image and an empty 'alt'.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            alt_text = comic_imgs[0]['title']
        else:
            alt_text = ""
        assert all(img['alt'] == img['title'] == alt_text for img in comic_imgs)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'img': [img['src'] for img in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2874
2875
2876
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the `p` value of the shortlink, which must start
        with the site url. At least one image is required in the 'comic' div,
        and all images must share the same 'alt' and 'title' text.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2906
2907
2908
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the `p` value of the shortlink, which must start
        with the site url. At least one image is required in the 'comic' div,
        and all images must share the same 'alt' and 'title' text. Tags are
        the space-joined contents of the 'article:tag' meta tags.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2944
2945
2946
class AHammADay(GenericNavigableComic):
    """Class to retrieve A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching <li> element.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Date, author, title and images are read from the 'published' time
        element, the 'blog-author' span, the 'og:title' meta tag and the
        meta tags with itemprop 'image'.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2977
2978
2979
class LittleLifeLines(GenericNavigableComic):
    """Retriever for Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> tag leading to the next/previous comic, or None."""
        # The labels are swapped: the 'prev' item is followed to go forward.
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Only keep <img> tags that actually carry a source.
        imgs = [tag for tag in body.find_all('img') if tag.get('src') is not None]
        alt = imgs[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in imgs],
        }
3018
3019
3020
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the main page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`."""
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in imgs],
        }
3043
3044
3045
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for Everything's Stupid comics."""
    # Other places publishing the same comic:
    #  - http://tapastic.com/series/EverythingsStupid
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3053
3054
3055
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Possibly also on https://tapastic.com/series/TheIsm (unconfirmed)
    name = 'theism'
    long_name = 'The Ism'
    url = 'http://www.theism-comics.com'
3061
3062
3063
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for Wooden Plank Studios comics."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3068
3069
3070
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        img = last_soup.find('img', alt=wanted_alt)
        if img:
            return img.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (title and image urls) for the page `soup`."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3098
3099
3100
class SheldonComics(GenericNavigableComic):
    """Retriever for Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the main page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first navigation anchor not pointing at the home page.

        Returns None when every candidate points at the site root.
        """
        link_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=link_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (title and image urls) for the page `soup`."""
        foot = soup.find("div", id="comic-foot")
        imgs = foot.find_all("img")
        # Exactly one image is expected, whose alt text equals its title.
        assert all(img['alt'] == img['title'] for img in imgs)
        assert len(imgs) == 1
        return {
            'title': imgs[0]['title'],
            'img': [img['src'] for img in imgs],
        }
3131
3132
3133 View Code Duplication
class Ubertool(GenericNavigableComic):
    """Retriever for Ubertool comics."""
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (images, title, date) for the page `soup`."""
        title = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        return {
            'img': [img['src'] for img in comic_div.find_all('img')],
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3158
3159
3160
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' or 'prev'."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (images, title, alt text) for the page `soup`."""
        title = soup.find('title').string
        image_div = soup.find('div', id='image')
        imgs = image_div.find_all('img')
        # The first image's 'title' attribute is used as alt text, if present.
        alt = imgs[0].get('title', '')
        return {
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
            'title': title,
            'alt': alt,
        }
3185
3186
3187
class PomComics(GenericNavigableComic):
    """Retriever for Pom Comics (Piece of Me)."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn_first' anchor from the main page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn_next' or 'btn_prev' anchor."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (title, description, tags, images) for `soup`."""
        title = soup.find('h1', id="comic-name").string
        desc = soup.find('meta', property='og:description')['content']
        tags = soup.find('meta', attrs={'name': 'keywords'})['content']
        comic_div = soup.find('div', class_='comic')
        imgs = comic_div.find_all('img')
        return {
            'title': title,
            'desc': desc,
            'tags': tags,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
3217
3218
3219
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the parent of the 'backward' glyph."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the element wrapping the right/left chevron glyph, or None
        when the page has no such glyph.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when nothing matches; do not dereference it.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the twitter url and title, the title and alt
        attributes of the first comic image, and the image urls.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # NOTE(review): a date seems to be available in
        # <h2 class="comic_title"><small> (format "%B %d, %Y, %I:%M %p")
        # but it is not extracted at the moment.
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3254
3255
3256
class MakeItStoopid(GenericNavigableComic):
    """Retriever for Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of the page as a list.

        The first five 'cnav' elements are used and are asserted to equal
        the remaining ones; the original code names the positions
        begin, prev, archive, next, end. Elements without an href are
        replaced by None.
        """
        elements = soup.find_all(class_='cnav')
        first_bar = elements[:5]
        second_bar = elements[5:]
        assert first_bar == second_bar
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the main page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (title from the link, image urls) for `soup`."""
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': link['title'],
            'img': [img['src'] for img in comic_imgs],
        }
3290
3291
3292 View Code Duplication
class MarketoonistComics(GenericNavigableComic):
    """Retriever for Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (images, date, title) for the page `soup`."""
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }
3315
3316
3317
class ConsoliaComics(GenericNavigableComic):
    """Retriever for Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor of class 'first' from the main page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor of class 'next' or 'prev'."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (title, images, date) for the page `soup`."""
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('time')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3348
3349
3350
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retriever for Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' or 'older' blog pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (images, author, title) for the page `soup`."""
        title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        imgs = article.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in imgs],
            'author': author,
            'title': title,
        }
3375
3376
3377
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The 'prev-item' anchor is the one followed to move forward
        # (next_ is True), 'next-item' to move backward.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with title, alt text, description, author, the
        publication date ('day', 'month', 'year') and the absolute image
        urls. 'alt' is always a string: the alt attribute of the first
        image, or '' when there is no image or no alt attribute.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The images live in one of two possible containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup Tag, `'title' not in i` tests the
        # tag's children, not its attributes, so this assertion looks like
        # it never compares anything. `i.has_attr('title')` is probably what
        # was meant - confirm against live pages before enabling the check.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Fall back to an empty string (not a list) so that the type of
        # 'alt' does not depend on whether images were found.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3415
3416
3417
class GloryOwlComix(GenericNavigableComic):
    """Retriever for Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' or 'older' blog pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (images, author, title) for the page `soup`."""
        title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [tag['href'] for tag in image_links],
            'author': author,
            'title': title,
        }
3442
3443
3444
class GenericTumblrV1(GenericComic):
    """Base class for comics retrieved from Tumblr through the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the value of the 'url' attribute of a post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the API endpoint: '/api/read/' joined to the class url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Build the dict describing a post, or None if it is not a photo."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = '%s?id=%d' % (cls.get_api_url(), tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [img.string for img in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When `last_comic` is given, only the posts found before reaching
        its url are returned. An empty result is returned when the last
        known post cannot be fetched anymore or is never encountered.
        """
        if last_comic:
            target_url = last_comic['url']
        else:
            target_url = None
        collected = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            previous_api_url = last_comic['api_url']
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                # Nothing has been collected at this point: empty result.
                return reversed(collected)
        base_api_url = cls.get_api_url()
        first_page = get_soup_at_url(base_api_url).find('posts')
        start = int(first_page['start'])
        total = int(first_page['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = '%s?start=%d&num=%d' % (base_api_url, offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if target_url and target_url == cls.get_url_from_post(post):
                    # Reached the last known comic: everything newer is collected.
                    return reversed(collected)
                collected.append(post)
        if target_url is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % target_url)
        return []
3537
3538
3539
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retriever for Saturday Morning Breakfast Cereal comics (Tumblr V1 API)."""
    # Other places publishing the same comic:
    #  - http://www.gocomics.com/saturday-morning-breakfast-cereal
    #  - http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3547
3548
3549
class IrwinCardozo(GenericTumblrV1):
    """Retriever for Irwin Cardozo comics (Tumblr V1 API)."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3554
3555
3556
class AccordingToDevin(GenericTumblrV1):
    """Retriever for According To Devin comics (Tumblr V1 API)."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3561
3562
3563
class ItsTheTieTumblr(GenericTumblrV1):
    """Retriever for It's the tie comics (Tumblr V1 API)."""
    # Other places publishing the same comic:
    #  - http://itsthetie.com
    #  - https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ('TIE', )
3571
3572
3573
class OctopunsTumblr(GenericTumblrV1):
    """Retriever for Octopuns comics (Tumblr V1 API)."""
    # Same comic also published on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3579
3580
3581
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retriever for Pictures In Boxes comics (Tumblr V1 API)."""
    # Same comic also published on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3587
3588
3589
class TubeyToonsTumblr(GenericTumblrV1):
    """Retriever for Tubey Toons comics (Tumblr V1 API)."""
    # Other places publishing the same comic:
    #  - http://tapastic.com/series/Tubey-Toons
    #  - http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): spelled 'TUNEYTOONS' (not 'TUBEYTOONS'); presumably it has
    # to match the category used by sibling classes - confirm before renaming.
    _categories = ('TUNEYTOONS', )
3597
3598
3599
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retriever for Unearthed Comics (Tumblr V1 API)."""
    # Other places publishing the same comic:
    #  - http://tapastic.com/series/UnearthedComics
    #  - http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3607
3608
3609
class PieComic(GenericTumblrV1):
    """Retriever for Pie Comic (Tumblr V1 API)."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
3614
3615
3616
class MrEthanDiamond(GenericTumblrV1):
    """Retriever for Mr Ethan Diamond comics (Tumblr V1 API)."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3621
3622
3623
class Flocci(GenericTumblrV1):
    """Retriever for floccinaucinihilipilification comics (Tumblr V1 API)."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3628
3629
3630
class UpAndOut(GenericTumblrV1):
    """Retriever for Up & Out comics (Tumblr V1 API)."""
    # Same comic also published on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3636
3637
3638
class Pundemonium(GenericTumblrV1):
    """Retriever for Pundemonium comics (Tumblr V1 API)."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3643
3644
3645
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retriever for Poorly Drawn Lines comics (Tumblr V1 API)."""
    # Same comic also published on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3652
3653
3654
class PearShapedComics(GenericTumblrV1):
    """Retriever for Pear-Shaped Comics (Tumblr V1 API)."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3659
3660
3661
class PondScumComics(GenericTumblrV1):
    """Retriever for Pond Scum comics (Tumblr V1 API)."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3666
3667
3668
class MercworksTumblr(GenericTumblrV1):
    """Retriever for Mercworks comics (Tumblr V1 API)."""
    # Same comic also published on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3674
3675
3676
class OwlTurdTumblr(GenericTumblrV1):
    """Retriever for Owl Turd comics (Tumblr V1 API)."""
    # Same comic also published on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3683
3684
3685
class VectorBelly(GenericTumblrV1):
    """Retriever for Vector Belly comics (Tumblr V1 API)."""
    # Same comic also published on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3691
3692
3693
class GoneIntoRapture(GenericTumblrV1):
    """Retriever for Gone Into Rapture comics (Tumblr V1 API)."""
    # Other places publishing the same comic:
    #  - http://goneintorapture.tumblr.com
    #  - http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3700
3701
3702
class TheOatmealTumblr(GenericTumblrV1):
    """Retriever for The Oatmeal comics (Tumblr V1 API)."""
    # Same comic also published on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3708
3709
3710
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retriever for Heck If I Know comics (Tumblr V1 API)."""
    # Same comic also published on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3716
3717
3718
class MyJetPack(GenericTumblrV1):
    """Retriever for My Jet Pack comics (Tumblr V1 API)."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3723
3724
3725
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retriever for Cheer Up Emo Kid comics (Tumblr V1 API)."""
    # Other places publishing the same comic:
    #  - http://www.cheerupemokid.com
    #  - http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3732
3733
3734
class ForLackOfABetterComic(GenericTumblrV1):
    """Retriever for For Lack Of A Better Comic (Tumblr V1 API)."""
    # Same comic also published on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3740
3741
3742
class ZenPencilsTumblr(GenericTumblrV1):
    """Retriever for Zen Pencils comics (Tumblr V1 API)."""
    # Other places publishing the same comic:
    #  - http://zenpencils.com
    #  - http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3750
3751
3752
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retriever for Three Word Phrase comics (Tumblr V1 API)."""
    # Same comic also published on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3758
3759
3760
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved through GenericTumblrV1."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3766
3767
3768
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved through GenericTumblrV1."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3774
3775
3776
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved through GenericTumblrV1."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3782
3783
3784
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved through GenericTumblrV1."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3791
3792
3793
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, retrieved through GenericTumblrV1."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3802
3803
3804
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, retrieved through GenericTumblrV1."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3809
3810
3811
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved through GenericTumblrV1."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3817
3818
3819
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Spelled `_categories` like on the other comic classes of this file;
    # it used to be `categories`, a name no other class here declares.
    _categories = ('DAMILEE', )
3826
3827
3828
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Spelled `_categories` like on the other comic classes of this file;
    # it used to be `categories`, a name no other class here declares.
    _categories = ('DAMILEE', )
3837
3838
3839
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved through GenericTumblrV1."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3847
3848
3849
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved through GenericTumblrV1."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3855
3856
3857
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved through GenericTumblrV1."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3865
3866
3867
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved through GenericTumblrV1."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3873
3874
3875
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved through GenericTumblrV1."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3881
3882
3883
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name spells the surname as in the blog URL ('Hallbeck');
    # it previously read 'Hallback'.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the category key deliberately keeps the 'HALLBACK'
    # spelling - it is a grouping key that other classes may share; confirm
    # against the rest of the file before renaming it.
    _categories = ('HALLBACK', )
3893
3894
3895
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved through GenericTumblrV1."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3900
3901
3902
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved through GenericTumblrV1."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3908
3909
3910
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved through GenericTumblrV1."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3916
3917
3918
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved through GenericTumblrV1."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3927
3928
3929
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved through GenericTumblrV1."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3936
3937
3938
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved through GenericTumblrV1."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3944
3945
3946
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved through GenericTumblrV1."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3952
3953
3954
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved through GenericTumblrV1."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3960
3961
3962
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved through GenericTumblrV1."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3968
3969
3970
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, retrieved through GenericTumblrV1."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3976
3977
3978
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics; GenericEmptyComic comes first among the bases."""
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3984
3985
3986
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard comics, retrieved through GenericTumblrV1."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3993
3994
3995
class AComik(GenericTumblrV1):
    """A Comik, retrieved through GenericTumblrV1."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4000
4001
4002
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved through GenericTumblrV1."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4007
4008
4009
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved through GenericTumblrV1."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
4015
4016
4017
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved through GenericTumblrV1."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS', )
4025
4026
4027
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, retrieved through GenericTumblrV1."""
    # Was on http://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
4034
4035
4036
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved through GenericTumblrV1."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4041
4042
4043
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved through GenericTumblrV1."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
4048
4049
4050
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved through GenericTumblrV1."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4055
4056
4057
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved through GenericTumblrV1."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4065
4066
4067
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved through GenericTumblrV1."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4073
4074
4075
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved through GenericTumblrV1."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4081
4082
4083
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved through GenericTumblrV1."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4088
4089
4090
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved through GenericTumblrV1."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
4097
4098
4099
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved through GenericTumblrV1."""
    # Also on http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
4105
4106
4107
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, retrieved through GenericTumblrV1."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4114
4115
4116
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, retrieved through GenericTumblrV1."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
4121
4122
4123
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, retrieved through GenericTumblrV1."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4128
4129
4130
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, retrieved through GenericTumblrV1."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4135
4136
4137
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, retrieved through GenericTumblrV1."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4142
4143
4144
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, retrieved through GenericTumblrV1."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4149
4150
4151
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, retrieved through GenericTumblrV1."""
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4157
4158
4159
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics, retrieved through GenericTumblrV1."""
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4165
4166
4167
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics, retrieved through GenericTumblrV1."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4175
4176
4177
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, retrieved through GenericTumblrV1."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4182
4183
4184
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake comics, retrieved through GenericTumblrV1."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
4189
4190
4191
class Robospunk(GenericTumblrV1):
    """Robospunk comics, retrieved through GenericTumblrV1."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4196
4197
4198
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, retrieved through GenericTumblrV1."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4203
4204
4205
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, retrieved through GenericTumblrV1."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
4212
4213
4214
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, retrieved through GenericTumblrV1."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4219
4220
4221
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics, retrieved through GenericTumblrV1."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4229
4230
4231
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines comics, retrieved through GenericTumblrV1."""
    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4237
4238
4239
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, retrieved through GenericTumblrV1."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4244
4245
4246
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, retrieved through GenericTumblrV1."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4251
4252
4253
class Sephko(GenericTumblrV1):
    """Sephko Comics, retrieved through GenericTumblrV1."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4259
4260
4261
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn Comics, retrieved through GenericTumblrV1."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4266
4267
4268
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Art By Moga Comics; GenericEmptyComic comes first among the bases."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4273
4274
4275
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, retrieved through GenericTumblrV1."""
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4281
4282
4283
class LibraryComic(GenericTumblrV1):
    """LibraryComic, retrieved through GenericTumblrV1."""
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'http://librarycomic.tumblr.com'
4289
4290
4291
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics, retrieved through GenericTumblrV1."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4299
4300
4301
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Victims Of Circumsolar comics, retrieved through GenericTumblrV1."""
    # Also on http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'http://victimsofcomics.tumblr.com'
4307
4308
4309
class RockPaperCynicTumblr(GenericTumblrV1):
    """Rock Paper Cynic comics, retrieved through GenericTumblrV1."""
    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4316
4317
4318
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics, retrieved through GenericTumblrV1."""
    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'http://deadlypanel.tumblr.com'
4325
4326
4327
class CatanaComics(GenericTumblrV1):
    """Catana comics, retrieved through GenericTumblrV1."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4332
4333
4334
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comic, retrieved through GenericTumblrV1."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4339
4340
4341
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics, retrieved through GenericTumblrV1."""
    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4349
4350
4351
class ImogenQuestTumblr(GenericTumblrV1):
    """Imogen Quest comics, retrieved through GenericTumblrV1."""
    # Also on http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4357
4358
4359
class HorovitzComics(GenericListableComic):
    """Shared base for the Horovitz comics; subclasses provide `link_re`."""
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Captures the three numeric path components that follow 'comics/' in an
    # image URL; get_comic_info reads them as year, month and day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder value: HorovitzNew and HorovitzClassic set a compiled pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic behind `link`.

        The comic number is the first group of `link_re` matched against the
        link's href, the title is the link's string and the date comes from
        the path of the single <img id="comic"> found in `soup`.
        """
        link_match = cls.link_re.match(link['href'])
        num = int(link_match.groups()[0])
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links matching `link_re`, in reversed order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4390
4391
4392
class HorovitzNew(HorovitzComics):
    """Horovitz new comics: HorovitzComics with the '/comics/new/' link pattern."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
4397
4398
4399
class HorovitzClassic(HorovitzComics):
    """Horovitz classic comics: HorovitzComics with the '/comics/classic/' link pattern."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4404
4405
4406
class GenericGoComic(GenericNavigableComic):
    """Base class holding what the comics from gocomics.com have in common."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'fa-backward' button link found on the page at `cls.url`."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='fa btn btn-outline-default btn-circle fa-backward sm ')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next comic if `next_` is true, else the previous one."""
        if next_:
            wanted_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            wanted_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=wanted_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined onto the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image sources, author and tags from a comic page."""
        def meta_content(prop):
            """Return the 'content' of the first <meta> tag with this property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in comic_imgs],
            'author': author,
            'tags': tags,
        }
4443
4444
4445
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved through GenericGoComic."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4450
4451
4452
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved through GenericGoComic."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4457
4458
4459
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved through GenericGoComic."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4464
4465
4466
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved through GenericGoComic."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4471
4472
4473
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved through GenericGoComic."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4478
4479
4480
class Brevity(GenericGoComic):
    """Brevity comics, retrieved through GenericGoComic."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4485
4486
4487
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved through GenericGoComic."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4492
4493
4494
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved through GenericGoComic."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4499
4500
4501
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved through GenericGoComic."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4507
4508
4509
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved through GenericGoComic."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4514
4515
4516
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, retrieved through GenericGoComic."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4522
4523
4524
class OffTheMark(GenericGoComic):
    """Off The Mark comics, retrieved through GenericGoComic."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4530
4531
4532
class WuMo(GenericGoComic):
    """WuMo comics, retrieved through GenericGoComic."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4538
4539
4540
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, retrieved through GenericGoComic."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4547
4548
4549
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved through GenericGoComic."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4556
4557
4558
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics, retrieved through GenericGoComic."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4566
4567
4568
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, retrieved through GenericGoComic."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4574
4575
4576
class RallGoComic(GenericGoComic):
    """Ted Rall comics, retrieved through GenericGoComic."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL', )
4583
4584
4585
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, retrieved through GenericGoComic."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4594
4595
4596
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, retrieved through GenericGoComic."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4604
4605
4606
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, retrieved through GenericGoComic."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4612
4613
4614
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, retrieved through GenericGoComic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4623
4624
4625
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved through GenericGoComic."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4630
4631
4632
class GarfieldGoComics(GenericGoComic):
    """Retriever for Garfield comics as published on GoComics."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD',)
4639
4640
4641
class DorrisMcGoComics(GenericGoComic):
    """Retriever for Dorris Mc Comics as published on GoComics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4647
4648
4649
class FoxTrot(GenericGoComic):
    """Retriever for FoxTrot comics (hosted on GoComics)."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4654
4655
4656
class FoxTrotClassics(GenericGoComic):
    """Retriever for FoxTrot Classics comics (hosted on GoComics)."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4661
4662
4663
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retriever for Mister & Me comics as published on GoComics."""
    # NOTE(review): GenericEmptyComic comes first in the bases, presumably to
    # disable retrieval while the GoComics page is gone - confirm.
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4670
4671
4672
class NonSequitur(GenericGoComic):
    """Retriever for Non Sequitur comics by Wiley Miller (hosted on GoComics)."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4677
4678
4679
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    The archive is the episode list embedded as JSON in the javascript of the
    series page (`cls.url`); each entry of that list is an "archive element".
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed episode page and `archive_elt` the matching
        entry of the episode list (its 'publishDate' and 'title' are read).
        Returns a dict with the publication date, image urls and title, or
        None when the page has no image yet.
        """
        # 'publishDate' is handled as milliseconds since the epoch; the date
        # is computed in the local timezone of the machine.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the url of the episode page described by an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list (decoded JSON) from the series page.

        Raises IndexError when no line of the page holds the episode list.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # IndexError is what the former "[...][0]" lookup raised: keep the
        # type for callers but give an explicit message.
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4712
4713
4714
class VegetablesForDessert(GenericTapasticComic):
    """Retriever for Vegetables For Dessert comics (hosted on Tapastic)."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4720
4721
4722
class FowlLanguageTapa(GenericTapasticComic):
    """Retriever for Fowl Language comics as published on Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE',)
4731
4732
4733
class OscillatingProfundities(GenericTapasticComic):
    """Retriever for Oscillating Profundities comics (hosted on Tapastic)."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4738
4739
4740
class ZnoflatsComics(GenericTapasticComic):
    """Retriever for Znoflats comics (hosted on Tapastic)."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4745
4746
4747
class SandersenTapastic(GenericTapasticComic):
    """Retriever for Sarah Andersen comics as published on Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4754
4755
4756
class TubeyToonsTapastic(GenericTapasticComic):
    """Retriever for TubeyToons comics as published on Tapastic."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    _categories = ('TUNEYTOONS',)
4764
4765
4766
class AnythingComicTapastic(GenericTapasticComic):
    """Retriever for Anything Comic comics as published on Tapastic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4772
4773
4774
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retriever for Unearthed Comics as published on Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
4782
4783
4784
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retriever for Everything's Stupid comics as published on Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4791
4792
4793
class JustSayEhTapastic(GenericTapasticComic):
    """Retriever for Just Say Eh comics as published on Tapastic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4799
4800
4801
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retriever for Thor's Thundershack comics as published on Tapastic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
4808
4809
4810
class OwlTurdTapastic(GenericTapasticComic):
    """Retriever for Owl Turd comics as published on Tapastic."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
4817
4818
4819
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retriever for Gone Into Rapture comics as published on Tapastic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4826
4827
4828
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retriever for Heck If I Know comics as published on Tapastic."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4834
4835
4836
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retriever for Cheer Up Emo Kid comics as published on Tapastic."""
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4843
4844
4845
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for Big Foot Justice comics as published on Tapastic."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4851
4852
4853
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for Up & Out comics as published on Tapastic."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4859
4860
4861
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for Toon Hole comics as published on Tapastic."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4867
4868
4869
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for Angry At Nothing comics as published on Tapastic."""
    # Also on http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4875
4876
4877
class LeleozTapa(GenericTapasticComic):
    """Retriever for Leleoz comics as published on Tapastic."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4883
4884
4885
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for The Awkward Yeti comics as published on Tapastic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
4894
4895
4896
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Categories are declared through `_categories` (leading underscore) like
    # in the other comic classes; a plain `categories` attribute is not the
    # name used by the rest of the file.
    _categories = ('DAMILEE', )
4903
4904
4905
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Categories are declared through `_categories` (leading underscore) like
    # in the other comic classes; a plain `categories` attribute is not the
    # name used by the rest of the file.
    _categories = ('DAMILEE', )
4914
4915
4916
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for 1111 Comics as published on Tapastic."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4924
4925
4926
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it used to read 'Tumblr Dry'.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4932
4933
4934
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for Deadly Panel comics as published on Tapastic."""
    # Also on http://www.deadlypanel.com
    # Also on http://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4941
4942
4943
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Maximumble comics on Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK',)
4951
4952
4953
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Minimumble comics on Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK',)
4961
4962
4963
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's The Book of Biff comics on Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK',)
4971
4972
4973
class RandoWisTapa(GenericTapasticComic):
    """Retriever for RandoWis comics as published on Tapastic."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4979
4980
4981
class PigeonGazetteTapa(GenericTapasticComic):
    """Retriever for The Pigeon Gazette comics as published on Tapastic."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4987
4988
4989
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retriever for The Odd 1s Out comics as published on Tapastic."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4996
4997
4998
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retriever for The World Is Flat comics as published on Tapastic."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5004
5005
5006
class MisterAndMeTapa(GenericTapasticComic):
    """Retriever for Mister & Me comics as published on Tapastic."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5013
5014
5015
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retriever for Tales Of Absurdity comics as published on Tapastic."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
5023
5024
5025
class BFGFSTapa(GenericTapasticComic):
    """Retriever for BFGFS comics as published on Tapastic."""
    # Also on http://bfgfs.com
    # Also on http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5032
5033
5034
class DoodleForFoodTapa(GenericTapasticComic):
    """Retriever for Doodle For Food comics as published on Tapastic."""
    # Also on http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5040
5041
5042
class MrLovensteinTapa(GenericTapasticComic):
    """Retriever for Mr. Lovenstein comics as published on Tapastic."""
    # Also on https://tapastic.com/series/MrLovenstein
    # NOTE(review): the "Also on" link above is the same page as `url`;
    # presumably the comic's own site was meant - confirm and replace.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5048
5049
5050
class CassandraCalinTapa(GenericTapasticComic):
    """Retriever for C. Cassandra (Cassandra Calin) comics on Tapastic."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5057
5058
5059
class WafflesAndPancakes(GenericTapasticComic):
    """Retriever for Waffles And Pancakes comics (hosted on Tapastic)."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5065
5066
5067
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retriever for Yesterday's Popcorn comics as published on Tapastic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5074
5075
5076
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retriever for Our Super Adventure comics as published on Tapastic."""
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahssketchbook.tumblr.com
    # Also on http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5084
5085
5086
class NamelessPCs(GenericTapasticComic):
    """Retriever for Nameless PCs comics as published on Tapastic."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5092
5093
5094
class UbertoolTapa(GenericTapasticComic):
    """Retriever for Ubertool comics as published on Tapastic."""
    # Also on http://ubertoolcomic.com
    # Also on http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
5102
5103
5104
class BarteNerdsTapa(GenericTapasticComic):
    """Retriever for BarteNerds comics as published on Tapastic."""
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5110
5111
5112
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retriever for Small Blue Yonder comics as published on Tapastic."""
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5118
5119
5120
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retriever for Tizzy Stitch Bird comics as published on Tapastic."""
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5128
5129
5130
class RockPaperCynicTapa(GenericTapasticComic):
    """Retriever for Rock Paper Cynic comics as published on Tapastic."""
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5137
5138
5139
class ItsTheTieTapa(GenericTapasticComic):
    """Retriever for It's the tie comics as published on Tapastic."""
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE',)
5147
5148
5149
def get_subclasses(klass):
    """Return the direct and indirect subclasses of `klass` as a list.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several paths appears once per
    path."""
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
5155
5156
5157
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only a 'st', 'nd', 'rd' or 'th' directly following a digit is removed,
    so that names such as 'August', 'Monday' or 'Saturday' are left intact.
    Returns the cleaned string."""
    # The lookbehind keeps the digit and drops only the ordinal suffix.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
5165
5166
5167
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: `string` is parsed with
    `date_format` while the locale `local` is active, and the date part is
    returned. The previous locale is restored afterwards, even when the
    parsing fails (ValueError from strptime is propagated)."""
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch nor restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Never leave the process in the temporary locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
5178
5179
5180
# Every class deriving (directly or not) from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Classes with a name; the ones whose name is None are left out.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup by comic name - the assert checks that names are unique.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Same uniqueness check for the Python class names.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
5186