Completed
Push — master ( 50c35d...512335 )
by De
01:11
created

comics.py (29 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, oldest first.

        Implementation of GenericComic's abstract method. When `last_comic`
        is falsy, numbering starts from 1. Each yielded dict is the JSON
        document of the comic, enriched with 'prefix', 'json_url' and 'url',
        with 'img' wrapped in a list and the date fields converted to int.
        """
        start = (last_comic['num'] if last_comic else 0) + 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        stop = latest['num'] + 1

        for num in range(start, stop):
            if num == 404:
                # Number 404 is never requested (presumably no such comic
                # exists on the site - verify if this ever changes).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the raw 'href' value of `link`.

    Meant to be assigned as get_url_from_link/get_url_from_archive_element
    in comic classes.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' value of `link` joined to the class url.

    Meant to be assigned as get_url_from_link/get_url_from_archive_element
    in comic classes.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where the previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new,
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic['url']` when a last comic is given, from
        the first comic link otherwise, and follows the "next" links until
        there is none or until a link points back to the current url.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                # The 'url' key is owned by this method.
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes."""
        # `import urllib` alone does not load the `error` submodule: import
        # it explicitly so that the except clause below can always resolve.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    The `get_next_comic` method is implemented in terms of new,
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable of the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element corresponds to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about one particular comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to and including the one whose url
        is `last_comic['url']`; every following element is fetched and
        yielded. Without a last comic, all elements are fetched.
        """
        pending_url = last_comic['url'] if last_comic else None
        for element in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last comic retrieved.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            info = cls.get_comic_info(get_soup_at_url(url), element)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
254
255
# Helper functions corresponding to get_first_comic_link/get_navi_link
256
257
258
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next|prev">."""
    relation = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=relation)
262
263
264
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next|prev">."""
    relation = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=relation)
268
269
270
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next|navi-prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-* navi-*' classes of <a>."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-*' classes of <a>."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
286
287
288
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link based on <a class="navi navi-first">."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
292
293
294
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link based on <div class="nav-first">.

    Returns the first <a> found in the 'nav-first' div of the page at
    `cls.url`, or None when the page has no such div.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    # Without the div there is no first link: report it as None instead of
    # failing with an AttributeError on the chained call.
    return div.find('a') if div is not None else None
298
299
300
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link based on <a class="comic-nav-base comic-nav-first">."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
304
305
306
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like object
    from the `first_url` attribute provided by the class.

    Note: the first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    fake_link = dict(href=cls.first_url)
    return fake_link
315
316
317
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a URL
    typed by the user until the first comic is reached.

    Sometimes the first comic cannot be reached directly, so one has to
    follow the "previous" links until there is none left. Once the URL
    is known, it is better to hardcode it, but for development purposes
    it is convenient to have an automatic way to find it.

    The URL found can then easily be used via `simulate_first_link`.
    """
    url = input("Get starting URL: ")
    while True:
        # Every visited URL is printed so that the last one can be hardcoded.
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
338
339
340
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic that never returns any comic."""
        cls.log("comic is considered as empty - returning no comic")
        no_comic = []
        return no_comic
353
354
355 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the <img> tags whose source lives under the site's
        'wp-content/uploads/' folder; title and date come from the
        'og:title' and 'article:published_time' meta tags.
        """
        # The site url is escaped so that its dots only match literal dots.
        img_src_re = re.compile(r'^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
381 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses provide `first_url`, the URL navigation starts from.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict of the comic found in `soup`."""
        short_link = soup.find('link', rel='shortlink')
        url2 = short_link['href']
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        date_span = soup.find("span", class_="entry-date")
        # Dates are written in French, hence the explicit locale.
        published = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        images = []
        for meta in soup.find_all('meta', property='og:image'):
            images.append(convert_iri_to_plain_ascii_uri(meta['content']))
        return {
            'title': title,
            'url2': url2,
            'img': images,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
406
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
412
413
414
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg comics (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # This is not the very first comic: no efficient way was found to
    # retrieve the real first one.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
421
422
423
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu comics (Le Monde blog)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
429
430
431
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce comics (Le Monde blog)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
437
438
439
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace comics (Le Monde blog)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
445
446
447
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel comics (Le Monde blog)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
453
454
455
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees comics (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
461
462
463
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee comics (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
469
470
471 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    # This is not the very first comic: no efficient way was found to
    # retrieve the real first one.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict of the comic found in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        description = soup.find('meta', property='og:description')['content']
        content = soup.find('div', class_='entry-content')
        # The last 7 images are dropped: they are social media buttons.
        pictures = content.find_all('img')[:-7]
        return {
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': description,
            'img': [picture['src'] for picture in pictures],
        }
502
503
504
class Dilem(GenericNavigableComic):
    """Retriever for the Ali Dilem comics."""
    name = "dilem"
    long_name = "Ali Dilem"
    url = "http://information.tv5monde.com/dilem"
    _categories = ('FRANCAIS', )
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if missing."""
        # The classes are swapped on purpose: prev is next / next is prev.
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict of the comic found in `soup`."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the leading YYYY-MM-DD part of the 'dc:date' value is used.
        raw_date = soup.find('span', property='dc:date')['content']
        published = string_to_date(raw_date[:10], "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
538
539
540
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for the Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like object for the first comic."""
        return {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict of the comic found in `soup`.

        The title comes from the link, the date from the numeric
        components of the comic url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        comic_url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(comic_url).groups()
        year, month, day = map(int, date_parts)
        entry = soup.find("div", class_="entry")
        sources = [picture['src'] for picture in entry.find_all("img")]
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': sources,
        }
568
569
570
class ZenPencils(GenericNavigableComic):
    """Retriever for the ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    _categories = ('ZENPENCILS', )
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict of the comic found in `soup`."""
        pictures = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_span = post.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert pictures
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert all(pic['alt'] in (title, "") for pic in pictures)
        description = soup.find('meta', property='og:description')['content']
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': title,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
605
606
607
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict of the comic found in `soup`."""
        title = soup.find('h1', class_='comic-title').find('a').string
        header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        sources = [meta['content']
                   for meta in soup.find_all('meta', property='og:image')]
        bonus_sources = [img['data-oversrc']
                         for img in soup.find_all('img', attrs={'data-oversrc': True})]
        # Bonus images already listed in the meta tags are not added again.
        candidates = sources + [src for src in bonus_sources if src not in sources]
        kept = []
        for src in candidates:
            if src.endswith("/2016/01/bonus-panel.png"):
                continue
            kept.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept,
            'tags': tags,
        }
641
642
643
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics from Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    _categories = ('FRANCAIS', )
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict of the comic found in `soup`."""
        date_header = soup.find('h2', class_='date-header')
        # Dates are written in French, hence the explicit locale.
        published = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        pictures = body.find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
667
668
669
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    _categories = ('ONEONEONEONE', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict of the comic found in `soup`."""
        title_header = soup.find('h1', class_='comic-title')
        title = title_header.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
694
695
696
class AngryAtNothing(GenericNavigableComic):
    """Retriever for the Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict of the comic found in `soup`."""
        title_header = soup.find('h1', class_='comic-title')
        title = title_header.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
719
720
721
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is extracted from the '?p=<num>' short link of the
        page; title and secondary title come from the 'alt' and 'title'
        attributes of the single comic image.
        """
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # Exactly one image is expected per comic page.
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
747
748
749
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is read from the '/comic/<year>/<month>/<day>' part of the
        comic url.
        """
        url = cls.get_url_from_link(link)
        # The site url is escaped so that its dots only match literal dots.
        date_re = re.compile(r'^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
777
778
779
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips hosted on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the next/previous navigation div, or None."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        container = last_soup.find('div', class_=nav_class)
        if not container:
            return None
        return container.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, images, author, tags and date read from meta tags."""
        def meta_content(prop):
            """Content of the first <meta> tag carrying the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
815
816
817
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on http://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image sources for one comic page."""
        # Date is on the archive page
        def last_meta_content(prop):
            """Content of the last <meta> tag carrying the given property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        description = last_meta_content('og:description')
        pictures = soup.find('div', id='comic').find_all('img')
        # Every comic image is expected to repeat the page title in alt and title.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
840
841
842
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the element wrapping the 'first' button image, or None when
        that image is not present on the home page.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        # find() returns None when nothing matches; do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the element wrapping the next/previous button image, or None
        when the image is missing or its parent carries no 'href'.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the page title (or None), the joined alt texts of
        the comic images and the absolute image URLs.
        """
        title = soup.find('title')
        # Site furniture (navigation buttons, ads, header...) is filtered out
        # by file-name suffix; everything else is treated as comic content.
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
874
875
876
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on http://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the sources of the images found in the 'comic' div."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # alt and title are expected to carry the same text on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
894
895
896
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and publication date of one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
920
921
922 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image sources and publication date of one comic page."""
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # alt and title of each comic image are expected to equal the page title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Images with an empty src are left out.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
949
950
951
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor marked rel='start' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs (main plus optional after-comic) and date."""
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_src = bonus_div.find('img')['src']
            # An empty bonus source is ignored.
            if bonus_src:
                sources.append(bonus_src)
        raw_date = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
983
984
985
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home-page anchors whose href looks like '/<digits>/', reversed."""
        comic_link_re = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name, image URL and file prefix for one comic."""
        comic_url = cls.get_url_from_archive_element(link)
        comic_img_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        # The anchor's 'name' attribute and its href are expected to agree.
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=comic_img_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1015
1016 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1017
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, description and date read from the meta tags."""
        title = soup.find('meta', property='og:title')['content']
        description_meta = soup.find('meta', property='og:description')
        if description_meta:
            description = description_meta['content']
        else:
            description = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first ten characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1043
1044
1045
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page anchors matching comic_num_re, reversed."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        anchors = get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image URL and date (parsed from the image URL)."""
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        picture_url = picture['src']
        # The image URL embeds the date as <year>-<month>-<day>-...
        year, month, day = map(int, comic_date_re.match(picture_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1081
1082
1083
class GenericBouletCorp(GenericNavigableComic):
    """Shared retrieval logic for the BouletCorp comics in their different languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div on the home page."""
        navigation = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return navigation.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, image hover texts and date (parsed from the URL)."""
        comic_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_pattern.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        # Missing or empty 'title' attributes are skipped.
        texts = '  '.join(filter(None, (pic.get('title') for pic in pictures)))
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1111
1112
1113
class BouletCorp(GenericBouletCorp):
    """BouletCorp comics served from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # Category tag declared by this class in addition to the parent's own.
    _categories = ('FRANCAIS', )
1119
1120
1121
class BouletCorpEn(GenericBouletCorp):
    """BouletCorp comics served from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1126
1127
1128
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image sources and date of one comic page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title is the space-joined hover text of all comic images.
        title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1153
1154
1155
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short URL, title, date and image URLs of one comic page."""
        short_url = soup.find('link', rel='shortlink')['href']
        raw_date = soup.find('div', class_='entry-meta').contents[0].strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title comes from the first image; it stays empty without images.
        title = ""
        if pictures:
            first = pictures[0]
            title = first['alt']
            assert first['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1185
1186
1187
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and image URLs, including extra-panel images."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            # The extra panel lives on a separate page that has to be fetched.
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1221
1222
1223
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing or has no 'href'.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as is;
        # presumably it mirrors the site's markup - confirm against the page.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches; do not dereference it.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the comic number (taken from the og:url path),
        author, publication date, file prefix and image URLs.
        """
        url2 = soup.find('meta', property='og:url')['content']
        # The number is the second-to-last '/'-separated part of the og:url.
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1262
1263
1264
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield info dicts for the comics following last_comic, by increasing number.

        The range of numbers comes from the '/comic/<n>' links on the home page.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1)) for anchor in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            # Resume right after the last comic already retrieved.
            first = last_comic['num'] + 1
        comic_img_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            # Matching images are taken in reverse document order.
            pictures = page.find_all('img', src=comic_img_re)[::-1]
            description = page.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(filter(None, (pic.get('title') for pic in pictures))),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': description,
            }
1294
1295
1296
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page comic anchors (minus the first one), reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        anchors = get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image source, title, archive text and number of one comic."""
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        text = link.next_sibling.string
        # Ordinal suffixes are removed from the date before it is parsed.
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=comic_img_re)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1329
1330
1331
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page anchors matching comic_link_re, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        anchors = get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date (parsed from the comic URL) and image source."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        # The three captured groups are year, month and day, in that order.
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1359
1360
1361
class CalvinAndHobbes(GenericComic):
    """Retriever for the Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield info dicts for the strips dated after the last retrieved one.

        Monthly index pages are linked from the main page as '<year>/<month>/';
        each strip image name starts with '<year><month><day>'.
        """
        if last_comic:
            last_date = get_date_for_comic(last_comic)
        else:
            last_date = date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            month_href = link['href']
            # Kept as strings too: they are reused verbatim in patterns and URLs.
            year, month = link_re.match(month_href).groups()
            year_num, month_num = int(year), int(month)
            # Skip months that lie entirely before the last retrieved strip.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, month_href)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).group(1))
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1395
1396
1397
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page anchors matching comic_url_re, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        archive_page = get_soup_at_url(archive_url)
        return archive_page.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number (from the comic URL), title and strip image source."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1420
1421
1422
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'first' button image, or None if it is absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, or None if it is absent."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs and title read from the page's meta tags."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1451
1452
1453 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the element wrapping the 'First.png' button image, or None
        when that image is not present on the home page.
        """
        img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        # find() returns None when nothing matches; do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the element wrapping the Next/Back button image, or None when
        the image is missing or its parent carries no 'href'.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image URLs (from <link rel="image_src">), the
        post title and the publication date.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1485
1486
1487
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor, if found."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the URLs of every image in the article div."""
        title = soup.find('meta', property='og:title')['content']
        article = soup.find('div', class_='single-article')
        pictures = article.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1511
1512
1513
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home-page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its title attribute."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, image url and title of the comic behind `link`.

        The comic number is the trailing 'comic=<digits>' part of the url.
        """
        src_pattern = re.compile('^/oc/comics/.*')
        num_pattern = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(num_pattern.match(page_url).group(1))
        picture = soup.find('img', src=src_pattern)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, picture['src'])],
            'title': picture.get('title')
        }
1543
1544
1545
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page's <div id="st">."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None if it is absent."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and description of a comic page.

        Exactly one title image (inside <div id="tt">) and exactly one strip
        image (id 'strip') are expected; the description joins both titles.
        """
        title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        comic_imgs = soup.find_all('img', id='strip')
        assert len(comic_imgs) == 1
        pictures = header_imgs + comic_imgs
        description = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
1579
1580
1581
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls read from the page."""
        label_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label_meta['content']
        description = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
1605
1606
1607
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder entry for the Something Of That Ilk comics.

    Only the identifying attributes are kept; the class defines no
    retrieval logic of its own.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1612
1613
1614
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the image urls found in <div id="comic">."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
1632
1633
1634
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's bookmark anchors in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of a post.

        Posts without a <div id="comic"> yield an empty image list and
        empty title/alt strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Defaults used when the post carries no comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1671
1672
1673
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and "%B %d, %Y"-formatted date of a post."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        info = {'img': [pic['src'] for pic in pictures]}
        info['title'] = title
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
1695
1696
1697
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and alt text of a comic page.

        Every image is expected to have identical 'alt' and 'title'
        attributes; the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1718
1719
1720
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a post.

        Each image's 'alt' and 'title' attributes are expected to equal the
        post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1746
1747
1748
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images of a page and a title built from their titles.

        Each image is expected to have identical 'title' and 'alt'
        attributes; the reported title joins all image titles with spaces.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1767
1768
1769
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The literal used to end with a stray space, which is not a valid URL
    # character and would leak into anything built from or displaying `url`.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication time are read from <meta> elements;
        only the first 10 characters of the publication time (the
        "%Y-%m-%d" part) are parsed. Images come from the og:image metas,
        minus the two images listed in `skip_imgs`.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # NOTE(review): presumably site artwork rather than comic strips
        # (both live under a 'site' directory) - confirm.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1801
1802 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1803
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text of a comic page.

        The alt text is taken from the first image; all images are expected
        to have identical 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1830
1831
1832
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a post.

        At least one image is expected in the 'comicpane' div, and each
        image's 'title' and 'alt' attributes must equal the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1860
1861 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1862
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, images, tags and date of a post.

        The date is the first 10 characters of the <time> element's
        'datetime' attribute, parsed as "%Y-%m-%d".
        """
        title = soup.find('title').string
        pictures = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(tag.string for tag in tag_links)
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1889
1890
1891
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page's anchor with id 'firstlink'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number of a comic.

        The comic number is the last '/'-separated component of its url.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': picture.get('title'),
            # The src attribute is stripped of surrounding whitespace
            # before being resolved against the page url.
            'img': [urljoin_wrapper(page_url, picture['src'].strip())],
            'num': int(page_url.split('/')[-1]),
        }
1920
1921
1922
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (<td class="archive-title">), reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The archive cell provides the title and, through its previous
        sibling, the month and day; the year is the first numeric path
        component of the comic url.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is escaped so that its dots are matched literally instead
        # of acting as regex wildcards.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # A single image is taken from the comic div (the former
        # `assert len(imgs) == 1` on this literal list was always true).
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1958
1959
1960
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder entry for the Disco Bleach comics.

    Only the identifying attributes are kept; the class defines no
    retrieval logic of its own.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1965
1966
1967
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder entry for the TubeyToons comics.

    Only the identifying attributes are kept; the class defines no
    retrieval logic of its own.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): the category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS');
    # other classes may rely on this exact value - confirm before renaming.
    _categories = ('TUNEYTOONS', )
1975
1976
1977
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of a post.

        The author is the second child of the 'post-author' span. At least
        one image is expected, and all images must share the same 'title'
        and 'alt' text as the first one.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        first_title = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == first_title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_title,
            'author': author,
        }
2005
2006
2007
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; when there is
        none, the image list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive anchors pointing at comic pages, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is escaped so that its dots are matched literally instead
        # of acting as regex wildcards.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2031
2032
2033 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" on the page at `cls.url`."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of a comic page.

        Only images with empty 'alt' and 'title' attributes inside the
        'comic' div are collected.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2063
2064
2065
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a post.

        Posts without a <div id="comic"> yield no image and an empty title;
        otherwise the title is the first image's 'title' attribute, which
        every image's 'title' and 'alt' must match.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
2091
2092
2093
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the 'beginArrow' image)."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the 'rightArrow' (next) or 'leftArrow'
        (previous) image, or None when the page has no such image.
        """
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        # find() returns None when nothing matches; report "no link" instead
        # of failing with AttributeError on `.parent`.
        return arrow.parent if arrow is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the twitter:title meta and the images from
        the og:image metas.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2119
2120
2121
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, images and alt text.

        The archive row `tr` must hold exactly three cells: the second
        holds the link (whose text is the title), the third the date in
        "%m.%d.%y" format. The remaining fields come from the comic page.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        published = string_to_date(raw_date, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the row's second cell."""
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's <tbody>, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2163
2164
2165
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a post.

        Title, author and date are read from the 'post-content' div. All
        images are expected to have identical 'alt' and 'title' attributes;
        the alt texts are concatenated without separator.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        title = post_div.find('h2', class_='post-title').string
        author = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2192
2193
2194
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, transcript, short url and images of a comic page."""
        title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        short_url = soup.find('link', rel='shortlink')['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript = transcript_div.string
        return {
            'title': title,
            'transcript': transcript,
            'short_url': short_url,
            'img': [meta['content'] for meta in image_metas],
        }
2215
2216
2217
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date.

        Exactly one image carrying data-recalc-dims="1" is expected; the
        part of its src after the last '?' is dropped. The date is the
        first 10 characters of the article:published_time meta.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        iso_date = published_meta['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2248
2249
2250
class AnythingComic(GenericListableComic):
    """Retrieve Anything Comic strips through the site's archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive table rows (<tr>) describing comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url found in an archive row."""
        # The unpacking also checks that the row holds exactly 4 cells.
        _, td_comic, _, _ = tr.find_all('td')
        href = td_comic.find('a')['href']
        return urljoin_wrapper(cls.url, href)

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the information dictionary from the comic page and its archive row."""
        td_num, td_comic, td_date, _ = tr.find_all('td')
        num = int(td_num.string)
        title = td_comic.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(td_date.string)
        day = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        for pic in pictures:
            assert pic.get('alt') == pic.get('title')
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2291
2292
2293
class LonnieMillsap(GenericNavigableComic):
    """Retrieve the comics of Lonnie Millsap."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    first_url = 'http://www.lonniemillsap.com/?p=42'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = post.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in entry.find_all("img")],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2319
2320 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2321
class LinsEditions(GenericNavigableComic):
    """Retrieve the comics of L.I.N.S. Editions."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    _categories = ('LINS', )
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary from the page's <meta> properties."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (YYYY-MM-DD) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2347
2348
2349
class ThorsThundershack(GenericNavigableComic):
    """Retrieve the comics of Thor's Thundershack."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if there is none."""
        rel = 'next' if next_ else 'prev'
        # Links pointing to '/comic' are skipped.
        candidates = (a for a in last_soup.find_all('a', rel=rel) if a['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        for pic in pictures:
            assert pic['title'] == pic['alt']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2392
2393
2394
class GerbilWithAJetpack(GenericNavigableComic):
    """Retrieve the comics of Gerbil With A Jetpack."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # The first image provides the reference alt text for all images.
        alt = pictures[0]['alt']
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2421
2422
2423
class EveryDayBlues(GenericNavigableComic):
    """Retrieve the comics of Every Day Blues."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        # Dates are parsed with a German locale.
        day = string_to_date(date_span.string, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title'] == title
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2449
2450
2451
class BiterComics(GenericNavigableComic):
    """Retrieve the comics of Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        # Exactly one image is expected: its alt text is used for the comic.
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2479
2480
2481
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve the comics of The Awkward Yeti."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        for pic in pictures[:1]:
            assert pic['alt'] == pic['title']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2508
2509
2510
class PleasantThoughts(GenericNavigableComic):
    """Retrieve the comics of Pleasant Thoughts."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary (title and images) for the page in `soup`."""
        # Both the title and the images are looked up inside the post content.
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        entry = post.find("div", class_="entry")
        return {
            'title': title,
            'img': [pic['src'] for pic in entry.find_all("img")],
        }
2528
2529
2530
class MisterAndMe(GenericNavigableComic):
    """Retrieve the comics of Mister & Me."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        # Pages without any image get an empty alt text.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2560
2561
2562
class LastPlaceComics(GenericNavigableComic):
    """Retrieve the comics of Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        # Pages without any image get an empty alt text.
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2590
2591
2592
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve the comics of Tales Of Absurdity."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        # Pages without any image get an empty alt text.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2622
2623
2624
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retrieve the comics of Endless Origami."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Pages without any image get an empty alt text.
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2651
2652
2653
class PlanC(GenericNavigableComic):
    """Retrieve the comics of Plan C."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        return {
            'title': title,
            'img': [pic['src'] for pic in comic_div.find_all('img')],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2675
2676
2677
class BuniComic(GenericNavigableComic):
    """Retrieve the comics of Buni."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary (images and title) for the page in `soup`."""
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        # Exactly one image is expected: its title is used as the comic title.
        assert len(pictures) == 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
2695
2696
2697
class GenericCommitStrip(GenericNavigableComic):
    """Common logic to retrieve Commit Strip comics, whatever the language."""
    # Subclasses are expected to provide the url of the first comic.
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        def og_content(prop):
            """Return the content of the <meta> tag with the given property."""
            return soup.find('meta', property=prop)['content']

        desc = og_content('og:description')
        title = og_content('og:title')
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # Images without a title contribute an empty string.
        title2 = ' '.join(pic.get('title', '') for pic in pictures)
        img_urls = []
        for pic in pictures:
            ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
            img_urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': img_urls,
        }
2716
2717
2718
class CommitStripFr(GenericCommitStrip):
    """Retrieve the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    url = 'http://www.commitstrip.com/fr'
    # Page used as the starting point of the navigation.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2725
2726
2727
class CommitStripEn(GenericCommitStrip):
    """Retrieve the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Page used as the starting point of the navigation.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2733
2734
2735
class GenericBoumerie(GenericNavigableComic):
    """Common logic to retrieve Boumeries comics, whatever the language."""
    # Subclasses are expected to provide the date format and the locale.
    date_format = NotImplemented
    lang = NotImplemented
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        # The date is parsed with the format and locale of the subclass.
        day = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2761
2762
2763
class BoumerieEn(GenericBoumerie):
    """Retrieve the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Locale and format used to parse the dates of the pages.
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2770
2771
2772
class BoumerieFr(GenericBoumerie):
    """Retrieve the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Locale and format used to parse the dates of the pages.
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2780
2781
2782 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title, the description, the short url
        ('url2'), the image urls and the publication date of the comic
        found in `soup`.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is either in a <h1> or in a <h2>; it may be missing.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the description and not the <meta> element
        # itself (an empty string is used when there is no description).
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content', '') if desc_elt else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2814
2815
2816
class Optipess(GenericNavigableComic):
    """Retrieve the comics of Optipess."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        # The comic <div> may be missing: the page then has no image.
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2844
2845
2846
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the short url, the comic number (taken
        from the 'p' parameter of the short url), the image urls, the
        publication date, the alt text and the title of the comic found
        in `soup`.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The url is escaped so that its dots are matched literally and at
        # least one digit is required so that the number can be converted.
        short_url_re = re.compile('^%s/\\?p=([0-9]+)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The title of the first image is the reference for all images.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2876
2877
2878
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the short url, the comic number (taken
        from the 'p' parameter of the short url), the image urls, the
        publication date, the title, the tags, the alt text and the author
        of the comic found in `soup`.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The url is escaped so that its dots are matched literally and at
        # least one digit is required so that the number can be converted.
        short_url_re = re.compile('^%s/\\?p=([0-9]+)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The title of the first image is the reference for all images.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2914
2915
2916
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no corresponding navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # The <li> is missing at the end of the chain of comics.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the image urls, the title, the author
        and the publication date of the comic found in `soup`.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2947
2948
2949
class LittleLifeLines(GenericNavigableComic):
    """Retrieve the comics of Little Life Lines."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    first_url = 'http://www.littlelifelines.com/comics/well-done'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if there is none."""
        # prev is next / next is prev
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        content_div = soup.find('div', class_="body entry-content")
        # Images without a 'src' attribute are ignored.
        pictures = [pic for pic in content_div.find_all('img') if pic.get('src') is not None]
        alt = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
2988
2989
2990
class GenericWordPressInkblot(GenericNavigableComic):
    """Common logic to retrieve comics from sites using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' link found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (YYYY-MM-DD) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
3013
3014
3015
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve Everything's Stupid comics (see GenericWordPressInkblot)."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3023
3024
3025
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics (see GenericWordPressInkblot)."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
3031
3032
3033
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve Wooden Plank Studios comics (see GenericWordPressInkblot)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3038
3039
3040
class ElectricBunnyComic(GenericNavigableComic):
    """Class to retrieve Electric Bunny Comics.

    Navigation links are the parents of the 'First'/'Next'/'Back' images.
    """
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_button = get_soup_at_url(cls.url).find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        wanted_alt = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=wanted_alt)
        if not button:
            # No such button on the page: there is no link to return.
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
3068
3069
3070
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Links pointing at the bare site url are ignored; None is returned
        when no other link is found.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=wanted_id)
            if anchor['href'] != 'http://www.sheldoncomics.com')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        foot_imgs = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks: a single image whose alt text equals its title.
        assert all(img['alt'] == img['title'] for img in foot_imgs)
        assert len(foot_imgs) == 1
        only_img = foot_imgs[0]
        return {
            'title': only_img['title'],
            'img': [img['src'] for img in foot_imgs],
        }
3101
3102
3103
class Ubertool(GenericNavigableComic):
    """Class to retrieve Ubertool comics."""
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL',)
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the 'post-date' span, formatted like
        "%B %d, %Y".
        """
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3128
3129
3130
class EarthExplodes(GenericNavigableComic):
    """Class to retrieve The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        wanted_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Image urls are joined to the site url; the alt text is the title
        attribute of the first image (empty string when it has none).
        """
        page_title = soup.find('title').string
        image_tags = soup.find('div', id='image').find_all('img')
        first_image = image_tags[0]
        hover_text = first_image.get('title', '')
        return {
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags],
            'title': page_title,
            'alt': hover_text,
        }
3155
3156
3157
class PomComics(GenericNavigableComic):
    """Class to retrieve PomComics."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Image urls are joined to the site url.
        """
        comic_name = soup.find('h1', id="comic-name").string
        description = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        image_tags = soup.find('div', class_='comic').find_all('img')
        return {
            'title': comic_name,
            'desc': description,
            'tags': keywords,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags],
        }
3187
3188
3189
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    Navigation links are the parents of the glyphicon spans used as buttons.
    """
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no corresponding chevron instead of
        failing on the missing element.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        chevron = last_soup.find('span', class_=class_)
        # find() gives None when nothing matches: report "no link" the same
        # way the other navigable comics of this file do.
        return chevron.parent if chevron else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the twitter title and url, the title and alt
        attributes of the first image and the list of image urls.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3224
3225
3226
class MakeItStoopid(GenericNavigableComic):
    """Class to retrieve Make It Stoopid Comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Get the navigation elements from soup object.

        The 'cnav' elements are split into the first five and the rest and
        both parts are asserted equal. Returns the first part, with elements
        lacking an href replaced by None. Expected order, per the callers'
        indices: begin, prev, archive, next, end.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        links = []
        for element in first_bar:
            has_target = element.get('href') is not None
            links.append(element if has_target else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the navigation link that led to the page.
        """
        link_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [img['src'] for img in comic_imgs],
        }
3260
3261 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3262
class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images, date and title are all read from the page's meta tags.
        """
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the 'YYYY-MM-DD' part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': og_title,
        }
3285
3286
3287
class ConsoliaComics(GenericNavigableComic):
    """Class to retrieve Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        link_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date comes from the datetime attribute of the first 'time' tag.
        """
        og_title = soup.find('meta', property='og:title')['content']
        time_tag = soup.find('time')
        published = string_to_date(time_tag["datetime"], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3318
3319
3320
class TuMourrasMoinsBete(GenericNavigableComic):
    """Class to retrieve Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS',)
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The blog pager's "newer" link is the next comic, "older" the previous.
        """
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        page_title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        image_tags = body.find_all('img')
        author_name = soup.find('span', itemprop='author').string
        return {
            'img': [tag['src'] for tag in image_tags],
            'author': author_name,
            'title': page_title,
        }
3345
3346
3347
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used to move forward and 'next-item'
        # to move backward - presumably the site names its links from newest
        # to oldest; confirm this swap is intentional.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are searched in the "body entry-content" div or, failing that,
        in the "special-content" div; images without a src are ignored and
        the remaining urls are joined to the site url. The alt text is the
        alt attribute of the first image, or an empty string when there is
        no image or no alt attribute.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `in` on a BeautifulSoup tag appears to look at the
        # tag's children rather than its attributes, so this assertion may
        # never actually compare alt and title - confirm, and consider
        # has_attr('title') if the check is meant to be effective.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string, so that 'alt' has the same type with or without images.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3385
3386
3387
class GloryOwlComix(GenericNavigableComic):
    """Class to retrieve Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The blog pager's "newer" link is the next comic, "older" the previous.
        """
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author_name = soup.find('a', rel='author').string
        return {
            'img': [image_link['href'] for image_link in image_links],
            'author': author_name,
            'title': page_title,
        }
3412
3413
3414
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Subclasses provide `url`; posts are fetched from '<url>/api/read/'.
    """
    _categories = ('TUMBLR',)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the comic built from each post returned by get_posts, skipping
        posts for which get_comic_info returns None.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Get the value stored under 'url' in a post."""
        post_url = post['url']
        return post_url

    @classmethod
    def get_api_url(cls):
        """Get the url of the API entry point ('/api/read/' joined to cls.url)."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo'.
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When last_comic is given, only the posts found before reaching its
        url are returned. Nothing is returned if the previous post cannot be
        fetched anymore or if its url is never reached.
        """
        collected = []
        waiting_for_url = last_comic['url'] if last_comic else None
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                # Tell apart "the whole site is unreachable" from "only the
                # previous post is unreachable".
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        base_api_url = cls.get_api_url()
        summary = get_soup_at_url(base_api_url).find('posts')
        start, total = int(summary['start']), int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = base_api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    # Reached the last known comic: everything collected so
                    # far is newer than it.
                    return reversed(collected)
                collected.append(post)
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
            return []
        return reversed(collected)
3507
3508
3509
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal comics (see GenericTumblrV1)."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC',)
3517
3518
3519
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics (see GenericTumblrV1)."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3524
3525
3526
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics (see GenericTumblrV1)."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3531
3532
3533
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics (see GenericTumblrV1)."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE',)
3541
3542
3543
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics (see GenericTumblrV1)."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3549
3550
3551
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics (see GenericTumblrV1)."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3557
3558
3559
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve TubeyToons comics (see GenericTumblrV1)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; left
    # untouched as the value is presumably shared with another class - confirm.
    _categories = ('TUNEYTOONS',)
3567
3568
3569
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed comics (see GenericTumblrV1)."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED',)
3577
3578
3579
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics (see GenericTumblrV1)."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3584
3585
3586
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics (see GenericTumblrV1)."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3591
3592
3593
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics (see GenericTumblrV1)."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3598
3599
3600
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics (see GenericTumblrV1)."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3606
3607
3608
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics (see GenericTumblrV1)."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3613
3614
3615
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics (see GenericTumblrV1)."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN',)
3622
3623
3624
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear Shaped Comics (see GenericTumblrV1)."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3629
3630
3631
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics (see GenericTumblrV1)."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3636
3637
3638
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics (see GenericTumblrV1)."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3644
3645
3646
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics (see GenericTumblrV1)."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD',)
3653
3654
3655
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics (see GenericTumblrV1)."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3661
3662
3663
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics (see GenericTumblrV1)."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3670
3671
3672
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics (see GenericTumblrV1)."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3678
3679
3680
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics (see GenericTumblrV1)."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3686
3687
3688
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics (see GenericTumblrV1)."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3693
3694
3695
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve Cheer Up Emo Kid comics (see GenericTumblrV1)."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3702
3703
3704
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic comics (see GenericTumblrV1)."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3710
3711
3712
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve Zen Pencils comics (see GenericTumblrV1)."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS',)
3720
3721
3722
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics (see GenericTumblrV1)."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3728
3729
3730
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics (see GenericTumblrV1)."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3736
3737
3738
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics (see GenericTumblrV1)."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3744
3745
3746
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics (see GenericTumblrV1)."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3752
3753
3754
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve BouletCorp comics (see GenericTumblrV1)."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET',)
3761
3762
3763
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics (see GenericTumblrV1)."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI',)
3772
3773
3774
class NellucNhoj(GenericTumblrV1):
    """Retrieve Nelluc Nhoj comics (see GenericTumblrV1)."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3779
3780
3781
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics (see GenericTumblrV1)."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3787
3788
3789
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Every other comic class of this file declares its categories in
    # `_categories`; the public spelling used here before was presumably
    # never picked up.
    _categories = ('DAMILEE', )
    # Previous attribute name, kept as an alias for backward compatibility.
    # TODO confirm nothing reads it and remove.
    categories = _categories
3796
3797
3798
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Spelled `_categories` (leading underscore) like every other comic class
    # in this module; the previous `categories` spelling did not match them.
    _categories = ('DAMILEE', )
3807
3808
3809
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://www.1111comics.me
    #  - https://tapastic.com/series/1111-Comics
    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
    _categories = ("ONEONEONEONE",)
3817
3818
3819
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved from the Tumblr blog."""
    # Also published at http://jhallcomics.com
    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
3825
3826
3827
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://www.gocomics.com/berkeley-mews
    #  - http://www.berkeleymews.com
    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
    _categories = ("BERKELEY",)
3835
3836
3837
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved from the Tumblr blog."""
    # Also published at http://joancornella.net
    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
3843
3844
3845
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved from the Tumblr blog."""
    # Also published at http://respawncomic.com
    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "http://respawncomic.tumblr.com"
3851
3852
3853
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name fixed to match the spelling used by the class name, the
    # docstring and the blog URL ("Hallbeck", not "Hallback").
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its existing spelling on purpose;
    # presumably other classes share it - verify before renaming.
    _categories = ('HALLBACK', )
3863
3864
3865
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, handled with the generic Tumblr logic."""
    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3870
3871
3872
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved from the Tumblr blog."""
    # Also published at https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3878
3879
3880
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved from the Tumblr blog."""
    # Also published at http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3886
3887
3888
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
    _categories = ("FOWLLANGUAGE",)
3897
3898
3899
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://theodd1sout.com
    #  - https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3906
3907
3908
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved from the Tumblr blog."""
    # Also published at http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3914
3915
3916
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved from the Tumblr blog."""
    # Also published at http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3922
3923
3924
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved from the Tumblr blog."""
    # Also published at http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3930
3931
3932
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved from the Tumblr blog."""
    # Also published at https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3938
3939
3940
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, handled with the generic Tumblr logic."""
    # Also published at http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3946
3947
3948
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics from the Tumblr blog.

    GenericEmptyComic comes first in the bases (presumably to disable
    retrieval for now - confirm before relying on it).
    """
    # Also published at https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3954
3955
3956
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, handled with the generic Tumblr logic."""
    # Other places hosting the same comic:
    #  - http://moonbeard.com
    #  - http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3963
3964
3965
class AComik(GenericTumblrV1):
    """A Comik comics, handled with the generic Tumblr logic."""
    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
3970
3971
3972
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved from the Tumblr blog."""
    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
3977
3978
3979
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved from the Tumblr blog."""
    # Also published at http://www.dagsson.com
    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "http://hugleikurdagsson.tumblr.com"
3985
3986
3987
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved from the Tumblr blog."""
    # Also published at https://linsedition.com
    # The comic has since moved to http://warandpeas.tumblr.com
    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "http://linscomics.tumblr.com"
    _categories = ("LINS",)
3995
3996
3997
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, retrieved from the Tumblr blog."""
    # Formerly published at http://linscomics.tumblr.com
    name = "warandpeas-tumblr"
    long_name = "War And Peas (from Tumblr)"
    url = "http://warandpeas.tumblr.com"
    _categories = ("WARANDPEAS",)
4004
4005
4006
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, handled with the generic Tumblr logic."""
    name = "origamihotdish"
    long_name = "Origami Hot Dish"
    url = "http://origamihotdish.com"
4011
4012
4013
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved from the Tumblr blog."""
    name = "hitandmiss"
    long_name = "Hit and Miss Comics"
    url = "http://hitandmisscomics.tumblr.com"
4018
4019
4020
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved from the Tumblr blog."""
    name = "hmblanc"
    long_name = "HM Blanc"
    url = "http://hmblanc.tumblr.com"
4025
4026
4027
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://talesofabsurdity.com
    #  - http://tapastic.com/series/Tales-Of-Absurdity
    name = "absurdity-tumblr"
    long_name = "Tales of Absurdity (from Tumblr)"
    url = "http://talesofabsurdity.tumblr.com"
    _categories = ("ABSURDITY",)
4035
4036
4037
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved from the Tumblr blog."""
    # Also published at http://robbieandbobby.com
    name = "robbie-tumblr"
    long_name = "Robbie And Bobby (from Tumblr)"
    url = "http://robbieandbobby.tumblr.com"
4043
4044
4045
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved from the Tumblr blog."""
    # Also published at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = "bunny-tumblr"
    long_name = "Electric Bunny Comic (from Tumblr)"
    url = "http://electricbunnycomics.tumblr.com"
4051
4052
4053
class Hoomph(GenericTumblrV1):
    """Hoomph comics, handled with the generic Tumblr logic."""
    name = "hoomph"
    long_name = "Hoomph"
    url = "http://hoom.ph"
4058
4059
4060
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - https://tapastic.com/series/BFGFS
    #  - http://bfgfs.com
    name = "bfgfs-tumblr"
    long_name = "BFGFS (from Tumblr)"
    url = "http://bfgfs.tumblr.com"
4067
4068
4069
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, handled with the generic Tumblr logic."""
    # Also published at http://doodleforfood.com
    name = "doodle"
    long_name = "Doodle For Food"
    url = "http://doodleforfood.com"
4075
4076
4077
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://cassandracalin.com
    #  - https://tapastic.com/series/C-Cassandra-comics
    name = "cassandra-tumblr"
    long_name = "Cassandra Calin (from Tumblr)"
    url = "http://c-cassandra.tumblr.com"
4084
4085
4086
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, retrieved from the Tumblr blog."""
    name = "doug"
    long_name = "Doug Was Taken"
    url = "http://dougwastaken.tumblr.com"
4091
4092
4093
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, handled with the generic Tumblr logic."""
    name = "rollercoaster"
    long_name = "Mandatory Roller Coaster"
    url = "http://mandatoryrollercoaster.com"
4098
4099
4100
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, from the Tumblr blog."""
    name = "cperspqccltt"
    long_name = "C Est Pas En Regardant Ses Pompes (...)"
    url = "http://cperspqccltt.tumblr.com"
4105
4106
4107
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, handled with the generic Tumblr logic."""
    name = "grohltroll"
    long_name = "The Grohl Troll"
    url = "http://thegrohltroll.com"
4112
4113
4114
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, handled with the generic Tumblr logic."""
    name = "webcomicname"
    long_name = "Webcomic Name"
    url = "http://webcomicname.com"
4119
4120
4121
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, retrieved from the Tumblr blog."""
    # Also published at http://www.booksofadam.com
    name = "booksofadam"
    long_name = "Books of Adam"
    url = "http://booksofadam.tumblr.com"
4127
4128
4129
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics, retrieved from the Tumblr blog."""
    # Also published at http://www.harkavagrant.com
    name = "hark-tumblr"
    long_name = "Hark A Vagrant (from Tumblr)"
    url = "http://beatonna.tumblr.com"
4135
4136
4137
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - https://tapastic.com/series/Our-Super-Adventure
    #  - http://www.oursuperadventure.com
    # Related site: http://sarahgraley.com
    name = "superadventure-tumblr"
    long_name = "Our Super Adventure (from Tumblr)"
    url = "http://sarahssketchbook.tumblr.com"
4145
4146
4147
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, handled with the generic Tumblr logic."""
    name = "jake"
    long_name = "Jake Likes Onions"
    url = "http://jakelikesonions.com"
4152
4153
4154
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake comics, retrieved from the Tumblr blog."""
    name = "inyourfacecake-tumblr"
    long_name = "In Your Face Cake (from Tumblr)"
    url = "http://in-your-face-cake.tumblr.com"
4159
4160
4161
class Robospunk(GenericTumblrV1):
    """Robospunk comics, handled with the generic Tumblr logic."""
    name = "robospunk"
    long_name = "Robospunk"
    url = "http://robospunk.com"
4166
4167
4168
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, retrieved from the Tumblr blog."""
    name = "banana"
    long_name = "Banana Twinky"
    url = "http://bananatwinky.tumblr.com"
4173
4174
4175
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://www.yesterdayspopcorn.com
    #  - https://tapastic.com/series/Yesterdays-Popcorn
    name = "popcorn-tumblr"
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = "http://yesterdayspopcorn.tumblr.com"
4182
4183
4184
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, handled with the generic Tumblr logic."""
    name = "twisted"
    long_name = "Twisted Doodles"
    url = "http://www.twisteddoodles.com"
4189
4190
4191
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://ubertoolcomic.com
    #  - https://tapastic.com/series/ubertool
    name = "ubertool-tumblr"
    long_name = "Ubertool (from Tumblr)"
    url = "http://ubertool.tumblr.com"
    _categories = ("UBERTOOL",)
4199
4200
4201
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines comics, retrieved from the Tumblr blog."""
    # Also published at http://www.littlelifelines.com
    name = "life-tumblr"
    long_name = "Little Life Lines (from Tumblr)"
    url = "https://little-life-lines.tumblr.com"
4207
4208
4209
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, handled with the generic Tumblr logic."""
    name = "theycantalk"
    long_name = "They Can Talk"
    url = "http://theycantalk.com"
4214
4215
4216
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, handled with the generic Tumblr logic."""
    name = "will5"
    long_name = "Will 5:00 Never Come ?"
    url = "http://will5nevercome.com"
4221
4222
4223
class Sephko(GenericTumblrV1):
    """Sephko comics, retrieved from the Tumblr blog."""
    # Also published at http://www.sephko.com
    name = "sephko"
    long_name = "Sephko"
    url = "http://sephko.tumblr.com"
4229
4230
4231
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn comics, retrieved from the Tumblr blog."""
    name = "blazers"
    long_name = "Blazers At Dawn"
    url = "http://blazersatdawn.tumblr.com"
4236
4237
4238
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Art By Moga comics from the Tumblr blog (currently deactivated)."""
    name = "moga"
    long_name = "Art By Moga"
    url = "http://artbymoga.tumblr.com"
4243
4244
4245
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, retrieved from the Tumblr blog."""
    # Also published at http://www.verbal-vomit.com
    name = "vomit-tumblr"
    long_name = "Verbal Vomit (from Tumblr)"
    url = "http://verbalvomits.tumblr.com"
4251
4252
4253
class LibraryComic(GenericTumblrV1):
    """LibraryComic comics, retrieved from the Tumblr blog."""
    # Also published at http://librarycomic.com
    name = "library-tumblr"
    long_name = "LibraryComic (from Tumblr)"
    url = "http://librarycomic.tumblr.com"
4259
4260
4261
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://tizzystitchbird.com
    #  - https://tapastic.com/series/TizzyStitchbird
    #  - http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = "tizzy-tumblr"
    long_name = "Tizzy Stitch Bird (from Tumblr)"
    url = "http://tizzystitchbird.tumblr.com"
4269
4270
4271
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Victims Of Circumsolar comics, retrieved from the Tumblr blog."""
    # Also published at http://www.victimsofcircumsolar.com
    name = "circumsolar-tumblr"
    long_name = "Victims Of Circumsolar (from Tumblr)"
    url = "http://victimsofcomics.tumblr.com"
4277
4278
4279
class RockPaperCynicTumblr(GenericTumblrV1):
    """Rock Paper Cynic comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://www.rockpapercynic.com
    #  - https://tapastic.com/series/rockpapercynic
    name = "rpc-tumblr"
    long_name = "Rock Paper Cynic (from Tumblr)"
    url = "http://rockpapercynic.tumblr.com"
4286
4287
4288
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://www.deadlypanel.com
    #  - https://tapastic.com/series/deadlypanel
    name = "deadly-tumblr"
    long_name = "Deadly Panel (from Tumblr)"
    url = "http://deadlypanel.tumblr.com"
4295
4296
4297
class CatanaComics(GenericTumblrV1):
    """Catana comics, handled with the generic Tumblr logic."""
    name = "catana"
    long_name = "Catana"
    url = "http://www.catanacomics.com"
4302
4303
4304
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics, retrieved from the Tumblr blog."""
    # Other places hosting the same comic:
    #  - http://offtheleashdogcartoons.com
    #  - http://www.rupertfawcettcartoons.com
    name = "offtheleash-tumblr"
    long_name = "Off The Leash Dog (from Tumblr)"
    url = "http://rupertfawcettsdoggyblog.tumblr.com"
    _categories = ("FAWCETT",)
4312 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
4313
4314
class HorovitzComics(GenericListableComic):
    """Shared retrieval logic for the comic series of horovitzcomics.com.

    Concrete subclasses set `link_re`, a compiled pattern matching the
    archive hrefs of their series; its first group is the comic number.
    """
    url = "http://www.horovitzcomics.com"
    _categories = ("HOROVITZ",)
    # The three groups are read as year, month and day of the comic.
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    link_re = NotImplemented  # placeholder, replaced in each subclass
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict (title, date, images, number) of one comic.

        `soup` is the parsed comic page and `link` the archive anchor that
        led to it.
        """
        link_match = cls.link_re.match(link["href"])
        comic_number = int(link_match.group(1))
        comic_title = link.string
        comic_imgs = soup.find_all("img", id="comic")
        assert len(comic_imgs) == 1
        # The publication date is encoded in the path of the image.
        date_parts = cls.img_re.match(comic_imgs[0]["src"]).groups()
        year, month, day = map(int, date_parts)
        info = {
            "title": comic_title,
            "day": day,
            "month": month,
            "year": year,
            "img": [img["src"] for img in comic_imgs],
            "num": comic_number,
        }
        return info

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url("http://www.horovitzcomics.com/comics/archive/")
        matching_links = archive_soup.find_all("a", href=cls.link_re)
        return reversed(matching_links)
4345
4346
4347
class HorovitzNew(HorovitzComics):
    """The "new" series of Horovitz comics."""
    name = "horovitznew"
    long_name = "Horovitz New"
    # Archive hrefs of this series; the group captures the comic number.
    link_re = re.compile("^/comics/new/([0-9]+)$")
4352
4353
4354
class HorovitzClassic(HorovitzComics):
    """The "classic" series of Horovitz comics."""
    name = "horovitzclassic"
    long_name = "Horovitz Classic"
    # Archive hrefs of this series; the group captures the comic number.
    link_re = re.compile("^/comics/classic/([0-9]+)$")
4359
4360
4361
class GenericGoComic(GenericNavigableComic):
    """Shared retrieval logic for the comics hosted on gocomics.com.

    Navigation links are located through their exact CSS class strings
    (trailing space included).
    """
    _categories = ("GOCOMIC",)

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'backward' anchor found on the comic's landing page."""
        landing_page = get_soup_at_url(cls.url)
        return landing_page.find(
            "a", class_="fa btn btn-outline-default btn-circle fa-backward sm ")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (`next_` true) or previous comic."""
        if next_:
            css_class = "fa btn btn-outline-default btn-circle fa-caret-right sm "
        else:
            css_class = "fa btn btn-outline-default btn-circle fa-caret-left sm "
        return last_soup.find("a", class_=css_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the href of `link` against the gocomics.com root."""
        return urljoin_wrapper("http://www.gocomics.com", link["href"])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict (date, images, author, tags) of one comic."""

        def meta_content(prop):
            """Content of the <meta> tag carrying the given property."""
            return soup.find("meta", property=prop)["content"]

        published = string_to_date(meta_content("article:published_time"), "%Y-%m-%d")
        picture = soup.find("picture", class_="img-fluid item-comic-image")
        comic_imgs = picture.find_all("img")
        author = meta_content("article:author")
        tags = meta_content("article:tag")
        info = {
            "day": published.day,
            "month": published.month,
            "year": published.year,
            "img": [img["src"] for img in comic_imgs],
            "author": author,
            "tags": tags,
        }
        return info
4398
4399
4400
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved from GoComics."""
    name = "pearls"
    long_name = "Pearls Before Swine"
    url = "http://www.gocomics.com/pearlsbeforeswine"
4405
4406
4407
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved from GoComics."""
    name = "peanuts"
    long_name = "Peanuts"
    url = "http://www.gocomics.com/peanuts"
4412
4413
4414
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from GoComics."""
    name = "wuerker"
    long_name = "Matt Wuerker"
    url = "http://www.gocomics.com/mattwuerker"
4419
4420
4421
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from GoComics."""
    name = "toles"
    long_name = "Tom Toles"
    url = "http://www.gocomics.com/tomtoles"
4426
4427
4428
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved from GoComics."""
    name = "breakofday"
    long_name = "Break Of Day"
    url = "http://www.gocomics.com/break-of-day"
4433
4434
4435
class Brevity(GenericGoComic):
    """Brevity comics, retrieved from GoComics."""
    name = "brevity"
    long_name = "Brevity"
    url = "http://www.gocomics.com/brevitypanel"
4440
4441
4442
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved from GoComics."""
    name = "ramirez"
    long_name = "Michael Ramirez"
    url = "http://www.gocomics.com/michaelramirez"
4447
4448
4449
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved from GoComics."""
    name = "luckovich"
    long_name = "Mike Luckovich"
    url = "http://www.gocomics.com/mikeluckovich"
4454
4455
4456
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved from GoComics."""
    # Also published at http://jimbenton.tumblr.com
    name = "benton"
    long_name = "Jim Benton"
    url = "http://www.gocomics.com/jim-benton-cartoons"
4462
4463
4464
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved from GoComics."""
    name = "argyle"
    long_name = "Argyle Sweater"
    url = "http://www.gocomics.com/theargylesweater"
4469
4470
4471
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, retrieved from GoComics."""
    # Also published at http://www.sunnystreetcomics.com
    name = "sunny"
    long_name = "Sunny Street"
    url = "http://www.gocomics.com/sunny-street"
4477
4478
4479
class OffTheMark(GenericGoComic):
    """Off The Mark comics, retrieved from GoComics."""
    # Also published at https://www.offthemark.com
    name = "offthemark"
    long_name = "Off The Mark"
    url = "http://www.gocomics.com/offthemark"
4485
4486
4487
class WuMo(GenericGoComic):
    """WuMo comics, retrieved from GoComics."""
    # Also published at http://wumo.com
    name = "wumo"
    long_name = "WuMo"
    url = "http://www.gocomics.com/wumo"
4493
4494
4495
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, retrieved from GoComics."""
    # Other places hosting the same comic:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = "lunarbaboon"
    long_name = "Lunar Baboon"
    url = "http://www.gocomics.com/lunarbaboon"
4502
4503
4504
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from GoComics."""
    # Other places hosting the same comic:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = "sandersen-goc"
    long_name = "Sarah Andersen (from GoComics)"
    url = "http://www.gocomics.com/sarahs-scribbles"
4511
4512
4513
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics, retrieved from GoComics."""
    # Other places hosting the same comic:
    #  - http://smbc-comics.tumblr.com
    #  - http://www.smbc-comics.com
    name = "smbc-goc"
    long_name = "Saturday Morning Breakfast Cereal (from GoComics)"
    url = "http://www.gocomics.com/saturday-morning-breakfast-cereal"
    _categories = ("SMBC",)
4521
4522
4523
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, retrieved from GoComics."""
    # Source is gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = "calvin-goc"
    long_name = "Calvin and Hobbes (from GoComics)"
    url = "http://www.gocomics.com/calvinandhobbes"
4529
4530
4531
class RallGoComic(GenericGoComic):
    """Ted Rall comics, retrieved from GoComics."""
    # Also published at http://rall.com/comic
    name = "rall-goc"
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ("RALL",)
4538
4539
4540
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, retrieved from GoComics."""
    # Other places hosting the same comic:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-goc"
    long_name = "The Awkward Yeti (from GoComics)"
    url = "http://www.gocomics.com/the-awkward-yeti"
    _categories = ("YETI",)
4549
4550
4551
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, retrieved from GoComics."""
    # Other places hosting the same comic:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = "berkeley-goc"
    long_name = "Berkeley Mews (from GoComics)"
    url = "http://www.gocomics.com/berkeley-mews"
    _categories = ("BERKELEY",)
4559
4560
4561
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, retrieved from GoComics."""
    # Also published at http://www.sheldoncomics.com
    name = "sheldon-goc"
    long_name = "Sheldon Comics (from GoComics)"
    url = "http://www.gocomics.com/sheldon"
4567
4568
4569
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, retrieved from GoComics."""
    # Other places hosting the same comic:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = "fowllanguage-goc"
    long_name = "Fowl Language Comics (from GoComics)"
    url = "http://www.gocomics.com/fowl-language"
    _categories = ("FOWLLANGUAGE",)
4578
4579
4580
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved from GoComics."""
    name = "nickanderson"
    long_name = "Nick Anderson"
    url = "http://www.gocomics.com/nickanderson"
4585
4586
4587
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, retrieved from GoComics."""
    # Also published at http://garfield.com
    name = "garfield-goc"
    long_name = "Garfield (from GoComics)"
    url = "http://www.gocomics.com/garfield"
    _categories = ("GARFIELD",)
4594
4595
4596
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, retrieved from GoComics."""
    # Also published at http://dorrismccomics.com
    name = "dorrismc-goc"
    long_name = "Dorris Mc (from GoComics)"
    url = "http://www.gocomics.com/dorris-mccomics"
4602
4603
4604
class FoxTrot(GenericGoComic):
    """FoxTrot comics, retrieved from GoComics."""
    name = "foxtrot"
    long_name = "FoxTrot"
    url = "http://www.gocomics.com/foxtrot"
4609
4610
4611
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics comics, retrieved from GoComics."""
    name = "foxtrot-classics"
    long_name = "FoxTrot Classics"
    url = "http://www.gocomics.com/foxtrotclassics"
4616
4617
4618
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Mister & Me comics from GoComics."""
    # Other places hosting the same comic:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = "mister-goc"
    long_name = "Mister & Me (from GoComics)"
    url = "http://www.gocomics.com/mister-and-me"
4625
4626
4627
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) comics, retrieved from GoComics."""
    name = "nonsequitur"
    long_name = "Non Sequitur"
    url = "http://www.gocomics.com/nonsequitur"
4632
4633
4634
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Archive elements are the entries decoded from the ``episodeList`` JSON
    embedded in the series page; this class reads their ``id``, ``title``
    and ``publishDate`` values.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Returns the info dict (date, images, title), or None - after
        printing a notice - when the episode page shows no image yet.
        """
        # publishDate is divided by 1000 before conversion (presumably it is
        # expressed in milliseconds); the date is computed in local time.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Build the episode URL from the ``id`` of an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list decoded from the series page.

        Raises IndexError when the page has no ``episodeList`` line.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Only the first matching line is used: strip the prefix
                # and the trailing comma to keep the bare JSON value.
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as the former `[...][0]` lookup, with context.
        raise IndexError("no '%s' line found at %s" % (pref.strip(), cls.url))
4667
4668
4669
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert, retrieved from its Tapastic series page."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4675
4676
4677
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language, retrieved from its Tapastic series page."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4686
4687
4688
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities, retrieved from its Tapastic series page."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4693
4694
4695
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats Comics, retrieved from its Tapastic series page."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4700
4701
4702
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen's comics, retrieved from the Tapastic series page."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4709
4710
4711
class TubeyToonsTapastic(GenericTapasticComic):
    """Tubey Toons, retrieved from its Tapastic series page."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; left
    # as is because other classes may share this exact key - confirm.
    _categories = ('TUNEYTOONS', )
4719
4720
4721
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic, retrieved from its Tapastic series page."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4727
4728
4729
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, retrieved from its Tapastic series page."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4737
4738
4739
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid, retrieved from its Tapastic series page."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4746
4747
4748
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh, retrieved from its Tapastic series page."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4754
4755
4756
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack, retrieved from its Tapastic series page."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4763
4764
4765
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd, retrieved from its Tapastic series page."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4772
4773
4774
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture, retrieved from its Tapastic series page."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4781
4782
4783
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know Comics, retrieved from its Tapastic series page."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4789
4790
4791
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid, retrieved from its Tapastic series page."""
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4798
4799
4800
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice, retrieved from its Tapastic series page."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4806
4807
4808
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out, retrieved from its Tapastic series page."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4814
4815
4816
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole, retrieved from its Tapastic series page."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4822
4823
4824
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry At Nothing, retrieved from its Tapastic series page."""
    # Also on http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4830
4831
4832
class LeleozTapa(GenericTapasticComic):
    """Leleoz, retrieved from its Tapastic series page."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4838
4839
4840
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti, retrieved from its Tapastic series page."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
4849
4850
4851
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual, retrieved from its Tapastic series page."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Declared as `_categories` like every other comic class here; the
    # previous un-prefixed `categories` spelling was presumably a typo.
    _categories = ('DAMILEE', )
4858
4859
4860
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Hot Comics For Cool People, retrieved from its Tapastic series page."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Declared as `_categories` like every other comic class here; the
    # previous un-prefixed `categories` spelling was presumably a typo.
    _categories = ('DAMILEE', )
4869
4870
4871
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, retrieved from its Tapastic series page."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
4879
4880
4881
class TumbleDryTapa(GenericTapasticComic):
    """Tumble Dry, retrieved from its Tapastic series page."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it used to read 'Tumblr Dry'.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4887
4888
4889
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel, retrieved from its Tapastic series page."""
    # Also on http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4895
4896
4897
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Chris Hallbeck's Maximumble, retrieved from its Tapastic series page."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Display name fixed: the author's name was spelled 'Hallback'.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): category key keeps the 'HALLBACK' spelling because
    # other classes may share this exact key - confirm before renaming.
    _categories = ('HALLBACK', )
4905
4906
4907
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Chris Hallbeck's Minimumble, retrieved from its Tapastic series page."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name fixed: the author's name was spelled 'Hallback'.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): category key keeps the 'HALLBACK' spelling because
    # other classes may share this exact key - confirm before renaming.
    _categories = ('HALLBACK', )
4915
4916
4917
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Chris Hallbeck's The Book of Biff, retrieved from its Tapastic series page."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name fixed: the author's name was spelled 'Hallback'.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): category key keeps the 'HALLBACK' spelling because
    # other classes may share this exact key - confirm before renaming.
    _categories = ('HALLBACK', )
4925
4926
4927
class RandoWisTapa(GenericTapasticComic):
    """RandoWis, retrieved from its Tapastic series page."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4933
4934
4935
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette, retrieved from its Tapastic series page."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4941
4942
4943
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out, retrieved from its Tapastic series page."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4950
4951
4952
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat, retrieved from its Tapastic series page."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4958
4959
4960
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me, retrieved from its Tapastic series page."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4967
4968
4969
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales of Absurdity, retrieved from its Tapastic series page."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
4977
4978
4979
class BFGFSTapa(GenericTapasticComic):
    """BFGFS, retrieved from its Tapastic series page."""
    # Also on http://bfgfs.com
    # Also on http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4986
4987
4988
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food, retrieved from its Tapastic series page."""
    # Also on http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4994
4995
4996
class MrLovensteinTapa(GenericTapasticComic):
    """Mr. Lovenstein, retrieved from its Tapastic series page."""
    # NOTE(review): the former "Also on" comment pointed back at this same
    # Tapastic URL; presumably the comic's own site was meant - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5002
5003
5004
class CassandraCalinTapa(GenericTapasticComic):
    """C. Cassandra comics, retrieved from the Tapastic series page."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5011
5012
5013
class WafflesAndPancakes(GenericTapasticComic):
    """Waffles And Pancakes, retrieved from its Tapastic series page."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5019
5020
5021
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Yesterday's Popcorn, retrieved from its Tapastic series page."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5028
5029
5030
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Our Super Adventure, retrieved from its Tapastic series page."""
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahssketchbook.tumblr.com
    # Also on http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5038
5039
5040
class NamelessPCs(GenericTapasticComic):
    """Nameless PCs, retrieved from its Tapastic series page."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5046
5047
5048
class UbertoolTapa(GenericTapasticComic):
    """Ubertool, retrieved from its Tapastic series page."""
    # Also on http://ubertoolcomic.com
    # Also on http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL', )
5056
5057
5058
class BarteNerdsTapa(GenericTapasticComic):
    """BarteNerds, retrieved from its Tapastic series page."""
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5064
5065
5066
class SmallBlueYonderTapa(GenericTapasticComic):
    """Small Blue Yonder, retrieved from its Tapastic series page."""
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5072
5073
5074
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Tizzy Stitch Bird, retrieved from its Tapastic series page."""
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5082
5083
5084
class RockPaperCynicTapa(GenericTapasticComic):
    """Rock Paper Cynic, retrieved from its Tapastic series page."""
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5091
5092
5093
class ItsTheTieTapa(GenericTapasticComic):
    """It's the tie, retrieved from its Tapastic series page."""
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE', )
5101
5102
5103
def get_subclasses(klass):
    """Return every class deriving, directly or not, from `klass`.

    The result is a list: direct subclasses first, followed by the
    descendants of each of them in turn. A class reachable through several
    parents appears once per path.
    """
    children = klass.__subclasses__()
    descendants = list(children)
    for child in children:
        descendants += get_subclasses(child)
    return descendants
5109
5110
5111
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Returns `string` with the ordinal suffixes "st", "nd", "rd" and "th"
    removed wherever they directly follow a digit ("1st" -> "1").
    Anything else is left untouched, so day and month names containing
    those letters ("Monday", "Saturday", "August") are preserved.
    """
    # The lookbehind restricts the removal to suffixes attached to a number;
    # a blind str.replace would also eat the "nd" of "Monday", etc.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
5119
5120
5121
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: the locale is switched to
    `local` for the duration of the parsing (when it differs from the
    current setting) and put back afterwards, even if the parsing fails.

    Returns a datetime.date.
    Raises ValueError when `string` does not match `date_format` and
    locale.Error when `local` cannot be set.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Without this, a ValueError from strptime would leave the whole
        # process running under `local`.
        locale.setlocale(locale.LC_ALL, prev_locale)
5132
5133
5134
# Registry of the comic classes, built once at import time.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None (presumably the generic bases) are left out.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
# Fewer keys than classes would mean two classes share the same `name`.
assert len(COMIC_NAMES) == len(VALID_COMICS)
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
# Likewise for the Python class names.
assert len(CLASS_NAMES) == len(VALID_COMICS)
5140