Completed
Push — master ( fe1ffd...9a7de8 )
by De
01:12
created

HMBlanc

Complexity

Total Complexity 0

Size/Duplication

Total Lines 5
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 5
wmc 0
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON description files."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, one dict per comic.

        `last_comic` is the last comic already retrieved (its 'num' entry is
        used as the starting point) or a falsy value to start from number 1.
        The upper bound is the 'num' found in the site's 'info.0.json'.
        """
        previous_num = last_comic['num'] if last_comic else 0
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        latest_num = latest_info['num']

        for num in range(previous_num + 1, latest_num + 1):
            if num == 404:
                # Number 404 is deliberately never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # 'img' becomes a one-element list; the other entries are added.
            comic.update(
                img=[comic['img']],
                prefix='%d-' % num,
                json_url=json_url,
                url=urljoin_wrapper(cls.url, str(num)),
            )
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Helper usable as an implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class' `url`.

    Helper usable as an implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    base_url = cls.url
    href = link['href']
    return urljoin_wrapper(base_url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: comics with first/next arrows.

    This class applies to comics where the previous and next comics can
    be accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of the
    'url' attribute on retrieved comics. This greatly limits the amount
    of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next link, falsy to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The generic code expects a dict without a 'url' entry, or None when
        the page must be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` (using its 'url' entry) or from the first
        comic link when `last_comic` is falsy, then follows next links and
        yields each comic dict with its 'url' entry set.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its page can be fetched,
        False otherwise (a message is printed in that case).
        """
        # A bare `import urllib` does not guarantee that the `urllib.error`
        # submodule is loaded; import it explicitly where it is needed.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when every check
        passes, False otherwise (a message is printed for each failure).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may lack a next link: report a failure
                # rather than crash on a None link.
                backlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(backlink) if backlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same guard for a next page lacking a previous link.
                    backlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backlink) if backlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both check_first_link and check_prev_next_links (from `url`)
        and returns whether both succeeded.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
199
200
201
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: comics with a list of comics (aka 'archive').

    The method `get_next_comic` is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return information about a particular comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has been
        seen (nothing is skipped when `last_comic` is falsy); every element
        after that is fetched and yielded with its 'url' entry set.
        """
        pending_url = last_comic['url'] if last_comic else None
        for elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % url)
            if pending_url and pending_url == url:
                # Found the last retrieved comic: start yielding after it.
                pending_url = None
                continue
            if pending_url is not None:
                continue
            cls.log("about to get %s (%s)" % (url, str(elt)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
246
247
# Helper functions corresponding to get_first_comic_link/get_navi_link
248
249
250
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link looking for <link rel="next"> (or "prev")."""
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('link', rel=rel)
254
255
256
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link looking for <a rel="next"> (or "prev")."""
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('a', rel=rel)
260
261
262
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link looking for <a class="navi navi-next"> (or "navi navi-prev")."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
266
267
268
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link looking for <a> tags with the 'navi comic-nav-*' classes."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
272
273
274
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link looking for <a> tags with the 'comic-nav-base comic-nav-*' classes."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
278
279
280
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: <a class="navi navi-first"> on the page at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
284
285
286
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link: <a> inside <div class="nav-first"> on the page at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    nav_first = home_soup.find('div', class_="nav-first")
    return nav_first.find('a')
290
291
292
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: <a class="comic-nav-base comic-nav-first"> on the page at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
296
297
298
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict
    whose 'href' entry is the `first_url` attribute of the class."""
    first_link = {'href': cls.first_url}
    return first_link
303
304
305
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating backward from a
    URL typed by the user until no previous link is found.

    Sometimes the first comic cannot be reached directly, so one has to
    follow the "previous" links until there is none left. Once the
    resulting URL is known it is better to hardcode it, but during
    development it is convenient to find it automatically.

    Every visited URL is printed; the last one is returned in a
    link-like dict under 'href'.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
324
325
326
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
339
340
341
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the <img> tags whose 'src' starts with the site's
        '/wp-content/uploads/' path; title and date come from the
        'og:title' and 'article:published_time' meta tags.
        """
        # The site url is escaped so that its "." only matches a literal dot.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
365
366
367
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses provide `first_url`, the post the navigation starts from.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the tags of a blog post page."""
        short_link = soup.find('link', rel='shortlink')
        url2 = short_link['href']
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        date_span = soup.find("span", class_="entry-date")
        # Dates are parsed with the "fr_FR.utf8" locale argument.
        day = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        info = {'title': title, 'url2': url2}
        info['img'] = [
            convert_iri_to_plain_ascii_uri(meta['content']) for meta in image_metas
        ]
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        return info
390
391
392
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    # Short and long names of the comic.
    name = 'zep'
    long_name = 'Zep World'
    # Address of the blog and of the post the retrieval starts from.
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
398
399
400
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    # Short and long names of the comic.
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    # Address of the blog.
    url = 'http://vidberg.blog.lemonde.fr'
    # Starting post: not the first one, no efficient way was found to retrieve it.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
407
408
409
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    # Short and long names of the comic.
    name = "plantu"
    long_name = 'Plantu'
    # Address of the blog and of the post the retrieval starts from.
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
415
416
417
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    # Short and long names of the comic.
    name = "gorce"
    long_name = 'Xavier Gorce'
    # Address of the blog and of the post the retrieval starts from.
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
423
424
425
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    # Short and long names of the comic.
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    # Address of the blog and of the post the retrieval starts from.
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
431
432
433
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    # Short and long names of the comic.
    name = "aurel"
    long_name = 'Aurel'
    # Address of the blog and of the post the retrieval starts from.
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
439
440
441
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    # Short and long names of the comic.
    name = "culottees"
    long_name = "Les Culottees"
    # Address of the blog and of the post the retrieval starts from.
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
447
448
449
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    # Short and long names of the comic.
    name = "lycee"
    long_name = "Une Annee au Lycee"
    # Address of the blog and of the post the retrieval starts from.
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
455
456
457
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the tags of a comic page."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content = soup.find('div', class_='entry-content')
        # The last 7 images are dropped: they are social media buttons.
        comic_imgs = content.find_all('img')[:-7]
        info = {'title': title, 'author': author}
        info.update(month=day.month, year=day.year, day=day.day)
        info['description'] = desc
        info['img'] = [img['src'] for img in comic_imgs]
        return info
488
489
490
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> tag leading to the next (or previous) comic, or None."""
        # The classes are swapped on purpose: prev is next / next is prev.
        if next_:
            css_class = 'prev'
        else:
            css_class = 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the tags of a comic page."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the leading "YYYY-MM-DD" part of the date is used.
        raw_date = soup.find('span', property='dc:date')['content']
        day = string_to_date(raw_date[:10], "%Y-%m-%d")
        info = {'short_url': short_url, 'title': title}
        info['img'] = [meta['content'] for meta in image_metas]
        info.update(day=day.day, month=day.month, year=day.year)
        return info
524
525
526
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the link (title, date in its url) and the page images."""
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        # The url contains the date as .../year/month/day/...
        year, month, day = map(int, url_date_re.match(url).groups())
        entry = soup.find("div", class_="entry")
        sources = [img['src'] for img in entry.find_all("img")]
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': sources,
        }
554
555
556
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the tags of a comic page.

        Sanity checks are performed with assertions: at least one image,
        matching 'alt' and 'title' attributes, 'alt' empty or equal to the title.
        """
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_span = post.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        info = {'title': title, 'description': desc, 'author': author}
        info.update(day=day.day, month=day.month, year=day.year)
        info['img'] = [urljoin_wrapper(cls.url, i['src']) for i in imgs]
        return info
591
592
593
class ItsTheTie(GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the tags of a comic page."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [m['content'] for m in soup.find_all('meta', property='og:image')]
        bonus_srcs = [
            b['data-oversrc']
            for b in soup.find_all('img', attrs={'data-oversrc': True})
        ]
        candidates = main_srcs + [s for s in bonus_srcs if s not in main_srcs]
        kept_srcs = []
        for src in candidates:
            # This particular image is filtered out of the results.
            if src.endswith("/2016/01/bonus-panel.png"):
                continue
            kept_srcs.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': kept_srcs,
            'tags': tags,
        }
627
628
629
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the tags of a blog post page."""
        date_header = soup.find('h2', class_='date-header')
        # Dates are parsed with the "fr_FR.utf8" locale argument.
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        imgs = body.find_all('img')
        title = soup.find('h3', class_='entry-header').string
        info = {'title': title}
        info['img'] = [img['src'] for img in imgs]
        info.update(month=day.month, year=day.year, day=day.day)
        return info
653
654
655
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the tags of a comic page."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        info = {'title': title}
        info.update(month=day.month, year=day.year, day=day.day)
        info['img'] = [meta['content'] for meta in image_metas]
        return info
680
681
682
class AngryAtNothing(GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the tags of a comic page."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        info = {'title': title}
        info.update(month=day.month, year=day.year, day=day.day)
        info['img'] = [meta['content'] for meta in image_metas]
        return info
705
706
707
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is read from the short link ("?p=<num>") and the
        date from the image address (".../comics/<year>-<month>-<day>...").
        Exactly one image is expected in the 'comic' div (asserted).
        """
        # Literal parts are escaped so that "." only matches a real dot.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        year, month, day = [int(s) for s in comic_url_re.match(imgs[0]['src']).groups()]
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
738
739
740
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is read from the comic url (".../comic/<year>/<month>/<day>")
        and the images from the 'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # The site url is escaped so that its "." only matches a literal dot.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
768
769
770
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips published on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor of the next/previous navigation div, None without div."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a strip from the <meta> tags of its page."""
        def meta_content(prop):
            """Content of the first <meta> tag having the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
806
807
808
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description and image URLs from a comic page."""
        # Date is on the archive page
        # For both og: properties, the last matching <meta> tag is the one used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
830
831
832
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image of the home page."""
        first_button = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, None if it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        anchor = last_soup.find('img', src=button_src).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: page title, alt texts and image URLs."""
        # Images whose source ends with one of these are left out
        # (presumably navigation buttons, ads and other site decoration).
        skipped_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        pictures = []
        for picture in soup.find_all('img'):
            if not picture['src'].endswith(skipped_suffixes):
                pictures.append(picture)
        alt_texts = [alt for alt in (pic.get('alt') for pic in pictures) if alt]
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt_texts),
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
864
865
866
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the URLs of the images found in the 'comic' div of the page."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: alt and title are expected to carry the same text.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
883
884
885
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from the post header and the comic div."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
909
910
911
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: title, date and image URLs."""
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of each image are expected to be the comic title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        # Images with an empty source are dropped.
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
938
939
940
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: main image, optional after-comic image, date."""
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        # The 'aftercomic' div is optional; its image is appended when it has a source.
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_src = bonus_div.find('img')['src']
            if bonus_src:
                sources.append(bonus_src)
        date_text = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
972
973
974
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the numbered comic anchors of the home page, in reversed page order."""
        numbered_href_re = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=numbered_href_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its archive anchor and its page."""
        comic_url = cls.get_url_from_archive_element(link)
        picture_src_re = re.compile('^/archive_b/PBF.*')
        comic_name = link.string
        # The comic number comes from the 'name' attribute and must agree with the href.
        num = int(link['name'])
        target = link['href']
        assert target == '/%d/' % num
        pictures = soup.find_all('img', src=picture_src_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == comic_name
        return {
            'num': num,
            'name': comic_name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1004
1005
1006
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from the <meta> tags of its page."""
        def named_meta_content(meta_name):
            """Content of the first <meta> tag having the given name attribute."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        # The description tag is optional: an empty string is used without it.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        author = named_meta_content('shareaholic:article_author_name')
        published_text = named_meta_content('shareaholic:article_published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(published_text[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1035
1036
1037
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: number from its URL, date from the image URL."""
        date_in_src_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        picture_src = picture['src']
        year, month, day = map(int, date_in_src_re.match(picture_src).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [picture_src],
            'year': year,
            'month': month,
            'day': day,
        }
1073
1074
1075
class GenericBouletCorp(GenericNavigableComic):
    """Shared retrieval logic for the BouletCorp comics, whatever the language."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: date from its URL, images from the story."""
        comic_url = cls.get_url_from_link(link)
        # The date is read from the URL: <site>/<year>/<month>/<day>/...
        pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, pattern.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        hover_texts = [pic.get('title') for pic in pictures]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        # Images without a src attribute are ignored.
        sources = [convert_iri_to_plain_ascii_uri(pic['src'])
                   for pic in pictures
                   if pic.get('src') is not None]
        return {
            'img': sources,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1103
1104
1105
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics from www.bouletcorp.com."""
    # All the retrieval logic is inherited from GenericBouletCorp.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1111
1112
1113
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics from english.bouletcorp.com."""
    # All the retrieval logic is inherited from GenericBouletCorp.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1118
1119
1120
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from the post header and the comic div."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The comic title is made of the title attributes of its images.
        title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1145
1146
1147
class ToonHole(GenericListableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: title from the link, date and images from the page."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: alt and title of each image are expected to be the comic title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)
1175
1176
1177
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic, including the images of its extra panel page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        # When the page has an extra panel button, the linked page is downloaded
        # and its extra panel images are added after the regular ones.
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1211
1212
1213
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        when it has no href attribute.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as found;
        # presumably it mirrors the site's markup - confirm before changing it.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None:
            # find() found nothing: report "no such comic" instead of failing on None.
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # The comic number is the second to last '/'-separated part of the og:url.
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        # The credit reads 'by <author>': the prefix is checked then removed.
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1252
1253
1254
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method."""
        # TODO: more info from http://www.mrlovenstein.com/archive
        number_href_re = re.compile('^/comic/([0-9]*)$')
        picture_src_re = re.compile('^/images/comics/')
        # The range of comic numbers is deduced from the links of the home page.
        home = get_soup_at_url(cls.url)
        numbers = [int(number_href_re.match(anchor['href']).group(1))
                   for anchor in home.find_all('a', href=number_href_re)]
        first, last = min(numbers), max(numbers)
        if last_comic:
            # Resume right after the last comic already retrieved.
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            # Images are kept in reversed page order.
            pictures = list(reversed(page.find_all('img', src=picture_src_re)))
            description = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [pic.get('title') for pic in pictures]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': description,
            }
1284
1285
1286
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its archive anchor and its page."""
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        # The anchor text holds the date; the node following the anchor holds the text.
        raw_date = link.string
        text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        picture_src_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=picture_src_re)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1319
1320
1321
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the year, month and day they contain.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: title from the link, date from its URL."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        # Sanity check: the alt text of the image is expected to be the comic title.
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1349
1350
1351
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Month pages are reached through the 'year/month/' links of the index
        page. Every image of a month page dated after the last retrieved comic
        (1985-11-01 when nothing was retrieved yet) is yielded.
        """
        last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Kept as strings too: they are reused verbatim in the image regexp and URL.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip month pages starting more than 31 days before last_date
            # without downloading them.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                # The day is what follows year and month in the image file name.
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1385
1386
1387
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the source of the strip images.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict of a comic: number from its URL, title from the archive link."""
        page_url = cls.get_url_from_archive_element(archive_elt)
        number = int(cls.comic_url_re.match(page_url).group(1))
        strip = soup.find('img', src=cls.comic_img_re)
        return {
            'num': number,
            'title': archive_elt.string,
            'img': [strip['src']]
        }
1410
1411
1412
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image of the archive page."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, None without button."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: date, image URL and title."""
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_text = date_font.string.strip()
        try:
            published = string_to_date(date_text, '%m/%d/%Y')
        except ValueError:
            # Dates that cannot be parsed are reported and replaced by today's date.
            print("Invalid date %s" % date_text)
            published = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': published.year,
            'month': published.month,
            'day': published.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1447
1448
1449
class Octopuns(GenericNavigableComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' button image of the home page."""
        first_button = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the Next/Back button image, None if it has no href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(button_pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: title, date and image URLs."""
        title = soup.find('h3', class_='post-title entry-title').string
        date_text = soup.find('h2', class_='date-header').string
        published = string_to_date(date_text, "%A, %B %d, %Y")
        # Image URLs come from the <link rel="image_src"> tags of the page.
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1481
1482
1483
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) article."""
        if next_:
            anchor_id = 'article-next'
        else:
            anchor_id = 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of an article: its title and the URLs of its images."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        pictures = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1507
1508
1509
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor of the home page whose href ends with 'comic=1'."""
        first_re = re.compile('comic=1$')
        home = get_soup_at_url(cls.url)
        return home.find('a', href=first_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its title attribute."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (number, image, title) for a comic page."""
        src_pattern = re.compile('^/oc/comics/.*')
        num_pattern = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        # The comic number is the trailing 'comic=<digits>' part of the url.
        number = int(num_pattern.match(page_url).group(1))
        picture = soup.find('img', src=src_pattern)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, picture['src'])],
            'title': picture.get('title')
        }
1539
1540
1541
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' / 'pvs' div, or None if it is absent."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images, description) for a page."""
        page_title = soup.find('title').string
        header_pics = soup.find('div', id='tt').find_all('img')
        assert len(header_pics) == 1
        strip_pics = soup.find_all('img', id='strip')
        assert len(strip_pics) == 1
        pictures = header_pics + strip_pics
        # The description is the title texts of the images, title image first.
        description = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
1575
1576
1577
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        access_key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=access_key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, description, images) for a page."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        og_desc = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': og_desc,
            'img': [pic['src'] for pic in pictures],
        }
1601
1602
1603
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics."""
    # The site does not exist anymore.
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1608
1609
1610
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images) for a comic page."""
        og_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': og_title,
            'img': [pic['src'] for pic in pictures],
        }
1628
1629
1630
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'bookmark' anchors of the archive page, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic page.

        Pages without a 'comic' div yield an empty image list and empty
        title/alt strings; the date and the tags are always extracted.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            sources = []
            alt_text = ''
            title = ''
        else:
            picture = comic_div.find('img')
            sources = [picture['src']]
            alt_text = picture['alt']
            assert alt_text == picture['title']
            title = soup.find('meta', property='og:title')['content']
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': title,
            'alt': alt_text,
            'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }
1667
1668
1669
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title, date) for a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1691
1692
1693
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title, alt text) for a page."""
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check on the scraped page: alt and title texts must agree.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1714
1715
1716
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, author) for a page."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: every image repeats the post title as alt and title.
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': writer,
        }
1742
1743
1744 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title) for a comic page."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        # The title is the space-joined title texts of the images.
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1763
1764
1765
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The original value ended with a stray space, which made it an invalid URL.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author, publication date and images are all read from the
        page's ``<meta>`` tags. Returns a dictionary with the keys 'title',
        'author', 'day', 'month', 'year' and 'img' (list of image urls).
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters (the 'YYYY-MM-DD' part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries that are excluded from the comic's image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1797
1798
1799
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, alt) for a page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        # Sanity check on the scraped page: alt and title texts must agree.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1826
1827
1828
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, author) for a page."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # Sanity checks: at least one image, each repeating the post title.
        assert pictures
        assert all(pic['title'] == pic['alt'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': writer,
        }
1856
1857
1858
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics (declares identifiers and url only)."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1863
1864
1865
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (titles, alt, image, number) for a page."""
        src_pattern = re.compile('^dhdcomics/.*')
        picture = soup.find('img', src=src_pattern)
        page_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': picture.get('title'),
            # The src value is stripped before being joined to the page url.
            'img': [urljoin_wrapper(page_url, picture['src'].strip())],
            # The comic number is the last path component of the page url.
            'num': int(page_url.split('/')[-1]),
        }
1894
1895
1896
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, in reverse order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of its anchor)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title and the month/day come from the archive row ``td``; the
        year is taken from the comic url; the image comes from the comic
        page ``soup``. Returns a dictionary with the keys 'month', 'year',
        'day', 'img' and 'title'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is escaped so that its dots are matched literally rather
        # than as regex wildcards.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Exactly one image is taken: the first <img> of the 'comic' div.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1932 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1933
1934
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics."""
    # Retrieval does not work anymore.
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1939
1940
1941
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics."""
    # Retrieval does not work anymore.
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): the category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS');
    # confirm against the other classes sharing this category before renaming.
    _categories = ('TUNEYTOONS', )
1949
1950
1951
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, alt, author)."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author name is read from the second child node of the span.
        writer = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        alt_text = pictures[0]['title']
        # Sanity check: all images share the same alt and title text.
        assert all(pic['title'] == pic['alt'] == alt_text for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': alt_text,
            'author': writer,
        }
1979
1980
1981
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the keys 'img' (list holding at most one
        image url) and 'title' (title attribute of that image, or an empty
        string when there is no image or no title attribute).
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive page anchors pointing to comics, in reverse order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is escaped so that its dots are matched literally rather
        # than as regex wildcards; the trailing '.' still requires at least
        # one character after '/comic/'.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2005
2006
2007
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at the class url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images, date) for a comic page."""
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        # Only images whose alt and title attributes are both empty are kept.
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2037
2038
2039
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, author) for a page.

        A page without a 'comic' div yields an empty image list and an
        empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        writer = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        # Sanity check: every image repeats the same text as alt and title.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': writer,
        }
2065
2066
2067
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the 'beginArrow' image)."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous). Returns None when that image is not on the
        page.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # find() yields None when nothing matches; dereferencing .parent on
        # it used to raise AttributeError instead of reporting "no link".
        return arrow.parent if arrow is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the keys 'title' (from the twitter:title
        meta tag) and 'img' (contents of the og:image meta tags).
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2093
2094
2095
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dictionary for a comic.

        The date and the title come from the archive row ``tr``; the other
        fields are read from the comic page ``soup``.
        """
        # A row must hold exactly three cells: (ignored, link, date).
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        published = string_to_date(raw_date, "%m.%d.%y")
        title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': og_title,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor found in the second cell of the row."""
        _, link_cell, _date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2137
2138
2139
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images, alt, date, author)."""
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        post_title = post_div.find('h2', class_='post-title').string
        writer = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity check on the scraped page: alt and title texts must agree.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
        }
2166
2167
2168
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic page.

        Keys: 'title', 'description', 'tags', 'alt', 'img', 'month',
        'year' and 'day'.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the 'YYYY-MM-DD' part) are parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            # Everything after the last '?' of the src is dropped.
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2199
2200
2201
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'chapter_table' table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url for an archive row (joined to the class url)."""
        # A row must hold exactly four cells; only the second one is used here.
        _num_cell, comic_cell, _date_cell, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dictionary for a comic.

        Number, title and date come from the archive row ``tr``; the image
        comes from the comic page ``soup``.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': number,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2241
2242
2243
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, author, images, date) for a page."""
        post_title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author_span = post_div.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry_div = post_div.find("div", class_="entry")
        pictures = entry_div.find_all("img")
        return {
            'title': post_title,
            'author': writer,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2269
2270
2271
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date read from the page's meta tags."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        published = string_to_date(published_time[:10], '%Y-%m-%d')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2296
2297
2298
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if there is none."""
        relation = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=relation)
        # Links whose href is exactly '/comic' are not considered.
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information (images, date, author, texts) of a comic page."""
        page_title = soup.find('meta', attrs={'name': 'description'})['content']
        body_text = soup.find('div', itemprop='articleBody').text
        writer = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        for img in images:
            assert img['title'] == img['alt']
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        raw_date = soup.find('time', itemprop='datePublished')['datetime']
        published = string_to_date(raw_date, '%Y-%m-%d %H:%M:%S')
        return {
            # Image sources are joined to the site url.
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'title': page_title,
            'alt': alt_text,
            'description': body_text,
        }
2341
2342
2343
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the image urls, title, alt text, author
        and publication date found in the page.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A comic div without any image must not raise IndexError: fall back
        # to an empty alt text as the other classes of this file do.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2370
2371
2372
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = 'blues'
    long_name = 'Every Day Blues'
    url = 'http://everydayblues.net'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title, author and date found on a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        # The date is parsed with the "de_DE.utf8" locale.
        published = string_to_date(raw_date, '%d. %B %Y', 'de_DE.utf8')
        images = soup.find('div', id='comic').find_all('img')
        for img in images:
            assert img['alt'] == img['title'] == post_title
        assert len(images) < 2
        return {
            'img': [img['src'] for img in images],
            'title': post_title,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2398
2399
2400
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = 'biter'
    long_name = 'Biter Comics'
    url = 'http://www.bitercomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image url, title, alt text, author and date of a comic page."""
        entry_title = soup.find('h1', class_='entry-title').string
        writer = soup.find('span', class_='author vcard').find('a').string
        raw_date = soup.find('span', class_='entry-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find('div', id='comic').find_all('img')
        for img in images:
            assert img['alt'] == img['title']
        # Exactly one image is expected in the comic div.
        assert len(images) == 1
        alt_text = images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': entry_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2428
2429
2430
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and date found on a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find('div', id='comic').find_all('img')
        # The alt/title consistency is only checked on the first image.
        for position, img in enumerate(images):
            assert position > 0 or img['alt'] == img['title']
        return {
            'img': [img['src'] for img in images],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2457
2458
2459
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image urls found in the post content."""
        content = soup.find('div', class_='post-content')
        post_title = content.find('h2', class_='post-title').string
        entry = content.find('div', class_='entry')
        images = entry.find_all('img')
        return {
            'title': post_title,
            'img': [img['src'] for img in images],
        }
2477
2478
2479
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title, alt text, author and date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find('div', id='comic').find_all('img')
        for img in images:
            assert img['alt'] == img['title']
        # At most one image is expected; the alt text is empty without image.
        assert len(images) < 2
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [img['src'] for img in images],
            'title': post_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2509
2510
2511
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = 'http://lastplacecomics.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title, alt text, author and date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find('div', id='comic').find_all('img')
        for img in images:
            assert img['alt'] == img['title']
        # At most one image is expected; the alt text is empty without image.
        assert len(images) < 2
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [img['src'] for img in images],
            'title': post_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2539
2540
2541
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title, alt text, author and date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find('div', id='comic').find_all('img')
        for img in images:
            assert img['alt'] == img['title']
        # The alt text comes from the first image, if any.
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [img['src'] for img in images],
            'title': post_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2571
2572
2573
class EndlessOrigami(GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = 'origami'
    long_name = 'Endless Origami'
    url = 'http://endlessorigami.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title, alt text, author and date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find('div', id='comic').find_all('img')
        for img in images:
            assert img['alt'] == img['title']
        # The alt text comes from the first image, if any.
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [img['src'] for img in images],
            'title': post_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2600
2601
2602
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date found on a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': post_title,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2624
2625
2626 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image url and the title (taken from the image) of a comic."""
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for img in images:
            assert img['alt'] == img['title']
        # Exactly one image is expected: its title is used as the comic title.
        assert len(images) == 1
        return {
            'img': [img['src'] for img in images],
            'title': images[0]['title'],
        }
2644
2645
2646
class GenericCommitStrip(GenericNavigableComic):
    """Common base for the Commit Strip retrievers (one subclass per language)."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, description and image urls found on a comic page."""
        description = soup.find('meta', property='og:description')['content']
        og_title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='entry-content').find_all('img')
        # Second title: the images' title attributes (empty when missing)
        # joined with spaces.
        image_titles = [img.get('title', '') for img in images]
        second_title = ' '.join(image_titles)
        return {
            'title': og_title,
            'title2': second_title,
            'description': description,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(img['src']))
                for img in images
            ],
        }
2665
2666
2667
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    # Page used as the first comic.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2674
2675
2676
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Page used as the first comic.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2682
2683
2684
class GenericBoumerie(GenericNavigableComic):
    """Common base for the Boumeries retrievers (one subclass per language)."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Date format and locale used to parse the post date; set by subclasses.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, image urls, title, author and date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        writer = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        for img in images:
            assert img['alt'] == img['title']
        return {
            'short_url': shortlink,
            'img': [img['src'] for img in images],
            'title': post_title,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2710
2711
2712
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of the Boumeries comics."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Format and locale of the post dates.
    date_format = '%B %d, %Y'
    lang = 'en_GB.UTF-8'
2719
2720
2721
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of the Boumeries comics."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Format and locale of the post dates.
    date_format = '%A, %d %B %Y'
    lang = 'fr_FR.utf8'
2729
2730
2731
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the title, description, short url, image
        urls and publication date found in the page. Title and description
        are empty strings when the corresponding element is missing.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> tag object itself.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2763
2764
2765
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, author, image urls and date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').find('a').string
        comic_div = soup.find('div', id='comic')
        # A page without comic div simply has no image.
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            alt_text = images[0]['title']
        else:
            alt_text = ""
        for img in images:
            assert img['alt'] == img['title'] == alt_text
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        return {
            'title': post_title,
            'alt': alt_text,
            'author': writer,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2793
2794
2795
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the short url, the comic number extracted
        from it, the image urls, the publication date, the alt text and the
        title found in the page.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots are matched literally.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # A comic div without any image must not raise IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2825
2826
2827
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the short url, the comic number extracted
        from it, the image urls, the publication date, the title, the tags
        (joined with spaces), the alt text and the author found in the page.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots are matched literally.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # A comic div without any image must not raise IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2863
2864
2865
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no corresponding navigation item.
        """
        # prev is next / next is prev: the 'previous' item is followed when
        # moving forward.
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # A missing <li> must yield None instead of raising AttributeError.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the image urls, title, author and
        publication date found in the page.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2896
2897
2898
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no corresponding navigation item.
        """
        # prev is next / next is prev: the 'prev' item is followed when
        # moving forward.
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary with the title, alt text, description, author,
        publication date and image urls found in the page.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        # Images without a 'src' attribute are ignored.
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # No remaining image must not raise IndexError: use an empty alt text.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
2937
2938
2939
class GenericWordPressInkblot(GenericNavigableComic):
    """Common base for the comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' link found on the home page."""
        link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls found on a comic page."""
        og_title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='webcomic-image').find_all('img')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        published = string_to_date(published_time[:10], '%Y-%m-%d')
        return {
            'title': og_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in images],
        }
2962
2963
2964
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
2972
2973
2974
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = 'The Ism'
    url = 'http://www.theism-comics.com'
2980
2981
2982
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
2987
2988
2989
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, None if missing."""
        wanted_alt = 'Next' if next_ else 'Back'
        navi_img = last_soup.find('img', alt=wanted_alt)
        if not navi_img:
            return None
        return navi_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title and image urls read from the page's meta tags."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
3017
3018
3019
class SheldonComics(GenericNavigableComic):
    """Retrieve Sheldon comics through the site's nav-first / nav-next / nav-prev anchors."""

    # Also on http://www.gocomics.com/sheldon
    name = "sheldon"
    long_name = "Sheldon Comics"
    url = "http://www.sheldoncomics.com"

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' found on the main page."""
        front_page = get_soup_at_url(cls.url)
        return front_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first navigation anchor that does not point at the site root.

        Anchors whose href is exactly the site root are skipped; None is
        returned when no other anchor is found.
        """
        anchor_id = "nav-next" if next_ else "nav-prev"
        usable = (
            anchor
            for anchor in last_soup.find_all("a", id=anchor_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        return next(usable, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic from the single image inside the 'comic-foot' div."""
        footer = soup.find("div", id="comic-foot")
        pictures = footer.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        return {
            'title': pictures[0]['title'],
            'img': [pic['src'] for pic in pictures],
        }
3050
3051
3052
class Ubertool(GenericNavigableComic):
    """Retrieve Ubertool comics using the shared comicnavbase navigation helpers."""

    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = "ubertool"
    long_name = "Ubertool"
    url = "http://ubertoolcomic.com"
    _categories = ("UBERTOOL",)
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image URLs from a comic page."""
        heading = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        # Dates are parsed with the "%B %d, %Y" format.
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = [img['src'] for img in comic_div.find_all('img')]
        return {
            'img': sources,
            'title': heading,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3077
3078
3079
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the 'glyphicon-backward' span.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the chevron span ('right' for next,
        'left' for previous), or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        chevron = last_soup.find('span', class_=class_)
        # find() yields None when nothing matches; report "no link" instead of
        # failing with AttributeError on `.parent`.
        return chevron.parent if chevron is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the twitter meta tags; the hover
        text and alt text come from the first comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # NOTE: a date is also available in the <small> of h2.comic_title
        # (format "%B %d, %Y, %I:%M %p") but is currently not extracted.
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3114
3115
3116
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics via the page's 'cnav' navigation elements."""

    name = "stoopid"
    long_name = "Make it stoopid"
    url = "http://makeitstoopid.com/comic.php"

    @classmethod
    def get_nav(cls, soup):
        """Return the five navigation entries of a page.

        The page is expected to hold the same navigation twice; the first
        five 'cnav' elements are checked against the rest. Entries are in the
        order begin, prev, archive, next, end, and an entry without an href
        is replaced by None.
        """
        elements = soup.find_all(class_='cnav')
        first_bar = elements[:5]
        second_bar = elements[5:]
        assert first_bar == second_bar
        links = []
        for element in first_bar:
            links.append(element if element.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation entry of the main page."""
        navigation = cls.get_nav(get_soup_at_url(cls.url))
        return navigation[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation entry."""
        navigation = cls.get_nav(last_soup)
        return navigation[3] if next_ else navigation[1]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic: title from the link, images with id 'comicimg'."""
        pictures = soup.find_all('img', id='comicimg')
        return {
            'title': link['title'],
            'img': [pic['src'] for pic in pictures],
        }
3150
3151
3152
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve Tu Mourras Moins Bete comics by walking the blog pager links."""

    name = "mourrasmoinsbete"
    long_name = "Tu Mourras Moins Bete"
    url = "http://tumourrasmoinsbete.blogspot.fr"
    _categories = ("FRANCAIS",)
    get_first_comic_link = simulate_first_link
    first_url = "http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor for next, the 'older' one for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the page title, the images of the post body and the author."""
        page_title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        sources = [img['src'] for img in body.find_all('img')]
        writer = soup.find('span', itemprop='author').string
        return {
            'img': sources,
            'author': writer,
            'title': page_title,
        }
3177
3178
3179
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The 'prev-item' anchor is used for the next comic and 'next-item' for
        the previous one.
        NOTE(review): presumably the site labels its links newest-first;
        confirm against a live page.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text, description, author, date and
        image urls (joined to the site url). 'alt' is always a string: the
        alt text of the first image, or "" when it has none or when the post
        contains no usable image.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts use one of two containers for their content.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Ignore image tags without a source.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `'title' not in i` tests the tag's children, not its
        # attributes, so this assertion looks vacuous for <img> tags. Left
        # unchanged because a stricter check could start failing on real pages.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string even when there is no image (it used to be []).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3217
3218
3219
class GloryOwlComix(GenericNavigableComic):
    """Retrieve Glory Owl comics by walking the blog pager links."""

    name = "gloryowl"
    long_name = "Glory Owl"
    url = "http://gloryowlcomix.blogspot.fr"
    _categories = ("NSFW", "FRANCAIS")
    get_first_comic_link = simulate_first_link
    first_url = "http://gloryowlcomix.blogspot.fr/2013/02/1_7.html"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor for next, the 'older' one for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the page title, the 'image_src' link targets and the author."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        writer = soup.find('a', rel='author').string
        return {
            'img': [tag['href'] for tag in image_links],
            'author': writer,
            'title': page_title,
        }
3244
3245
3246
class GenericTumblrV1(GenericComic):
    """Base class for comics read through the Tumblr V1 API ('/api/read/')."""

    _categories = ("TUMBLR",)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield a comic description for every new post that yields one."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the API url, i.e. '/api/read/' joined to the class url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Turn a post element into a comic description.

        Only posts of type 'photo' are handled; None is returned otherwise.
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        published = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may sit in dedicated 'photo' elements and/or directly in the post.
        photo_holders = post.find_all('photo') or [post]
        # Several resolutions are listed; the first 'photo-url' of each holder is kept.
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts published after last_comic, oldest first.

        The API is queried page by page (nb_post_per_call posts per call,
        50 at most) from newest to oldest; collection stops at the post whose
        url matches last_comic and the result is handed back in
        chronological order.
        """
        target_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Tumblr posts get deleted sometimes. If the previous post is gone,
            # searching for it would walk the whole blog for nothing, so the
            # situation is detected up front and reported instead.
            previous_api_url = last_comic['api_url']
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(collected)
        base_api_url = cls.get_api_url()
        overview = get_soup_at_url(base_api_url).find('posts')
        first_start = int(overview['start'])
        total = int(overview['total'])
        assert first_start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = base_api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if target_url and target_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if target_url is not None:
            # Every page was scanned without meeting the expected post.
            print("Did not find %s : there might be a problem" % target_url)
            return []
        return reversed(collected)
3339
3340
3341
class SaturdayMorningBreakfastCerealTumblr(GenericEmptyComic, GenericTumblrV1):
    """Saturday Morning Breakfast Cereal comics, Tumblr edition."""

    # GenericEmptyComic is listed before GenericTumblrV1, so its attributes
    # take precedence in the MRO (its definition is outside this excerpt).
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = "smbc-tumblr"
    long_name = "Saturday Morning Breakfast Cereal (from Tumblr)"
    url = "http://smbc-comics.tumblr.com"
    _categories = ("SMBC",)
3349
3350
3351
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved through GenericTumblrV1."""

    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3356
3357
3358
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved through GenericTumblrV1."""

    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3363
3364
3365
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ("TIE",)
3373
3374
3375
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3381
3382
3383
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "http://picturesinboxescomic.tumblr.com"
3389
3390
3391
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "http://tubeytoons.tumblr.com"
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of 'TUBEYTOONS'; it is
    # left as is because the same key may be used by classes outside this excerpt.
    _categories = ("TUNEYTOONS",)
3399
3400
3401
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "http://unearthedcomics.tumblr.com"
    _categories = ("UNEARTHED",)
3409
3410
3411
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved through GenericTumblrV1."""

    name = "pie"
    long_name = "Pie Comic"
    url = "http://piecomic.tumblr.com"
3416
3417
3418
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved through GenericTumblrV1."""

    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3423
3424
3425
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved through GenericTumblrV1."""

    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3430
3431
3432
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://tapastic.com/series/UP-and-OUT
    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3438
3439
3440
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved through GenericTumblrV1."""

    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3445
3446
3447
class PoorlyDrawnLinesTumblr(GenericEmptyComic, GenericTumblrV1):
    """Poorly Drawn Lines comics, Tumblr edition."""

    # GenericEmptyComic is listed before GenericTumblrV1, so its attributes
    # take precedence in the MRO (its definition is outside this excerpt).
    # Also on http://poorlydrawnlines.com
    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
    _categories = ("POORLYDRAWN",)
3454
3455
3456
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, retrieved through GenericTumblrV1."""

    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3461
3462
3463
class PondScumComics(GenericTumblrV1):
    """Pond Scum Comics, retrieved through GenericTumblrV1."""

    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3468
3469
3470
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://mercworks.net
    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3476
3477
3478
class OwlTurdTumblr(GenericEmptyComic, GenericTumblrV1):
    """Owl Turd comics, Tumblr edition."""

    # GenericEmptyComic is listed before GenericTumblrV1, so its attributes
    # take precedence in the MRO (its definition is outside this excerpt).
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturd.com"
    _categories = ("OWLTURD",)
3485
3486
3487
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved through GenericTumblrV1."""

    # Also on http://vectorbelly.com
    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3493
3494
3495
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved through GenericTumblrV1."""

    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://www.goneintorapture.com"
3502
3503
3504
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://theoatmeal.com
    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3510
3511
3512
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know Comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://tapastic.com/series/Regular
    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3518
3519
3520
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved through GenericTumblrV1."""

    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3525
3526
3527
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "http://enzocomics.tumblr.com"
3534
3535
3536
class ForLackOfABetterComic(GenericEmptyComic, GenericTumblrV1):
    """For Lack Of A Better Comic comics."""

    # GenericEmptyComic is listed before GenericTumblrV1, so its attributes
    # take precedence in the MRO (its definition is outside this excerpt).
    # Also on http://forlackofabettercomic.com
    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3542
3543
3544
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
    _categories = ("ZENPENCILS",)
3552
3553
3554
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://www.threewordphrase.tumblr.com"
3560
3561
3562
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3568
3569
3570
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3576
3577
3578
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3584
3585
3586
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "http://bouletcorp.tumblr.com"
    _categories = ("BOULET",)
3593
3594
3595
class TheAwkwardYetiTumblr(GenericEmptyComic, GenericTumblrV1):
    """The Awkward Yeti comics, Tumblr edition."""

    # GenericEmptyComic is listed before GenericTumblrV1, so its attributes
    # take precedence in the MRO (its definition is outside this excerpt).
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
    _categories = ("YETI",)
3604
3605
3606
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, retrieved through GenericTumblrV1."""

    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3611
3612
3613
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.downtheupwardspiral.com
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3619
3620
3621
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on https://tapastic.com/series/AsPerUsual
    name = "usual-tumblr"
    long_name = "As Per Usual (from Tumblr)"
    url = "http://as-per-usual.tumblr.com"
3627
3628
3629
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
    _categories = ("ONEONEONEONE",)
3637
3638
3639
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://jhallcomics.com
    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
3645
3646
3647
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
    _categories = ("BERKELEY",)
3655
3656
3657
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://joancornella.net
    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
3663
3664
3665
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://respawncomic.com
    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "http://respawncomic.tumblr.com"
3671
3672
3673
class ChrisHallbeckTumblr(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # GenericEmptyComic is listed before GenericTumblrV1, so its attributes
    # take precedence in the MRO (its definition is outside this excerpt).
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name: the author's name is spelled "Hallbeck" (it used to read
    # "Hallback" here).
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its historical 'HALLBACK' spelling
    # because classes outside this excerpt may share it; rename them together.
    _categories = ('HALLBACK', )
3683
3684
3685
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved through GenericTumblrV1."""

    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3690
3691
3692
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3698
3699
3700
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3706
3707
3708
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
    _categories = ("FOWLLANGUAGE",)
3717
3718
3719
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3726
3727
3728
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3734
3735
3736
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3742
3743
3744
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3750
3751
3752
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3758
3759
3760
class DorrisMc(GenericEmptyComic, GenericTumblrV1):
    """Dorris Mc comics."""

    # GenericEmptyComic is listed before GenericTumblrV1, so its attributes
    # take precedence in the MRO (its definition is outside this excerpt).
    # Also on http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3766
3767
3768
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, Tumblr edition."""

    # GenericEmptyComic is listed before GenericTumblrV1, so its attributes
    # take precedence in the MRO (its definition is outside this excerpt).
    # Also on https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3774
3775
3776
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3783
3784
3785
class AComik(GenericTumblrV1):
    """A Comik comics, retrieved through GenericTumblrV1."""

    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
3790
3791
3792
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved through GenericTumblrV1."""

    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
3797
3798
3799
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on http://www.dagsson.com
    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "http://hugleikurdagsson.tumblr.com"
3805
3806
3807
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, Tumblr edition (retrieved through GenericTumblrV1)."""

    # Also on https://linsedition.com
    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "http://linscomics.tumblr.com"
    _categories = ("LINS",)
3814
3815
3816
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved through GenericTumblrV1."""

    name = "origamihotdish"
    long_name = "Origami Hot Dish"
    url = "http://origamihotdish.com"
3821
3822
3823
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved through GenericTumblrV1."""

    name = "hitandmiss"
    long_name = "Hit and Miss Comics"
    url = "http://hitandmisscomics.tumblr.com"
3828
3829
3830
class HMBlanc(GenericTumblrV1):
3831
    """Class to retrieve HM Blanc comics."""
3832
    name = 'hmblanc'
3833
    long_name = 'HM Blanc'
3834
    url = 'http://hmblanc.tumblr.com'
3835
3836
3837
class TalesOfAbsurdityTumblr(GenericTumblrV1):
3838
    """Class to retrieve Tales Of Absurdity comics."""
3839
    # Also on http://talesofabsurdity.com
3840
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
3841
    name = 'absurdity-tumblr'
3842
    long_name = 'Tales of Absurdity (from Tumblr)'
3843
    url = 'http://talesofabsurdity.tumblr.com'
3844
    _categories = ('ABSURDITY', )
3845
3846
3847
class RobbieAndBobby(GenericTumblrV1):
3848
    """Class to retrieve Robbie And Bobby comics."""
3849
    # Also on http://robbieandbobby.com
3850
    name = 'robbie-tumblr'
3851
    long_name = 'Robbie And Bobby (from Tumblr)'
3852
    url = 'http://robbieandbobby.tumblr.com'
3853
3854
3855
class ElectricBunnyComicTumblr(GenericTumblrV1):
3856
    """Class to retrieve Electric Bunny Comics."""
3857
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
3858
    name = 'bunny-tumblr'
3859
    long_name = 'Electric Bunny Comic (from Tumblr)'
3860
    url = 'http://electricbunnycomics.tumblr.com'
3861
3862
3863
class Hoomph(GenericTumblrV1):
3864
    """Class to retrieve Hoomph comics."""
3865
    name = 'hoomph'
3866
    long_name = 'Hoomph'
3867
    url = 'http://hoom.ph'
3868
3869
3870
class BFGFSTumblr(GenericTumblrV1):
3871
    """Class to retrieve BFGFS comics."""
3872
    # Also on https://tapastic.com/series/BFGFS
3873
    # Also on http://bfgfs.com
3874
    name = 'bfgfs-tumblr'
3875
    long_name = 'BFGFS (from Tumblr)'
3876
    url = 'http://bfgfs.tumblr.com'
3877
3878
3879
class DoodleForFood(GenericTumblrV1):
    """Class to retrieve Doodle For Food comics."""
    # Also on https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3885
3886
3887
class CassandraCalinTumblr(GenericEmptyComic, GenericTumblrV1):
3888
    """Class to retrieve C. Cassandra comics."""
3889
    # Also on http://cassandracalin.com
3890
    # Also on https://tapastic.com/series/C-Cassandra-comics
3891
    name = 'cassandra-tumblr'
3892
    long_name = 'Cassandra Calin (from Tumblr)'
3893
    url = 'http://c-cassandra.tumblr.com'
3894
3895
3896
class DougWasTaken(GenericTumblrV1):
3897
    """Class to retrieve Doug Was Taken comics."""
3898
    name = 'doog'
3899
    long_name = 'Doug Was Taken'
3900
    url = 'http://dougwastaken.tumblr.com'
3901
3902
3903
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
3904
    """Class to retrieve Mandatory Roller Coaster comics."""
3905
    name = 'rollercoaster'
3906
    long_name = 'Mandatory Roller Coaster'
3907
    url = 'http://mandatoryrollercoaster.com'
3908
3909
3910
class CEstPasEnRegardantSesPompes(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve C'Est Pas En Regardant Ses Pompes (...) comics."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
3915
3916
3917
class TheGrohlTroll(GenericEmptyComic, GenericTumblrV1):
3918
    """Class to retrieve The Grohl Troll comics."""
3919
    name = 'grohltroll'
3920
    long_name = 'The Grohl Troll'
3921
    url = 'http://thegrohltroll.com'
3922
3923
3924
class WebcomicName(GenericEmptyComic, GenericTumblrV1):
3925
    """Class to retrieve Webcomic Name comics."""
3926
    name = 'webcomicname'
3927
    long_name = 'Webcomic Name'
3928
    url = 'http://webcomicname.com'
3929
3930
3931
class BooksOfAdam(GenericEmptyComic, GenericTumblrV1):
3932
    """Class to retrieve Books of Adam comics."""
3933
    # Also on http://www.booksofadam.com
3934
    name = 'booksofadam'
3935
    long_name = 'Books of Adam'
3936
    url = 'http://booksofadam.tumblr.com'
3937
3938
3939
class HarkAVagrant(GenericEmptyComic, GenericTumblrV1):
3940
    """Class to retrieve Hark A Vagrant comics."""
3941
    # Also on http://www.harkavagrant.com
3942
    name = 'hark-tumblr'
3943
    long_name = 'Hark A Vagrant (from Tumblr)'
3944
    url = 'http://beatonna.tumblr.com'
3945
3946
3947
class OurSuperAdventureTumblr(GenericEmptyComic, GenericTumblrV1):
3948
    """Class to retrieve Our Super Adventure comics."""
3949
    # Also on https://tapastic.com/series/Our-Super-Adventure
3950
    # Also on http://www.oursuperadventure.com
3951
    # http://sarahgraley.com
3952
    name = 'superadventure-tumblr'
3953
    long_name = 'Our Super Adventure (from Tumblr)'
3954
    url = 'http://sarahssketchbook.tumblr.com'
3955
3956
3957
class JakeLikesOnions(GenericTumblrV1):
3958
    """Class to retrieve Jake Likes Onions comics."""
3959
    name = 'jake'
3960
    long_name = 'Jake Likes Onions'
3961
    url = 'http://jakelikesonions.com'
3962
3963
3964
class InYourFaceCake(GenericEmptyComic, GenericTumblrV1):
3965
    """Class to retrieve In Your Face Cake comics."""
3966
    name = 'inyourfacecake-tumblr'
3967
    long_name = 'In Your Face Cake (from Tumblr)'
3968
    url = 'http://in-your-face-cake.tumblr.com'
3969
3970
3971
class Robospunk(GenericTumblrV1):
3972
    """Class to retrieve Robospunk comics."""
3973
    name = 'robospunk'
3974
    long_name = 'Robospunk'
3975
    url = 'http://robospunk.com'
3976
3977
3978
class BananaTwinky(GenericTumblrV1):
3979
    """Class to retrieve Banana Twinky comics."""
3980
    name = 'banana'
3981
    long_name = 'Banana Twinky'
3982
    url = 'http://bananatwinky.tumblr.com'
3983
3984
3985
class YesterdaysPopcornTumblr(GenericTumblrV1):
3986
    """Class to retrieve Yesterday's Popcorn comics."""
3987
    # Also on http://www.yesterdayspopcorn.com
3988
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
3989
    name = 'popcorn-tumblr'
3990
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
3991
    url = 'http://yesterdayspopcorn.tumblr.com'
3992
3993
3994
class TwistedDoodles(GenericEmptyComic, GenericTumblrV1):
3995
    """Class to retrieve Twisted Doodles comics."""
3996
    name = 'twisted'
3997
    long_name = 'Twisted Doodles'
3998
    url = 'http://www.twisteddoodles.com'
3999
4000
4001
class UbertoolTumblr(GenericEmptyComic, GenericTumblrV1):
4002
    """Class to retrieve Ubertool comics."""
4003
    # Also on http://ubertoolcomic.com
4004
    # Also on https://tapastic.com/series/ubertool
4005
    name = 'ubertool-tumblr'
4006
    long_name = 'Ubertool (from Tumblr)'
4007
    url = 'http://ubertool.tumblr.com'
4008
    _categories = ('UBERTOOL', )
4009
4010
4011
class LittleLifeLinesTumblr(GenericTumblrV1):
4012
    """Class to retrieve Little Life Lines comics."""
4013
    # Also on http://www.littlelifelines.com
4014
    name = 'life-tumblr'
4015
    long_name = 'Little Life Lines (from Tumblr)'
4016
    url = 'https://little-life-lines.tumblr.com'
4017
4018
4019
class TheyCanTalk(GenericEmptyComic, GenericTumblrV1):
4020
    """Class to retrieve They Can Talk comics."""
4021
    name = 'theycantalk'
4022
    long_name = 'They Can Talk'
4023
    url = 'http://theycantalk.com'
4024
4025
4026
class HorovitzComics(GenericListableComic):
    """Base class gathering what the different Horovitz comics have in common.

    Subclasses supply `link_re`: a compiled pattern selecting the archive
    links of one series, whose first group is the comic number.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # The three numeric path segments of a picture URL are read as
    # year, month and day (in that order) by get_comic_info.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of one comic from its page and archive link."""
        num_match = cls.link_re.match(link['href'])
        num = int(num_match.group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        # Exactly one picture tagged id='comic' is expected on a page.
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = (int(part) for part in date_match.groups())
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reverse page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        series_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(series_links)
4057
4058
4059
class HorovitzNew(HorovitzComics):
4060
    """Class to retrieve Horovitz new comics."""
4061
    name = 'horovitznew'
4062
    long_name = 'Horovitz New'
4063
    link_re = re.compile('^/comics/new/([0-9]+)$')
4064
4065
4066
class HorovitzClassic(HorovitzComics):
4067
    """Class to retrieve Horovitz classic comics."""
4068
    name = 'horovitzclassic'
4069
    long_name = 'Horovitz Classic'
4070
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4071
4072
4073
class GenericGoComic(GenericNavigableComic):
    """Base class holding the logic shared by the comics from gocomics.com."""
    _categories = ('GOCOMIC', )
    # The three trailing numeric segments of a comic URL are read as
    # year, month and day (in that order) by get_comic_info.
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'beginning' anchor found on the comic's main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the dated 'next' (or 'prev') anchor of the given page."""
        if next_:
            direction = 'next'
        else:
            direction = 'prev'
        return last_soup.find('a', class_=direction, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the href of the link against the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of one comic from its page and its link."""
        comic_url = cls.get_url_from_link(link)
        date_parts = cls.url_date_re.match(comic_url).groups()
        year, month, day = (int(part) for part in date_parts)
        strips = soup.find_all('img', class_='strip')
        author_meta = soup.find('meta', attrs={'name': 'author'})
        return {
            'day': day,
            'month': month,
            'year': year,
            # Only the last 'strip' picture of the page is kept.
            'img': [strips[-1]['src']],
            'author': author_meta['content']
        }
4106
4107
4108
class PearlsBeforeSwine(GenericGoComic):
4109
    """Class to retrieve Pearls Before Swine comics."""
4110
    name = 'pearls'
4111
    long_name = 'Pearls Before Swine'
4112
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4113
4114
4115
class Peanuts(GenericGoComic):
4116
    """Class to retrieve Peanuts comics."""
4117
    name = 'peanuts'
4118
    long_name = 'Peanuts'
4119
    url = 'http://www.gocomics.com/peanuts'
4120
4121
4122
class MattWuerker(GenericGoComic):
4123
    """Class to retrieve Matt Wuerker comics."""
4124
    name = 'wuerker'
4125
    long_name = 'Matt Wuerker'
4126
    url = 'http://www.gocomics.com/mattwuerker'
4127
4128
4129
class TomToles(GenericGoComic):
4130
    """Class to retrieve Tom Toles comics."""
4131
    name = 'toles'
4132
    long_name = 'Tom Toles'
4133
    url = 'http://www.gocomics.com/tomtoles'
4134
4135
4136
class BreakOfDay(GenericGoComic):
4137
    """Class to retrieve Break Of Day comics."""
4138
    name = 'breakofday'
4139
    long_name = 'Break Of Day'
4140
    url = 'http://www.gocomics.com/break-of-day'
4141
4142
4143
class Brevity(GenericGoComic):
4144
    """Class to retrieve Brevity comics."""
4145
    name = 'brevity'
4146
    long_name = 'Brevity'
4147
    url = 'http://www.gocomics.com/brevity'
4148
4149
4150
class MichaelRamirez(GenericGoComic):
4151
    """Class to retrieve Michael Ramirez comics."""
4152
    name = 'ramirez'
4153
    long_name = 'Michael Ramirez'
4154
    url = 'http://www.gocomics.com/michaelramirez'
4155
4156
4157
class MikeLuckovich(GenericGoComic):
4158
    """Class to retrieve Mike Luckovich comics."""
4159
    name = 'luckovich'
4160
    long_name = 'Mike Luckovich'
4161
    url = 'http://www.gocomics.com/mikeluckovich'
4162
4163
4164
class JimBenton(GenericGoComic):
4165
    """Class to retrieve Jim Benton comics."""
4166
    # Also on http://jimbenton.tumblr.com
4167
    name = 'benton'
4168
    long_name = 'Jim Benton'
4169
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4170
4171
4172
class TheArgyleSweater(GenericGoComic):
4173
    """Class to retrieve the Argyle Sweater comics."""
4174
    name = 'argyle'
4175
    long_name = 'Argyle Sweater'
4176
    url = 'http://www.gocomics.com/theargylesweater'
4177
4178
4179
class SunnyStreet(GenericGoComic):
4180
    """Class to retrieve Sunny Street comics."""
4181
    # Also on http://www.sunnystreetcomics.com
4182
    name = 'sunny'
4183
    long_name = 'Sunny Street'
4184
    url = 'http://www.gocomics.com/sunny-street'
4185
4186
4187
class OffTheMark(GenericGoComic):
4188
    """Class to retrieve Off The Mark comics."""
4189
    # Also on https://www.offthemark.com
4190
    name = 'offthemark'
4191
    long_name = 'Off The Mark'
4192
    url = 'http://www.gocomics.com/offthemark'
4193
4194
4195
class WuMo(GenericGoComic):
4196
    """Class to retrieve WuMo comics."""
4197
    # Also on http://wumo.com
4198
    name = 'wumo'
4199
    long_name = 'WuMo'
4200
    url = 'http://www.gocomics.com/wumo'
4201
4202
4203
class LunarBaboon(GenericGoComic):
4204
    """Class to retrieve Lunar Baboon comics."""
4205
    # Also on http://www.lunarbaboon.com
4206
    # Also on https://tapastic.com/series/Lunarbaboon
4207
    name = 'lunarbaboon'
4208
    long_name = 'Lunar Baboon'
4209
    url = 'http://www.gocomics.com/lunarbaboon'
4210
4211
4212
class SandersenGocomic(GenericGoComic):
4213
    """Class to retrieve Sarah Andersen comics."""
4214
    # Also on http://sarahcandersen.com
4215
    # Also on http://tapastic.com/series/Doodle-Time
4216
    name = 'sandersen-goc'
4217
    long_name = 'Sarah Andersen (from GoComics)'
4218
    url = 'http://www.gocomics.com/sarahs-scribbles'
4219
4220
4221
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
4222
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
4223
    # Also on http://smbc-comics.tumblr.com
4224
    # Also on http://www.smbc-comics.com
4225
    name = 'smbc-goc'
4226
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
4227
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
4228
    _categories = ('SMBC', )
4229
4230
4231
class CalvinAndHobbesGoComic(GenericGoComic):
4232
    """Class to retrieve Calvin and Hobbes comics."""
4233
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
4234
    name = 'calvin-goc'
4235
    long_name = 'Calvin and Hobbes (from GoComics)'
4236
    url = 'http://www.gocomics.com/calvinandhobbes'
4237
4238
4239
class RallGoComic(GenericGoComic):
4240
    """Class to retrieve Ted Rall comics."""
4241
    # Also on http://rall.com/comic
4242
    name = 'rall-goc'
4243
    long_name = "Ted Rall (from GoComics)"
4244
    url = "http://www.gocomics.com/tedrall"
4245
    _categories = ('RALL', )
4246
4247
4248
class TheAwkwardYetiGoComic(GenericGoComic):
4249
    """Class to retrieve The Awkward Yeti comics."""
4250
    # Also on http://larstheyeti.tumblr.com
4251
    # Also on http://theawkwardyeti.com
4252
    # Also on https://tapastic.com/series/TheAwkwardYeti
4253
    name = 'yeti-goc'
4254
    long_name = 'The Awkward Yeti (from GoComics)'
4255
    url = 'http://www.gocomics.com/the-awkward-yeti'
4256
    _categories = ('YETI', )
4257
4258
4259
class BerkeleyMewsGoComics(GenericGoComic):
4260
    """Class to retrieve Berkeley Mews comics."""
4261
    # Also on http://mews.tumblr.com
4262
    # Also on http://www.berkeleymews.com
4263
    name = 'berkeley-goc'
4264
    long_name = 'Berkeley Mews (from GoComics)'
4265
    url = 'http://www.gocomics.com/berkeley-mews'
4266
    _categories = ('BERKELEY', )
4267
4268
4269
class SheldonGoComics(GenericGoComic):
4270
    """Class to retrieve Sheldon comics."""
4271
    # Also on http://www.sheldoncomics.com
4272
    name = 'sheldon-goc'
4273
    long_name = 'Sheldon Comics (from GoComics)'
4274
    url = 'http://www.gocomics.com/sheldon'
4275
4276
4277
class FowlLanguageGoComics(GenericGoComic):
4278
    """Class to retrieve Fowl Language comics."""
4279
    # Also on http://www.fowllanguagecomics.com
4280
    # Also on http://tapastic.com/series/Fowl-Language-Comics
4281
    # Also on http://fowllanguagecomics.tumblr.com
4282
    name = 'fowllanguage-goc'
4283
    long_name = 'Fowl Language Comics (from GoComics)'
4284
    url = 'http://www.gocomics.com/fowl-language'
4285
    _categories = ('FOWLLANGUAGE', )
4286
4287
4288
class NickAnderson(GenericGoComic):
4289
    """Class to retrieve Nick Anderson comics."""
4290
    name = 'nickanderson'
4291
    long_name = 'Nick Anderson'
4292
    url = 'http://www.gocomics.com/nickanderson'
4293
4294
4295
class GarfieldGoComics(GenericGoComic):
4296
    """Class to retrieve Garfield comics."""
4297
    # Also on http://garfield.com
4298
    name = 'garfield-goc'
4299
    long_name = 'Garfield (from GoComics)'
4300
    url = 'http://www.gocomics.com/garfield'
4301
    _categories = ('GARFIELD', )
4302
4303
4304
class DorrisMcGoComics(GenericGoComic):
    """Class to retrieve Dorris Mc Comics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4310
4311
4312
class FoxTrot(GenericGoComic):
4313
    """Class to retrieve FoxTrot comics."""
4314
    name = 'foxtrot'
4315
    long_name = 'FoxTrot'
4316
    url = 'http://www.gocomics.com/foxtrot'
4317
4318
4319
class FoxTrotClassics(GenericGoComic):
4320
    """Class to retrieve FoxTrot Classics comics."""
4321
    name = 'foxtrot-classics'
4322
    long_name = 'FoxTrot Classics'
4323
    url = 'http://www.gocomics.com/foxtrotclassics'
4324
4325
4326
class MisterAndMeGoComics(GenericGoComic):
4327
    """Class to retrieve Mister & Me Comics."""
4328
    # Also on http://www.mister-and-me.com
4329
    # Also on https://tapastic.com/series/Mister-and-Me
4330
    name = 'mister-goc'
4331
    long_name = 'Mister & Me (from GoComics)'
4332
    url = 'http://www.gocomics.com/mister-and-me'
4333
4334
4335
class NonSequitur(GenericGoComic):
4336
    """Class to retrieve Non Sequitur (Wiley Miller) comics."""
4337
    name = 'nonsequitur'
4338
    long_name = 'Non Sequitur'
4339
    url = 'http://www.gocomics.com/nonsequitur'
4340
4341
4342
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Archive elements are the dictionaries decoded from the `episodeList`
    JavaScript value embedded in the series page; this class reads their
    'id', 'title' and 'publishDate' entries.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Returns None, after printing a notice, when the episode page does
        not contain any 'art-image' picture.
        """
        # 'publishDate' is divided by 1000 to obtain the seconds expected by
        # fromtimestamp(), which converts to the local timezone.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        pub_date = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': pub_date.day,
            'year': pub_date.year,
            'month': pub_date.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the 'id' of the archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list decoded from the series page at `cls.url`.

        Raises IndexError when no `episodeList` line is found in the page.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way:
        # take the first line shaped like "episodeList : <json>," and decode
        # what sits between the prefix and the trailing comma.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as the former `[...][0]` lookup, with context.
        raise IndexError('No episodeList found at %s' % cls.url)
4375
4376
4377
class VegetablesForDessert(GenericTapasticComic):
4378
    """Class to retrieve Vegetables For Dessert comics."""
4379
    # Also on http://vegetablesfordessert.tumblr.com
4380
    name = 'vegetables'
4381
    long_name = 'Vegetables For Dessert'
4382
    url = 'http://tapastic.com/series/vegetablesfordessert'
4383
4384
4385
class FowlLanguageTapa(GenericTapasticComic):
4386
    """Class to retrieve Fowl Language comics."""
4387
    # Also on http://www.fowllanguagecomics.com
4388
    # Also on http://fowllanguagecomics.tumblr.com
4389
    # Also on http://www.gocomics.com/fowl-language
4390
    name = 'fowllanguage-tapa'
4391
    long_name = 'Fowl Language Comics (from Tapastic)'
4392
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4393
    _categories = ('FOWLLANGUAGE', )
4394
4395
4396
class OscillatingProfundities(GenericTapasticComic):
4397
    """Class to retrieve Oscillating Profundities comics."""
4398
    name = 'oscillating'
4399
    long_name = 'Oscillating Profundities'
4400
    url = 'http://tapastic.com/series/oscillatingprofundities'
4401
4402
4403
class ZnoflatsComics(GenericTapasticComic):
4404
    """Class to retrieve Znoflats comics."""
4405
    name = 'znoflats'
4406
    long_name = 'Znoflats Comics'
4407
    url = 'http://tapastic.com/series/Znoflats-Comics'
4408
4409
4410
class SandersenTapastic(GenericTapasticComic):
4411
    """Class to retrieve Sarah Andersen comics."""
4412
    # Also on http://sarahcandersen.com
4413
    # Also on http://www.gocomics.com/sarahs-scribbles
4414
    name = 'sandersen-tapa'
4415
    long_name = 'Sarah Andersen (from Tapastic)'
4416
    url = 'http://tapastic.com/series/Doodle-Time'
4417
4418
4419
class TubeyToonsTapastic(GenericTapasticComic):
    """Class to retrieve TubeyToons comics."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'. The value
    # is left untouched as other Tubey Toons classes may use the same category
    # key - confirm before renaming.
    _categories = ('TUNEYTOONS', )
4427
4428
4429
class AnythingComicTapastic(GenericTapasticComic):
4430
    """Class to retrieve Anything Comics."""
4431
    # Also on http://www.anythingcomic.com
4432
    name = 'anythingcomic-tapa'
4433
    long_name = 'Anything Comic (from Tapastic)'
4434
    url = 'http://tapastic.com/series/anything'
4435
4436
4437
class UnearthedComicsTapastic(GenericTapasticComic):
4438
    """Class to retrieve Unearthed comics."""
4439
    # Also on http://unearthedcomics.com
4440
    # Also on http://unearthedcomics.tumblr.com
4441
    name = 'unearthed-tapa'
4442
    long_name = 'Unearthed Comics (from Tapastic)'
4443
    url = 'http://tapastic.com/series/UnearthedComics'
4444
    _categories = ('UNEARTHED', )
4445
4446
4447
class EverythingsStupidTapastic(GenericTapasticComic):
4448
    """Class to retrieve Everything's stupid Comics."""
4449
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
4450
    # Also on http://everythingsstupid.net
4451
    name = 'stupid-tapa'
4452
    long_name = "Everything's Stupid (from Tapastic)"
4453
    url = 'http://tapastic.com/series/EverythingsStupid'
4454
4455
4456
class JustSayEhTapastic(GenericTapasticComic):
4457
    """Class to retrieve Just Say Eh comics."""
4458
    # Also on http://www.justsayeh.com
4459
    name = 'justsayeh-tapa'
4460
    long_name = 'Just Say Eh (from Tapastic)'
4461
    url = 'http://tapastic.com/series/Just-Say-Eh'
4462
4463
4464
class ThorsThundershackTapastic(GenericTapasticComic):
4465
    """Class to retrieve Thor's Thundershack comics."""
4466
    # Also on http://www.thorsthundershack.com
4467
    name = 'thor-tapa'
4468
    long_name = 'Thor\'s Thundershack (from Tapastic)'
4469
    url = 'http://tapastic.com/series/Thors-Thundershac'
4470
    _categories = ('THOR', )
4471
4472
4473
class OwlTurdTapastic(GenericTapasticComic):
4474
    """Class to retrieve Owl Turd comics."""
4475
    # Also on http://owlturd.com
4476
    name = 'owlturd-tapa'
4477
    long_name = 'Owl Turd (from Tapastic)'
4478
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4479
    _categories = ('OWLTURD', )
4480
4481
4482
class GoneIntoRaptureTapastic(GenericTapasticComic):
4483
    """Class to retrieve Gone Into Rapture comics."""
4484
    # Also on http://goneintorapture.tumblr.com
4485
    # Also on http://www.goneintorapture.com
4486
    name = 'rapture-tapa'
4487
    long_name = 'Gone Into Rapture (from Tapastic)'
4488
    url = 'http://tapastic.com/series/Goneintorapture'
4489
4490
4491
class HeckIfIKnowComicsTapa(GenericTapasticComic):
4492
    """Class to retrieve Heck If I Know Comics."""
4493
    # Also on http://heckifiknowcomics.com
4494
    name = 'heck-tapa'
4495
    long_name = 'Heck if I Know comics (from Tapastic)'
4496
    url = 'http://tapastic.com/series/Regular'
4497
4498
4499
class CheerUpEmoKidTapa(GenericTapasticComic):
4500
    """Class to retrieve CheerUpEmoKid comics."""
4501
    # Also on http://www.cheerupemokid.com
4502
    # Also on http://enzocomics.tumblr.com
4503
    name = 'cuek-tapa'
4504
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
4505
    url = 'http://tapastic.com/series/CUEK'
4506
4507
4508
class BigFootJusticeTapa(GenericTapasticComic):
4509
    """Class to retrieve Big Foot Justice comics."""
4510
    # Also on http://bigfootjustice.com
4511
    name = 'bigfoot-tapa'
4512
    long_name = 'Big Foot Justice (from Tapastic)'
4513
    url = 'http://tapastic.com/series/bigfoot-justice'
4514
4515
4516
class UpAndOutTapa(GenericTapasticComic):
4517
    """Class to retrieve Up & Out comics."""
4518
    # Also on http://upandoutcomic.tumblr.com
4519
    name = 'upandout-tapa'
4520
    long_name = 'Up And Out (from Tapastic)'
4521
    url = 'http://tapastic.com/series/UP-and-OUT'
4522
4523
4524
class ToonHoleTapa(GenericTapasticComic):
    """Class to retrieve Toon Hole comics."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4530
4531
4532
class AngryAtNothingTapa(GenericTapasticComic):
4533
    """Class to retrieve Angry at Nothing comics."""
4534
    # Also on http://www.angryatnothing.net
4535
    name = 'angry-tapa'
4536
    long_name = 'Angry At Nothing (from Tapastic)'
4537
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4538
4539
4540
class LeleozTapa(GenericTapasticComic):
4541
    """Class to retrieve Leleoz comics."""
4542
    # Also on http://leleozcomics.tumblr.com
4543
    name = 'leleoz-tapa'
4544
    long_name = 'Leleoz (from Tapastic)'
4545
    url = 'https://tapastic.com/series/Leleoz'
4546
4547
4548
class TheAwkwardYetiTapa(GenericTapasticComic):
4549
    """Class to retrieve The Awkward Yeti comics."""
4550
    # Also on http://www.gocomics.com/the-awkward-yeti
4551
    # Also on http://theawkwardyeti.com
4552
    # Also on http://larstheyeti.tumblr.com
4553
    name = 'yeti-tapa'
4554
    long_name = 'The Awkward Yeti (from Tapastic)'
4555
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4556
    _categories = ('YETI', )
4557
4558
4559
class AsPerUsualTapa(GenericTapasticComic):
4560
    """Class to retrieve As Per Usual comics."""
4561
    # Also on http://as-per-usual.tumblr.com
4562
    name = 'usual-tapa'
4563
    long_name = 'As Per Usual (from Tapastic)'
4564
    url = 'https://tapastic.com/series/AsPerUsual'
4565
4566
4567
class OneOneOneOneComicTapa(GenericTapasticComic):
4568
    """Class to retrieve 1111 Comics."""
4569
    # Also on http://www.1111comics.me
4570
    # Also on http://comics1111.tumblr.com
4571
    name = '1111-tapa'
4572
    long_name = '1111 Comics (from Tapastic)'
4573
    url = 'https://tapastic.com/series/1111-Comics'
4574
    _categories = ('ONEONEONEONE', )
4575
4576
4577
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name: the comic is "Tumble Dry" (it was misspelled "Tumblr Dry").
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4583
4584
4585
class DeadlyPanelTapa(GenericTapasticComic):
4586
    """Class to retrieve Deadly Panel comics."""
4587
    # Also on http://www.deadlypanel.com
4588
    name = 'deadly-tapa'
4589
    long_name = 'Deadly Panel (from Tapastic)'
4590
    url = 'https://tapastic.com/series/deadlypanel'
4591
4592
4593
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Display name: the author is spelled "Hallbeck" (was "Hallback").
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # it is a shared grouping key - confirm every user before renaming.
    _categories = ('HALLBACK', )
4601
4602
4603
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name: the author is spelled "Hallbeck" (was "Hallback").
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # it is a shared grouping key - confirm every user before renaming.
    _categories = ('HALLBACK', )
4611
4612
4613
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name: the author is spelled "Hallbeck" (was "Hallback").
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # it is a shared grouping key - confirm every user before renaming.
    _categories = ('HALLBACK', )
4621
4622
4623
class RandoWisTapa(GenericTapasticComic):
4624
    """Class to retrieve RandoWis comics."""
4625
    # Also on https://randowis.com
4626
    name = 'randowis-tapa'
4627
    long_name = 'RandoWis (from Tapastic)'
4628
    url = 'https://tapastic.com/series/RandoWis'
4629
4630
4631
class PigeonGazetteTapa(GenericTapasticComic):
4632
    """Class to retrieve The Pigeon Gazette comics."""
4633
    # Also on http://thepigeongazette.tumblr.com
4634
    name = 'pigeon-tapa'
4635
    long_name = 'The Pigeon Gazette (from Tapastic)'
4636
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4637
4638
4639
class TheOdd1sOutTapa(GenericTapasticComic):
4640
    """Class to retrieve The Odd 1s Out comics."""
4641
    # Also on http://theodd1sout.com
4642
    # Also on http://theodd1sout.tumblr.com
4643
    name = 'theodd-tapa'
4644
    long_name = 'The Odd 1s Out (from Tapastic)'
4645
    url = 'https://tapastic.com/series/Theodd1sout'
4646
4647
4648
class TheWorldIsFlatTapa(GenericTapasticComic):
4649
    """Class to retrieve The World Is Flat Comics."""
4650
    # Also on http://theworldisflatcomics.tumblr.com
4651
    name = 'flatworld-tapa'
4652
    long_name = 'The World Is Flat (from Tapastic)'
4653
    url = 'https://tapastic.com/series/The-World-is-Flat'
4654
4655
4656
class MisterAndMeTapa(GenericTapasticComic):
4657
    """Class to retrieve Mister & Me Comics."""
4658
    # Also on http://www.mister-and-me.com
4659
    # Also on http://www.gocomics.com/mister-and-me
4660
    name = 'mister-tapa'
4661
    long_name = 'Mister & Me (from Tapastic)'
4662
    url = 'https://tapastic.com/series/Mister-and-Me'
4663
4664
4665
class TalesOfAbsurdityTapa(GenericTapasticComic):
4666
    """Class to retrieve Tales Of Absurdity comics."""
4667
    # Also on http://talesofabsurdity.com
4668
    # Also on http://talesofabsurdity.tumblr.com
4669
    name = 'absurdity-tapa'
4670
    long_name = 'Tales of Absurdity (from Tapastic)'
4671
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4672
    _categories = ('ABSURDITY', )
4673
4674
4675
class BFGFSTapa(GenericTapasticComic):
    """Tapastic-hosted edition of BFGFS.

    Only metadata is declared here; all retrieval behaviour is inherited
    from GenericTapasticComic.
    """
    # Also published on:
    #   http://bfgfs.com
    #   http://bfgfs.tumblr.com
    url = "https://tapastic.com/series/BFGFS"
    name = "bfgfs-tapa"
    long_name = "BFGFS (from Tapastic)"
4682
4683
4684
class DoodleForFoodTapa(GenericTapasticComic):
    """Tapastic-hosted edition of Doodle For Food.

    Only metadata is declared here; all retrieval behaviour is inherited
    from GenericTapasticComic.
    """
    # Also published on http://doodleforfood.com
    url = "https://tapastic.com/series/Doodle-for-Food"
    name = "doodle-tapa"
    long_name = "Doodle For Food (from Tapastic)"
4690
4691
4692
class MrLovensteinTapa(GenericTapasticComic):
    """Tapastic-hosted edition of Mr Lovenstein.

    Only metadata is declared here; all retrieval behaviour is inherited
    from GenericTapasticComic.
    """
    # NOTE(review): the previous "Also on" comment listed this very Tapastic
    # URL; the comic's own site is presumably http://www.mrlovenstein.com -
    # confirm and record the real alternate source here.
    url = "https://tapastic.com/series/MrLovenstein"
    name = "mrlovenstein-tapa"
    long_name = "Mr. Lovenstein (from Tapastic)"
4698
4699
4700
class CassandraCalinTapa(GenericTapasticComic):
    """Tapastic-hosted edition of the C. Cassandra comics.

    Only metadata is declared here; all retrieval behaviour is inherited
    from GenericTapasticComic.
    """
    # Also published on:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    url = "https://tapastic.com/series/C-Cassandra-comics"
    name = "cassandra-tapa"
    long_name = "Cassandra Calin (from Tapastic)"
4707
4708
4709
class WafflesAndPancakes(GenericTapasticComic):
    """Tapastic-hosted edition of Waffles And Pancakes.

    Only metadata is declared here; all retrieval behaviour is inherited
    from GenericTapasticComic.
    """
    # Also published on http://wandpcomic.com
    url = "https://tapastic.com/series/Waffles-and-Pancakes"
    name = "waffles"
    long_name = "Waffles And Pancakes"
4715
4716
4717
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Tapastic-hosted edition of Yesterday's Popcorn.

    Only metadata is declared here; all retrieval behaviour is inherited
    from GenericTapasticComic.
    """
    # Also published on:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    url = "https://tapastic.com/series/Yesterdays-Popcorn"
    name = "popcorn-tapa"
    long_name = "Yesterday's Popcorn (from Tapastic)"
4724
4725
4726
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Tapastic-hosted edition of Our Super Adventure.

    Only metadata is declared here; all retrieval behaviour is inherited
    from GenericTapasticComic.
    """
    # Also published on http://www.oursuperadventure.com
    # Related sites listed by the original author:
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    url = "https://tapastic.com/series/Our-Super-Adventure"
    name = "superadventure-tapastic"
    long_name = "Our Super Adventure (from Tapastic)"
4734
4735
4736
class NamelessPCs(GenericTapasticComic):
    """Tapastic-hosted edition of Nameless PCs.

    Only metadata is declared here; all retrieval behaviour is inherited
    from GenericTapasticComic.
    """
    # Also published on http://namelesspcs.com
    url = "https://tapastic.com/series/NamelessPC"
    name = "namelesspcs-tapa"
    long_name = "NamelessPCs (from Tapastic)"
4742
4743
4744
class UbertoolTapa(GenericTapasticComic):
    """Tapastic-hosted edition of Ubertool.

    Only metadata is declared here; all retrieval behaviour is inherited
    from GenericTapasticComic.
    """
    # Also published on:
    #   http://ubertoolcomic.com
    #   http://ubertool.tumblr.com
    url = "https://tapastic.com/series/ubertool"
    name = "ubertool-tapa"
    long_name = "Ubertool (from Tapastic)"
    _categories = ("UBERTOOL",)
4752
4753
4754
def get_subclasses(klass):
    """Get the list of direct and indirect subclasses of a class.

    Direct subclasses come first (in the order reported by
    ``klass.__subclasses__()``), followed by the descendants of each of
    them. A class reachable through several inheritance paths (diamond /
    multiple inheritance) is reported only once, at its first position.

    Args:
        klass: the class whose descendants are wanted.

    Returns:
        A new list of classes; empty when ``klass`` has no subclass.
    """
    direct = klass.__subclasses__()
    candidates = direct + [
        sub for child in direct for sub in get_subclasses(child)]
    # Order-preserving de-duplication: with multiple inheritance the same
    # class would otherwise be collected once per parent.
    seen = set()
    unique = []
    for sub in candidates:
        if sub not in seen:
            seen.add(sub)
            unique.append(sub)
    return unique
4760
4761
4762
def remove_st_nd_rd_th_from_date(string):
    """Strip ordinal suffixes (1st/2nd/3rd/4th) to get a parsable date string.

    Only a suffix that immediately follows a digit is removed, so words
    that merely contain those letters ("Monday", "Saturday", "August")
    are left intact.

    Args:
        string: date text such as "August 1st, 2015".

    Returns:
        The same text with "st"/"nd"/"rd"/"th" removed after digits,
        e.g. "August 1, 2015".
    """
    # The look-behind keeps the digit and drops only the suffix.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
4770
4771
4772
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which parses under the
    requested locale and then puts the previous locale back.

    Args:
        string: text to parse.
        date_format: strptime format string, see
            https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
        local: locale name set for LC_ALL while parsing.

    Returns:
        The parsed value as a datetime.date.

    Raises:
        ValueError: if ``string`` does not match ``date_format``.
        locale.Error: if ``local`` cannot be set.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the right locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore even when parsing fails, so one bad date string does not
        # leave the whole process in a different locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
4783
4784
4785
# Every class deriving (directly or not) from GenericComic, without repeats.
COMICS = set(get_subclasses(GenericComic))
# Classes with no name are excluded from the registry.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup table from comic name to class; names must be unique.
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Python class names must be unique as well.
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(CLASS_NAMES)
4791