Completed
Push — master ( 910ddb...038367 )
by De
01:09
created

UneAnneeAuLycee   A

Complexity

Total Complexity 1

Size/Duplication

Total Lines 9
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 9
rs 10
wmc 1

1 Method

Rating   Name   Duplication   Size   Complexity  
A get_first_comic_url() 0 3 1
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, in increasing number.

        `last_comic` is the last retrieved comic (a dict with a 'num'
        entry) or a falsy value, in which case retrieval starts at 1.
        Each yielded value is the JSON description of the comic, enriched
        with 'prefix', 'json_url' and 'url' and with 'img' wrapped in a
        list and the date fields converted to integers.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        stop = latest['num'] + 1

        for num in range(start, stop):
            # Number 404 is never requested.
            # NOTE(review): presumably no such comic exists - confirm.
            if num == 404:
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
43
44
45
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
46
47
48
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Meant to be assigned as get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
52
53
54
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class url.

    Meant to be assigned as get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
58
59
60
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    get_url_from_link = get_href  # Default implementation

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True to get the next link, False to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The result is either None (comic is skipped) or a dict which
        must not contain a 'url' key: `get_next_comic` adds it.
        """
        raise NotImplementedError

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic` (or from the first
        comic link when `last_comic` is falsy) and follows "next" links,
        yielding the comic dicts, until no link is found or until a link
        leads to the url that has just been handled.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page pointing to itself would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Problems are printed. Returns True when no problem was detected,
        False otherwise.
        """
        cls.log("about to check navigation from %s" % url)
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link: this is
                # reported as a failure instead of crashing on None.
                backlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(backlink) if backlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same precaution for a next page without "prev" link.
                    backlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backlink) if backlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok

    # This method is not defined by default and is not part of this class' API.
    # It is only used:
    # - during development
    # - in subclasses implementing it correctly
    if False:
        @classmethod
        def get_first_comic_url(cls):
            """Get first comic url

            Sometimes, the first comic cannot be reached directly so to start
            from the first comic one has to go to the previous comic until
            there is no previous comics. Once this URL is reached, it
            is better to hardcode it but for development purposes, it
            is convenient to have an automatic way to find it.
            """
            url = input("Get starting URL: ")
            print(url)
            comic = cls.get_prev_link(get_soup_at_url(url))
            while comic:
                url = cls.get_url_from_link(comic)
                print(url)
                comic = cls.get_prev_link(get_soup_at_url(url))
            return url
185
186
187
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to (and including) the one whose
        url is the url of `last_comic`; the following ones are fetched
        and yielded. When `last_comic` is falsy, nothing is skipped.
        """
        pending_url = last_comic['url'] if last_comic else None
        for elt in cls.get_archive_elements():
            elt_url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % elt_url)
            if pending_url and pending_url == elt_url:
                # Last known comic reached: next elements are new ones.
                pending_url = None
                continue
            if pending_url is not None:
                continue
            cls.log("about to get %s (%s)" % (elt_url, str(elt)))
            info = cls.get_comic_info(get_soup_at_url(elt_url), elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = elt_url
            yield info
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
231
232
# Helper functions corresponding to get_first_comic_link/get_navi_link
233
234
235
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next/prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
239
240
241
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next/prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
245
246
247
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next/prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
251
252
253
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi comic-nav-... navi-...">."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
257
258
259
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="comic-nav-base comic-nav-...">."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
263
264
265
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link based on <a class="navi navi-first">."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
269
270
271
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the first <a> found in the <div class="nav-first"> of the
    page at `cls.url`, or None when there is no such div (instead of
    failing with an AttributeError on None).
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
275
276
277
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link based on <a class="comic-nav-base comic-nav-first">."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
281
282
283
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Return no comic at all, whatever `last_comic` is."""
        return []
293
294
295
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the <a title="FIRST"> of the main page."""
        return get_soup_at_url(cls.url).find('a', title='FIRST')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the post title, the images whose source is
        under the site's wp-content/uploads/ directory and a file prefix
        derived from the title.
        """
        # The url is escaped so that its '.' only matches a literal dot.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('h2', class_='post-title').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'prefix': title + '-'
        }
316
317
318 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (to be implemented by each blog)."""
        raise NotImplementedError

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic, built from get_first_comic_url."""
        first_url = cls.get_first_comic_url()
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page."""
        shortlink = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # Dates are written in French on the page.
        when = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': og_title,
            'url2': shortlink,
            'img': [convert_iri_to_plain_ascii_uri(m['content']) for m in og_images],
            'month': when.month,
            'year': when.year,
            'day': when.day,
        }
        return info
346
347
348
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Get the hardcoded url used as starting point."""
        first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
        return first_url
357
358
359
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Get the hardcoded url used as starting point.

        Not the first but I didn't find an efficient way to retrieve it.
        """
        first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
        return first_url
369
370
371
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Get the hardcoded url used as starting point."""
        first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
        return first_url
380
381
382
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Get the hardcoded url used as starting point."""
        first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
        return first_url
391
392
393
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Get the hardcoded url used as starting point."""
        first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
        return first_url
402
403
404
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Get the hardcoded url used as starting point."""
        first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
        return first_url
413
414
415
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Get the hardcoded url used as starting point."""
        first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
        return first_url
424
425
426
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Get the hardcoded url used as starting point."""
        first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
        return first_url
435
436
437 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get the hardcoded link used as starting point."""
        # Not the first but I didn't find an efficient way to retrieve it
        start_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"
        return {'href': start_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page."""
        og_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        when = string_to_date(raw_date, "%B %d, %Y")
        og_desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # remove social media buttons (the last 7 images)
        comic_imgs = content_imgs[:-7]
        info = {
            'title': og_title,
            'author': author_name,
            'month': when.month,
            'year': when.year,
            'day': when.day,
            'description': og_desc,
            'img': [img['src'] for img in comic_imgs],
        }
        return info
468
469
470
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get the hardcoded link used as starting point."""
        start_url = "http://information.tv5monde.com/dilem/2004-06-26"
        return {'href': start_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous) comic, None if there is none."""
        # prev is next / next is prev
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page."""
        shortlink = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        full_date = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (YYYY-MM-DD) are parsed.
        when = string_to_date(full_date[:10], "%Y-%m-%d")
        info = {
            'short_url': shortlink,
            'title': twitter_title,
            'img': [m['content'] for m in og_images],
            'day': when.day,
            'month': when.month,
            'year': when.year,
        }
        return info
503
504
505
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get the hardcoded link (url and title) used as starting point."""
        return {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The title comes from the link, the date from the url and the
        images from the page.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        link_title = link['title']
        comic_url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(comic_url).groups()
        year, month, day = map(int, date_parts)
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        info = {
            'title': link_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
        return info
531
532
533
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get the hardcoded link used as starting point."""
        start_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"
        return {'href': start_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author_name = post.find("span", class_="post-author").find("a").string
        post_title = post.find('h2', class_='post-title').string
        raw_date = post.find('span', class_='post-date').string
        when = string_to_date(raw_date, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (post_title, "") for img in comic_imgs)
        og_desc = soup.find('meta', property='og:description')['content']
        info = {
            'title': post_title,
            'description': og_desc,
            'author': author_name,
            'day': when.day,
            'month': when.month,
            'year': when.year,
            'img': [img['src'] for img in comic_imgs],
        }
        return info
567
568
569
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        when = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        og_srcs = [m['content'] for m in soup.find_all('meta', property='og:image')]
        bonus_srcs = [b['data-oversrc']
                      for b in soup.find_all('img', attrs={'data-oversrc': True})]
        srcs = list(og_srcs)
        for src in bonus_srcs:
            if src not in og_srcs:
                srcs.append(src)
        # The generic "bonus panel" picture is not a comic image.
        srcs = [src for src in srcs if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        info = {
            'title': comic_title,
            'month': when.month,
            'year': when.year,
            'day': when.day,
            'img': srcs,
            'tags': tags,
        }
        return info
601
602
603 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get the hardcoded link used as starting point."""
        start_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
        return {'href': start_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page."""
        raw_date = soup.find('h2', class_='date-header').string
        # Dates are written in French on the page.
        when = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        info = {
            'title': entry_title,
            'img': [img['src'] for img in body_imgs],
            'month': when.month,
            'year': when.year,
            'day': when.day,
        }
        return info
627
628
629 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        raw_date = meta_header.find('a').string
        when = string_to_date(raw_date, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': comic_title,
            'month': when.month,
            'year': when.year,
            'day': when.day,
            'img': [m['content'] for m in og_images],
        }
        return info
652
653
654 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        raw_date = meta_header.find('a').string
        when = string_to_date(raw_date, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': comic_title,
            'month': when.month,
            'year': when.year,
            'day': when.day,
            'img': [m['content'] for m in og_images],
        }
        return info
676
677
678
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page.

        The comic number is read from the short link (`?p=<num>`) and the
        date from the file name of the (single) comic image.
        """
        # Dots are escaped so that they only match a literal dot.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before the first image is used so that an unexpected
        # page structure is reported by this assertion.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
708
709
710
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get the hardcoded link used as starting point."""
        return {'href': 'https://garfield.com/comic/1978/06/19'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous) comic: the right (or left) arrow."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date comes from the url of the comic (`/comic/<year>/<month>/<day>`)
        and the images from the page.
        """
        url = cls.get_url_from_link(link)
        # The url is escaped so that its '.' only matches a literal dot.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
737
738
739 View Code Duplication
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get the hardcoded link used as starting point."""
        start_url = 'http://dilbert.com/strip/1989-04-16'
        return {'href': start_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous) comic, None if there is none."""
        css_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=css_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its meta tags."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        og_desc = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('meta', property='article:publish_date')['content']
        when = string_to_date(raw_date, "%B %d, %Y")
        author_name = soup.find('meta', property='article:author')['content']
        tag_list = soup.find('meta', property='article:tag')['content']
        info = {
            'title': og_title,
            'description': og_desc,
            'img': [m['content'] for m in og_images],
            'author': author_name,
            'tags': tag_list,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
        return info
775
776
777
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose 'href' is the hard-coded first comic."""
        first_comic = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'
        return {'href': first_comic}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs for the comic page."""
        # Date is on the archive page
        def last_meta_content(prop):
            # several metas may share a property: the last one is used
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        desc = last_meta_content('og:description')
        pictures = soup.find('div', id='comic').find_all('img')
        for picture in pictures:
            assert picture['title'] == picture['alt'] == title
        return {
            'title': title,
            'description': desc,
            'img': [picture['src'] for picture in pictures],
        }
800
801
802
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the '/firstlink.gif' image on the home page."""
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button image.

        Returns None when the button image is absent from the page or when
        its parent carries no 'href'.
        """
        button = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if button is None:
            # find() yields None when nothing matches: report "no link"
            # instead of failing on None.parent
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the page title, the joined alt texts and the comic image URLs."""
        title = soup.find('title')
        # every <img> whose src does not end with one of these suffixes is kept
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
831
832
833
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the URLs of the images found in the 'comic' div."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'img': [picture['src'] for picture in pictures],
        }
849
850
851 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and publication date of the post."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
874
875
876
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page's <a> with class 'comic_nav_link first_comic_link'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and publication date of the comic."""
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(
            soup.find("span", class_="comic_date").string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        for picture in pictures:
            assert picture['alt'] == picture['title'] == title
        return {
            'title': title,
            # images with an empty src are left out
            'img': [picture['src'] for picture in pictures if picture["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
901
902
903
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page's <a> with rel='start'."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs (main + optional after-comic) and date.

        The main image is the <img> with id 'cc-comic'. A second image is
        added only when the 'aftercomic' div exists, contains an <img> and
        that image has a non-empty src.
        """
        image1 = soup.find('img', id='cc-comic')
        imgs = [image1['src']]
        aftercomic = soup.find('div', id='aftercomic')
        after_img = aftercomic.find('img') if aftercomic else None
        # the div may be present without any image inside: guard against
        # subscripting None
        image_url2 = after_img.get('src') if after_img is not None else None
        if image_url2:
            imgs.append(image_url2)
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
932
933
934
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page links shaped like '/<digits>/', in reversed order."""
        numbered_link = re.compile('^/[0-9]*/$')
        home = get_soup_at_url(cls.url)
        return reversed(home.find_all('a', href=numbered_link))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name, image URL and file prefix for an archive link."""
        url = cls.get_url_from_archive_element(link)
        name = link.string
        num = int(link['name'])
        # the link's target is expected to be built from its number
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=re.compile('^/archive_b/PBF.*'))
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(url, picture['src']) for picture in pictures],
            'prefix': '%d-' % num,
        }
963
964
965
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author, description and date read from <meta> tags."""
        def named_meta_content(meta_name):
            # 'content' of the first <meta> with the given name attribute
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        metadesc = soup.find('meta', property='og:description')
        if metadesc:
            desc = metadesc['content']
        else:
            desc = ""
        author = named_meta_content('shareaholic:article_author_name')
        published_time = named_meta_content('shareaholic:article_published_time')
        # only the first 10 characters are parsed as the date
        posted = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
993
994
995
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # The URL is escaped so that its dots only match literal dots.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links matching comic_num_re, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image URL and date for an archive link.

        The number comes from the comic URL; the date is parsed from the
        '<digits>-<digits>-<digits>-' part of the image URL.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1029
1030
1031
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first <a> of the home page's 'centered_nav' div."""
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title, image texts and date of a post.

        The date is parsed from the '<year>/<month>/<day>/' path that
        follows the site URL in the post URL.
        """
        url = cls.get_url_from_link(link)
        # the site URL is escaped so that its dots only match literal dots
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            # images without a src attribute are left out
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1056
1057
1058
class BouletCorp(GenericBouletCorp):
    """BouletCorp comics, served from www.bouletcorp.com."""
    # Retrieval logic is inherited; only the identifiers and the URL are set here.
    url = 'http://www.bouletcorp.com'
    name = 'boulet'
    long_name = 'Boulet Corp'
1063
1064
1065
class BouletCorpEn(GenericBouletCorp):
    """BouletCorp comics, served from english.bouletcorp.com."""
    # Retrieval logic is inherited; only the identifiers and the URL are set here.
    url = 'http://english.bouletcorp.com'
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
1070
1071
1072 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title (joined image titles), author, image URLs and date."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(picture['title'] for picture in pictures)
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'title': title,
            'author': author,
            'img': [picture['src'] for picture in pictures],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1096
1097
1098
class ToonHole(GenericListableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's rel='bookmark' links, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image URLs for an archive link."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title'] == title
        return {
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(picture['src']) for picture in pictures],
        }
1125
1126
1127
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return author, date, title and images, including extra-panel images.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched and its 'extrapanelimage' images are appended.
        """
        author = soup.find("span", class_="post-author").find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        for picture in pictures:
            assert picture['alt'] == picture['title']
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures += extra_page.find_all('img', class_='extrapanelimage')
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [picture['src'] for picture in pictures],
        }
1160
1161
1162
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page's <a> titled 'Oldest comic'."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous <a>, or None when absent or without 'href'."""
        # NOTE(review): the trailing space in 'previous-comic ' is kept
        # unchanged; presumably it mirrors the site's markup - confirm.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None:
            # find() yields None when nothing matches: report "no link"
            # instead of failing on None.get
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, author, date, file prefix and image URLs of a comic.

        The number is the second-to-last '/'-separated piece of the page's
        og:url; the author is the credit text without its leading 'by '.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1197
1198
1199
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield one info dict per comic number not retrieved yet.

        The range of comic numbers is derived from the '/comic/<number>'
        links of the home page; when last_comic is given, retrieval resumes
        right after its 'num'. Nothing is yielded when the home page shows
        no such link.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        # compiled once: the pattern does not depend on the comic number
        comic_img_re = re.compile('^/images/comics/')
        nums = [int(comic_num_re.match(link['href']).groups()[0])
                for link in get_soup_at_url(cls.url).find_all('a', href=comic_num_re)]
        if not nums:
            # min()/max() raise ValueError on an empty sequence
            return
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            imgs = list(reversed(soup.find_all('img', src=comic_img_re)))
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in (i.get('title') for i in imgs) if t),
                'img': [urljoin_wrapper(url, i['src']) for i in imgs],
            }
1226
1227
1228
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # The URL and the dot of 'index.php' are escaped so that '.' only
    # matches a literal dot.
    comic_link_re = re.compile('^%s/index\\.php\\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching comic_link_re, in reversed order."""
        archive_url = '%s/archive.php' % cls.url
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL, title, text and number for an archive link.

        The date is the link's own text and the text is taken from the
        node following the link in the archive page.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1260
1261
1262
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # The URL is escaped so that its dot only matches a literal dot.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching comic_link_re, in reversed order."""
        archive_url = '%s/archive/' % cls.url
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date (parsed from the comic URL) and image URL."""
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1289
1290
1291
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield one info dict per comic dated after the last retrieved one.

        The index page links to one page per '<year>/<month>/'. Month pages
        are fetched only when the first day of that month plus 31 days is
        not before the last retrieved date (1985-11-01 when last_comic is
        empty).
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # the textual forms are kept: they are reused in the image
            # pattern and in the image URL
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1324
1325
1326
class AbstruseGoose(GenericComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield one info dict per archive link numbered above the last comic.

        Each comic page is fetched only for the comics actually yielded, to
        read the URL of its strip image.
        """
        archive_url = '%s/archive' % cls.url
        last_num = last_comic['num'] if last_comic else 0
        # the site URL is escaped so that its dot only matches a literal dot
        escaped_url = re.escape(cls.url)
        comic_url_re = re.compile('^%s/([0-9]*)$' % escaped_url)
        comic_img_re = re.compile('^%s/strips/.*' % escaped_url)
        for link in get_soup_at_url(archive_url).find_all('a', href=comic_url_re):
            comic_url = link['href']
            num = int(comic_url_re.match(comic_url).groups()[0])
            if num > last_num:
                yield {
                    'url': comic_url,
                    'num': num,
                    'title': link.string,
                    'img': [get_soup_at_url(comic_url).find('img', src=comic_img_re)['src']]
                }
1348
1349
1350
class PhDComics(GenericNavigableComic):
    """Retriever for the PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, or None if absent."""
        if next_:
            button_src = 'images/next_button.gif'
        else:
            button_src = 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL and title of the comic.

        When the date text cannot be parsed, a message is printed and
        today's date is used instead.
        """
        date_font = soup.find(
            'font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            posted = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_str)
            posted = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': posted.year,
            'month': posted.month,
            'day': posted.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1382
1383
1384 View Code Duplication
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First.png' image on the home page."""
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next.png'/'Back.png' image.

        Returns None when the image is absent from the page or when its
        parent carries no 'href'.
        """
        button = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if button is None:
            # find() yields None when nothing matches: report "no link"
            # instead of failing on None.parent
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date of the post."""
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1413
1414
1415
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose 'href' is the hard-coded first article."""
        first_article = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
        return {'href': first_article}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with id 'article-next' or 'article-prev'."""
        if next_:
            anchor_id = 'article-next'
        else:
            anchor_id = 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the URLs of the images of the article."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
        }
1439
1440
1441
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page's <a> whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'next comic' or 'go back already'."""
        if next_:
            wanted_title = 'next comic'
        else:
            wanted_title = 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number (from the comic URL), image URL and image title."""
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.groups()[0])
        picture = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, picture['src'])],
            'title': picture.get('title')
        }
1468
1469
1470
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the home page's div with id 'st'."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (next) or 'pvs' (previous) div, or None."""
        nav_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs (title image then strip) and joined image titles."""
        title = soup.find('title').string
        title_pictures = soup.find('div', id='tt').find_all('img')
        assert len(title_pictures) == 1
        strip_pictures = soup.find_all('img', id='strip')
        assert len(strip_pictures) == 1
        pictures = title_pictures + strip_pictures
        desc = ' '.join(picture['title'] for picture in pictures)
        return {
            'title': title,
            'img': [picture['src'] for picture in pictures],
            'description': desc,
        }
1500
1501
1502 View Code Duplication
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose 'href' is the hard-coded first comic."""
        first_comic = 'http://satwcomic.com/sweden-denmark-and-norway'
        return {'href': first_comic}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> whose accesskey is 'n' (next) or 'p' (previous)."""
        if next_:
            key = 'n'
        else:
            key = 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs of the comic."""
        title = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        desc = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': desc,
            'img': [picture['src'] for picture in pictures],
        }
1526
1527
1528
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics."""
    # Only the identifiers and the URL are declared here.
    url = 'http://www.somethingofthatilk.com'
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
1533
1534
1535
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict holding the hard-coded first comic URL."""
        return {'href': 'http://infinitemonkeybusiness.net/comic/pillory/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the image URLs found in the 'comic' div."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': title,
            'img': [img['src'] for img in images],
        }
1554
1555
1556
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics, driven by the archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's rel='bookmark' anchors in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict for one page.

        When the page has no 'comic' div, the image list, alt text and title
        are left empty; the date and tags are still extracted.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        # Defaults used when the page carries no comic div.
        sources, alt_text, title = [], '', ''
        if comic_div:
            picture = comic_div.find('img')
            sources = [picture['src']]
            alt_text = picture['alt']
            assert alt_text == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(tag.string for tag in tag_links)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': title,
            'alt': alt_text,
            'tags': tags,
        }
1592
1593
1594 View Code Duplication
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date read from the page."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1615
1616
1617
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and alt text (taken from the first image)."""
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Every image must carry the same text in 'alt' and 'title'.
        assert all(img['alt'] == img['title'] for img in images)
        first_alt = images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': first_alt,
        }
1637
1638
1639 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author read from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find("div", id="comic").find_all("img")
        # 'alt' and 'title' of every image must both equal the post title.
        assert all(img['alt'] == img['title'] == title for img in images)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
        }
1664
1665
1666 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images and a title joined from their 'title' attributes."""
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        assert all(img['title'] == img['alt'] for img in images)
        joined_title = ' '.join(img['title'] for img in images)
        return {
            'img': [img['src'] for img in images],
            'title': joined_title,
        }
1684
1685
1686
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic.

    Navigation starts from a hard-coded first comic URL and comic details
    are read from the page's meta tags.
    """
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The original literal ended with a stray space, which is not a valid
    # URL character.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict holding the hard-coded first comic URL."""
        return {'href': 'http://respawncomic.com/comic/c0001/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and image URLs read from meta tags.

        og:image entries listed in skip_imgs are left out of the result.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1718
1719
1720 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict holding the hard-coded first comic URL."""
        return {'href': 'http://www.safelyendangered.com/comic/ignored/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text (from the first image)."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        first_alt = images[0]['alt']
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in images],
            'title': title,
            'alt': first_alt,
        }
1748
1749
1750
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict holding the hard-coded first comic URL."""
        return {'href': 'http://www.picturesinboxes.com/2013/10/26/tetris/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author read from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find('div', class_='comicpane').find_all('img')
        # At least one image, each labelled with the post title.
        assert images
        assert all(img['title'] == img['alt'] == title for img in images)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
        }
1779
1780
1781
class Penmen(GenericEmptyComic):
    """Entry for the Penmen comics.

    Only identification attributes are declared here; all behaviour comes
    from GenericEmptyComic.
    """
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1786
1787
1788
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        if next_:
            anchor_id = 'nextlink'
        else:
            anchor_id = 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image URL and comic number.

        The comic number is the last path segment of the comic URL.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt_text = picture.get('title')
        # The src is resolved against the comic page URL.
        picture_url = urljoin_wrapper(page_url, picture['src'].strip())
        number = int(page_url.split('/')[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt_text,
            'img': [picture_url],
            'num': number,
        }
1814
1815
1816
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics.

    Comics are listed from the archive page; each archive element is a
    'td' of class 'archive-title' wrapping the link to the comic.
    """
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's title cells in reversed order."""
        archive_url = urljoin_wrapper(cls.url, '/archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the anchor inside the archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Return date, image URL and title for one comic.

        Month and day come from the cell preceding `td`; the year is the
        first numeric path segment of the comic URL.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base URL so that its dots are matched literally rather
        # than as regex wildcards.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        # NOTE: always true, since imgs is built as a one-element list.
        assert len(imgs) == 1
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1851
1852
1853
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Entry for the Disco Bleach comics.

    Only identification attributes are declared here; all behaviour comes
    from GenericEmptyComic.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1858
1859
1860
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Entry for the TubeyToons comics.

    Only identification attributes are declared here; all behaviour comes
    from GenericEmptyComic.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1867
1868
1869
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is the second child node of the post-author span.
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        images = soup.find('div', class_='comicpane').find_all('img')
        assert images
        alt_text = images[0]['title']
        assert all(img['title'] == img['alt'] == alt_text for img in images)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
        }
1896
1897
1898
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics.

    Comics are listed from the archive page by keeping the anchors whose
    href starts with '<url>/comic/'.
    """
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URL (at most one) and its title.

        The title is the image's 'title' attribute, or an empty string when
        the attribute or the image is missing.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's comic anchors in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base URL so that its dots are matched literally rather
        # than as regex wildcards.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
1920
1921
1922 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at cls.url."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        if next_:
            wanted_title = 'Next'
        else:
            wanted_title = 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date read from the page.

        Only images with empty 'alt' and 'title' attributes inside the
        'comic' div are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        images = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1949
1950
1951 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author read from the page.

        Pages without a 'comic' div yield no images and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            title = images[0]['title']
        else:
            title = ""
        assert all(img['title'] == img['alt'] == title for img in images)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
        }
1976
1977
1978 View Code Duplication
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named 'beginArrow' on the home page."""
        home_soup = get_soup_at_url(cls.url)
        arrow = home_soup.find('img', attrs={'name': 'beginArrow'})
        return arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'rightArrow' or 'leftArrow' image."""
        if next_:
            arrow_name = 'rightArrow'
        else:
            arrow_name = 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image URLs of the page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2001
2002
2003
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics, driven by the archive."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the comic dict built from an archive row and its page.

        The row must hold exactly three cells: the second carries the link
        (whose text is the title), the third the date.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        published = string_to_date(raw_date, "%m.%d.%y")
        title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        images = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': og_title,
            'description': description,
            'tags': tags,
            'img': [img['src'] for img in images],
            'alt': ' '.join(img['alt'] for img in images),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the row's second cell."""
        _, link_cell, _date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2044
2045
2046 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author from the page.

        The alt text is the concatenation (no separator) of the images'
        'alt' attributes.
        """
        images = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        title = post_div.find('h2', class_='post-title').string
        author = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'title': title,
            'img': [img['src'] for img in images],
            'alt': ''.join(img['alt'] for img in images),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2072
2073
2074
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict holding the hard-coded first comic URL."""
        return {'href': 'http://fatawesome.com/shortbus/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the comic dict read from meta tags and the single image.

        Image URLs are cut at their last '?' so any query string is dropped.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        images = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(images) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(img['alt'] for img in images),
            'img': [img['src'].rsplit('?', 1)[0] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2106
2107
2108
class AnythingComic(GenericComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics whose number is greater than last_comic's.

        Comics are listed from the 'chapter_table' table of the archive
        page. The first two rows are skipped, and every other row must hold
        four cells (number, comic link, date and one unused cell).
        """
        last_num = last_comic['num'] if last_comic else 0
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        for row_idx, row in enumerate(rows):
            if row_idx <= 1:
                continue
            td_num, td_comic, td_date, _ = row.find_all('td')
            num = int(td_num.string)
            # Comic numbers are expected to trail the row index by one.
            assert num + 1 == row_idx
            if num <= last_num:
                continue
            anchor = td_comic.find('a')
            comic_url = urljoin_wrapper(cls.url, anchor['href'])
            title = anchor.string
            comic_soup = get_soup_at_url(comic_url)
            images = comic_soup.find_all('img', id='comic_image')
            published = string_to_date(td_date.string, '%d %b %Y %I:%M %p')
            assert len(images) == 1
            assert all(img.get('alt') == img.get('title') for img in images)
            yield {
                'url': comic_url,
                'num': num,
                'title': title,
                'alt': images[0].get('alt', ''),
                'img': [img['src'] for img in images],
                'month': published.month,
                'year': published.year,
                'day': published.day,
            }
2143
2144
2145
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict holding the hard-coded first comic URL."""
        return {'href': 'http://www.lonniemillsap.com/?p=42'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, images and date read from the post."""
        title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author_span = post_div.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry_div = post_div.find("div", class_="entry")
        images = entry_div.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2172
2173
2174 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict holding the hard-coded first comic URL."""
        return {'href': 'https://linsedition.com/2011/09/07/l-i-n-s/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date read from the page's meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2199
2200
2201
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first rel next/prev anchor not pointing at '/comic'.

        Returns None when no such anchor exists.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, date, author, title, alt text and description.

        Image sources are joined to cls.url; the alt text comes from the
        first image, or is empty when there is none.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        assert all(img['title'] == img['alt'] for img in images)
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt_text,
            'description': description,
        }
2240
2241
2242 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date from the page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        alt_text = images[0]['alt']
        # Every image must share the first image's alt text in both attributes.
        assert all(img['alt'] == img['title'] == alt_text for img in images)
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2268
2269
2270
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images (at most one), title, author and date.

        The date is parsed with the "de_DE.utf8" locale passed to
        string_to_date.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] == title for img in images)
        assert len(images) <= 1
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2295
2296
2297
class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and image URL from `soup`.

        `link` is not used by this implementation.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Exactly one image is expected in the comic div
        assert len(images) == 1
        alt = images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2324
2325
2326
class TheAwkwardYeti(GenericNavigableComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and image URLs from `soup`.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Only the first image is required to have matching 'alt' and 'title'
        for first in images[:1]:
            assert first['alt'] == first['title']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2351
2352
2353
class PleasantThoughts(GenericNavigableComic):
    """Class to retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title and image URLs from the 'post-content' div of `soup`.

        `link` is not used by this implementation.
        """
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        entry = post.find("div", class_="entry")
        images = entry.find_all("img")
        return {
            'title': title,
            'img': [image['src'] for image in images],
        }
2370
2371
2372
class MisterAndMe(GenericNavigableComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and image URLs from `soup`.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) < 2
        # Pages without any image get an empty alt text
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2401
2402
2403
class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and image URLs from `soup`.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) < 2
        # Pages without any image get an empty alt text
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2430
2431
2432 View Code Duplication
class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and image URLs from `soup`.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Alt text comes from the first image; empty when there is none
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2460
2461
2462 View Code Duplication
class EndlessOrigami(GenericNavigableComic):
    """Class to retrieve Endless Origami Comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and image URLs from `soup`.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Alt text comes from the first image; empty when there is none
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2488
2489
2490 View Code Duplication
class PlanC(GenericNavigableComic):
    """Class to retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and image URLs from `soup`.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': title,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2511
2512
2513 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Class to retrieve Buni Comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image URL and title from `soup`.

        The title is read from the 'title' attribute of the first image.
        `link` is not used by this implementation.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        # Exactly one image is expected in the comic div
        assert len(images) == 1
        return {
            'img': [image['src'] for image in images],
            'title': images[0]['title'],
        }
2530
2531
2532
class GenericCommitStrip(GenericNavigableComic):
    """Generic class to retrieve Commit Strips in different languages."""
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect titles, description and image URLs from `soup`.

        'title2' is the space-joined 'title' attributes of the images
        (an empty string stands in for a missing attribute).
        `link` is not used by this implementation.
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        images = entry.find_all('img')
        image_titles = [image.get('title', '') for image in images]
        title2 = ' '.join(image_titles)
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            # Each 'src' is converted to a plain ASCII URI, then joined to cls.url
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(image['src']))
                for image in images
            ],
        }
2548
2549
2550
class CommitStripFr(GenericCommitStrip):
    """Class to retrieve Commit Strips in French."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'

    @classmethod
    def get_first_comic_link(cls):
        """Return a dict whose 'href' is the hard-coded first comic URL."""
        first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
        return {'href': first_url}
2559
2560
2561
class CommitStripEn(GenericCommitStrip):
    """Class to retrieve Commit Strips in English."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'

    @classmethod
    def get_first_comic_link(cls):
        """Return a dict whose 'href' is the hard-coded first comic URL."""
        first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
        return {'href': first_url}
2570
2571
2572 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Generic class to retrieve Boumeries comics in different languages."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: both are passed to string_to_date in get_comic_info
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short URL, title, author, date and image URLs from `soup`.

        The date text is parsed with `cls.date_format` and `cls.lang`.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        return {
            'short_url': short_url,
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2597
2598
2599
class BoumerieEn(GenericBoumerie):
    """Class to retrieve Boumeries comics in English."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Date format and locale name used when parsing the post date
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2606
2607
2608
class BoumerieFr(GenericBoumerie):
    """Class to retrieve Boumeries comics in French."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    # Date format and locale name used when parsing the post date
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2615
2616
2617 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a dict whose 'href' is the hard-coded first comic URL."""
        return {'href': 'http://unearthedcomics.com/comics/world-with-turn-signals/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description, short URL, date and image URLs.

        The title comes from the first 'h1' (or else 'h2') element and is
        empty when neither exists. The description is the 'content' of the
        og:description meta tag, or None when that tag is absent.
        `link` is not used by this implementation.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the meta tag, not the element object itself,
        # so that 'description' is a plain string like in the other comics.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content') if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2649
2650
2651
class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, author, image URLs and date from `soup`.

        A page without a comic div yields no images and an empty alt text.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        # Alt text is taken from the 'title' attribute of the first image
        if images:
            alt = images[0]['title']
        else:
            alt = ""
        for image in images:
            assert image['alt'] == image['title'] == alt
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2678
2679
2680
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short URL, number, image URLs, date, alt text and title.

        The comic number is the integer after '?p=' in the shortlink URL.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so that its dots are matched literally by the regex
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Alt text is taken from the 'title' attribute of the first image
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2709
2710
2711
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short URL, number, images, date, title, tags, alt, author.

        The comic number is the integer after '?p=' in the shortlink URL;
        tags are the space-joined 'content' of the article:tag meta elements.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so that its dots are matched literally by the regex
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Alt text is taken from the 'title' attribute of the first image
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2746
2747
2748
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a dict whose 'href' is the hard-coded first comic URL."""
        return {'href': 'http://www.ahammaday.com/today/3/6/french'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor from `last_soup`, or None.

        None is returned when the relevant list item is absent, instead
        of failing with an AttributeError.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image URLs, title, author and date from `soup`.

        Image URLs are the 'content' of the meta elements with
        itemprop='image'. `link` is not used by this implementation.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2779
2780
2781 View Code Duplication
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a dict whose 'href' is the hard-coded first comic URL."""
        first_url = 'http://www.littlelifelines.com/comics/well-done'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor from `last_soup`, or None."""
        # The directions are swapped: 'prev' is used for next, 'next' for prev
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt, description, author, date and image URLs.

        `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        content = soup.find('div', class_="body entry-content")
        # Images without a 'src' attribute are ignored
        images = [
            image for image in content.find_all('img')
            if image.get('src') is not None
        ]
        alt = images[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
2819
2820
2821 View Code Duplication
class GenericWordPressInkblot(GenericNavigableComic):
    """Generic class to retrieve comics using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the page at `cls.url`."""
        link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and image URLs from `soup`.

        `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        images = image_div.find_all('img')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date
        raw_date = published_time[:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
2842
2843
2844
class EverythingsStupid(GenericWordPressInkblot):
    """Class to retrieve Everything's stupid Comics."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/EverythingsStupid
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
2852
2853
2854
class TheIsmComics(GenericWordPressInkblot):
    """Class to retrieve The Ism Comics."""
    # Possibly also published on https://tapastic.com/series/TheIsm (unconfirmed)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
2860
2861
2862
class WoodenPlankStudios(GenericWordPressInkblot):
    """Class to retrieve Wooden Plank Studios comics."""
    # All retrieval logic is inherited from GenericWordPressInkblot
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2867
2868
2869
class ElectricBunnyComic(GenericNavigableComic):
    """Class to retrieve Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image with alt 'First' on the `cls.url` page."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title and image URLs from the og: meta elements of `soup`.

        `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2894
2895
2896
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the `cls.url` page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first navigation anchor not pointing at the home page.

        None is returned when no such anchor exists.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=wanted_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        # Lazy evaluation: anchors after the first hit are never inspected
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title and image URL from the 'comic-foot' div of `soup`.

        The title is read from the 'title' attribute of the first image.
        `link` is not used by this implementation.
        """
        foot = soup.find("div", id="comic-foot")
        images = foot.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Exactly one image is expected
        assert len(images) == 1
        title = images[0]['title']
        return {
            'title': title,
            'img': [image['src'] for image in images],
        }
2924
2925
2926
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'backward' glyph span on the `cls.url` page."""
        home = get_soup_at_url(cls.url)
        backward = home.find('span', class_='glyphicon glyphicon-backward')
        return backward.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the right (next) or left (previous) chevron span."""
        direction = 'right' if next_ else 'left'
        chevron = last_soup.find('span', class_='glyphicon glyphicon-chevron-' + direction)
        return chevron.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect URL, titles, alt text and image URLs from `soup`.

        'title2' and 'alt' come from the first image's attributes.
        `link` is not used by this implementation.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # Date extraction is disabled; the code that used to do it was:
        #   date_str = soup.find('h2', class_='comic_title').find('small').string
        #   day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        images = soup.find_all('img', class_='comic img-responsive')
        first_image = images[0]
        title2 = first_image['title']
        alt = first_image['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [image['src'] for image in images],
        }
2958
2959
2960
class MakeItStoopid(GenericNavigableComic):
    """Class to retrieve Make It Stoopid Comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements of `soup` as a list.

        Elements without an 'href' attribute are replaced by None. The
        remaining 'cnav' elements must equal the first five.
        """
        nav_elts = soup.find_all(class_='cnav')
        head = nav_elts[:5]
        tail = nav_elts[5:]
        assert head == tail
        # Order of the entries: begin, prev, archive, next_, end
        links = []
        for elt in head:
            links.append(elt if elt.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation entry of the page at `cls.url`."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return navigation entry 3 (next) or 1 (previous) of `last_soup`."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title from `link` and the image URLs from `soup`."""
        title = link['title']
        images = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [image['src'] for image in images],
        }
2990
2991
2992
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a dict whose 'href' is the hard-coded first comic URL."""
        return {'href': 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor found in `last_soup`.

        The directions are swapped: the 'prev-item' anchor is used for the
        next comic and the 'next-item' anchor for the previous one.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt, description, author, date and image URLs.

        Images are searched in the "body entry-content" div, or else in the
        "special-content" div. The alt text is an empty string when there
        is no image or when the first image has no 'alt' attribute.
        `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute are ignored
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): if `i` is a BeautifulSoup Tag, `'title' not in i` tests
        # the tag's children rather than its attributes, which would make this
        # check always pass - confirm and consider `i.has_attr('title')`.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Fall back to an empty string (not a list) when there is no image
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3030
3031
3032
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Posts are read from the '/api/read/' endpoint of the blog and only the
    posts of type 'photo' are converted into comics.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics found in the posts returned by get_posts.

        Posts for which get_comic_info returns None are skipped.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Return the value of the 'url' attribute of a post."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the url of the V1 API endpoint for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None when the post is not of type 'photo', otherwise a dict
        describing the comic (urls, date, title, tags, images, tumblr id).
        """
        # print(post)
        type_ = post['type']
        if type_ != 'photo':
            # print("Type is %s" % type_)
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        last_comic is the last comic already retrieved (or None to get
        everything): the posts returned are the ones found before reaching
        the post whose url is last_comic['url']. An empty result is returned
        (after printing a message) when that post cannot be found.
        """
        # 'import urllib' alone does not load the 'error' submodule: import it
        # explicitly so that the except clauses below do not rely on another
        # module having loaded it.
        import urllib.error

        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic:
            # Sometimes, tumblr posts are deleted. If the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might
            # be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return []
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            # print(api_url2)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(p):
                    # Reached the last known post: everything accumulated is new.
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_url is None:
            return reversed(posts_acc)
        # All the posts were browsed without finding the last known one.
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3126
3127
3128
class IrwinCardozo(GenericTumblrV1):
    """Fetch the Irwin Cardozo comics through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3133
3134
3135
class AccordingToDevin(GenericTumblrV1):
    """Fetch the According To Devin comics through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3140
3141
3142
class ItsTheTieTumblr(GenericTumblrV1):
    """Fetch the It's the tie comics through the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
3149
3150
3151
class OctopunsTumblr(GenericTumblrV1):
    """Fetch the Octopuns comics through the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3157
3158
3159
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Fetch the Pictures In Boxes comics through the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3165
3166
3167
class TubeyToonsTumblr(GenericTumblrV1):
    """Fetch the TubeyToons comics through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
3174
3175
3176
class UnearthedComicsTumblr(GenericTumblrV1):
    """Fetch the Unearthed comics through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
3183
3184
3185
class PieComic(GenericTumblrV1):
    """Fetch the Pie Comic comics through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3190
3191
3192
class MrEthanDiamond(GenericTumblrV1):
    """Fetch the Mr Ethan Diamond comics through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3197
3198
3199
class Flocci(GenericTumblrV1):
    """Fetch the floccinaucinihilipilification comics through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3204
3205
3206
class UpAndOut(GenericTumblrV1):
    """Fetch the Up & Out comics through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3212
3213
3214
class Pundemonium(GenericTumblrV1):
    """Fetch the Pundemonium comics through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3219
3220
3221
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Fetch the Poorly Drawn Lines comics through the Tumblr V1 API."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
3227
3228
3229
class PearShapedComics(GenericTumblrV1):
    """Fetch the Pear Shaped Comics through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3234
3235
3236
class PondScumComics(GenericTumblrV1):
    """Fetch the Pond Scum comics through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3241
3242
3243
class MercworksTumblr(GenericTumblrV1):
    """Fetch the Mercworks comics through the Tumblr V1 API."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3249
3250
3251
class OwlTurdTumblr(GenericTumblrV1):
    """Fetch the Owl Turd comics through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
3257
3258
3259
class VectorBelly(GenericTumblrV1):
    """Fetch the Vector Belly comics through the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3265
3266
3267
class GoneIntoRapture(GenericTumblrV1):
    """Fetch the Gone Into Rapture comics through the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3274
3275
3276
class TheOatmealTumblr(GenericTumblrV1):
    """Fetch The Oatmeal comics through the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3282
3283
3284
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Fetch the Heck If I Know Comics through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3290
3291
3292
class MyJetPack(GenericTumblrV1):
    """Fetch the My Jet Pack comics through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3297
3298
3299
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Fetch the CheerUpEmoKid comics through the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3306
3307
3308
class ForLackOfABetterComic(GenericTumblrV1):
    """Fetch the For Lack Of A Better Comic comics through the Tumblr V1 API."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3314
3315
3316
class ZenPencilsTumblr(GenericTumblrV1):
    """Fetch the ZenPencils comics through the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
3323
3324
3325
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Fetch the Three Word Phrase comics through the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3331
3332
3333
class TimeTrabbleTumblr(GenericTumblrV1):
    """Fetch the Time Trabble comics through the Tumblr V1 API."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3339
3340
3341
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Fetch the Safely Endangered comics through the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3347
3348
3349
class MouseBearComedyTumblr(GenericTumblrV1):
    """Fetch the Mouse Bear Comedy comics through the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3355
3356
3357
class BouletCorpTumblr(GenericTumblrV1):
    """Fetch the BouletCorp comics through the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
3363
3364
3365
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Fetch The Awkward Yeti comics through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
3373
3374
3375
class NellucNhoj(GenericTumblrV1):
    """Fetch the NellucNhoj comics through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3380
3381
3382
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Fetch the Down The Upward Spiral comics through the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3388
3389
3390
class AsPerUsualTumblr(GenericTumblrV1):
    """Fetch the As Per Usual comics through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
3396
3397
3398
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Fetch the 1111 Comics through the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
3405
3406
3407
class JhallComicsTumblr(GenericTumblrV1):
    """Fetch the Jhall Comics through the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3413
3414
3415
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Fetch the Berkeley Mews comics through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
3422
3423
3424
class JoanCornellaTumblr(GenericTumblrV1):
    """Fetch the Joan Cornella comics through the Tumblr V1 API."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3430
3431
3432
class RespawnComicTumblr(GenericTumblrV1):
    """Fetch the Respawn Comic comics through the Tumblr V1 API."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3438
3439
3440
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics through the Tumblr V1 API."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling fixed: it used to read 'Chris Hallback', unlike the class name,
    # the docstring and the url.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3449
3450
3451
class ComicNuggets(GenericTumblrV1):
    """Fetch the Comic Nuggets comics through the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3456
3457
3458
class PigeonGazetteTumblr(GenericTumblrV1):
    """Fetch The Pigeon Gazette comics through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3464
3465
3466
class CancerOwl(GenericTumblrV1):
    """Fetch the Cancer Owl comics through the Tumblr V1 API."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3472
3473
3474
class FowlLanguageTumblr(GenericTumblrV1):
    """Fetch the Fowl Language comics through the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
3482
3483
3484
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Fetch The Odd 1s Out comics through the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3491
3492
3493
class TheUnderfoldTumblr(GenericTumblrV1):
    """Fetch The Underfold comics through the Tumblr V1 API."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3499
3500
3501
class LolNeinTumblr(GenericTumblrV1):
    """Fetch the Lol Nein comics through the Tumblr V1 API."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3507
3508
3509
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fetch the Fat Awesome Comics through the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3515
3516
3517
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Fetch The World Is Flat comics through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3523
3524
3525
class DorrisMc(GenericTumblrV1):
    """Fetch the Dorris Mc Comics through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3531
3532
3533
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Describe the Leleoz comics hosted on Tumblr."""
    # NOTE(review): GenericEmptyComic comes first in the bases, presumably to
    # disable the actual retrieval of this comic - confirm.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3539
3540
3541
class MoonBeardTumblr(GenericTumblrV1):
    """Fetch the MoonBeard comics through the Tumblr V1 API."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3548
3549
3550
class AComik(GenericTumblrV1):
    """Fetch the A Comik comics through the Tumblr V1 API."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3555
3556
3557
class ClassicRandy(GenericTumblrV1):
    """Fetch the Classic Randy comics through the Tumblr V1 API."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3562
3563
3564
class DagssonTumblr(GenericTumblrV1):
    """Fetch the Dagsson comics through the Tumblr V1 API."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3570
3571
3572
class LinsEditionsTumblr(GenericTumblrV1):
    """Fetch the L.I.N.S. Editions comics through the Tumblr V1 API."""
    # Also on https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
3578
3579
3580
class OrigamiHotDish(GenericTumblrV1):
    """Fetch the Origami Hot Dish comics through the Tumblr V1 API."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3585
3586
3587
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Fetch the Hit and Miss Comics through the Tumblr V1 API."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3592
3593
3594
class HMBlanc(GenericTumblrV1):
    """Fetch the HM Blanc comics through the Tumblr V1 API."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3599
3600
3601
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Fetch the Tales Of Absurdity comics through the Tumblr V1 API."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
3608
3609
3610
class RobbieAndBobby(GenericTumblrV1):
    """Fetch the Robbie And Bobby comics through the Tumblr V1 API."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3616
3617
3618
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Fetch the Electric Bunny Comics through the Tumblr V1 API."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3624
3625
3626
class Hoomph(GenericTumblrV1):
    """Fetch the Hoomph comics through the Tumblr V1 API."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3631
3632
3633
class BFGFSTumblr(GenericTumblrV1):
    """Fetch the BFGFS comics through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3640
3641
3642
class DoodleForFood(GenericTumblrV1):
    """Fetch the Doodle For Food comics through the Tumblr V1 API."""
    # Also on http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3648
3649
3650
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics hosted on horovitzcomics.com.

    Subclasses provide link_re: it selects the relevant links in the archive
    page and its first group captures the comic number.
    """
    url = 'http://www.horovitzcomics.com'
    # The three groups are read as year, month and day of the image url.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from a comic page and its archive link.

        The number comes from the link href, the title from the link text and
        the date from the url of the single image with id 'comic'.
        """
        comic_num = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = (int(part) for part in date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [tag['src'] for tag in comic_imgs],
            'num': comic_num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching link_re, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
3679
3680
3681
class HorovitzNew(HorovitzComics):
    """Fetch the 'new' series of the Horovitz comics."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # Archive links of the 'new' series; the group is the comic number.
    link_re = re.compile('^/comics/new/([0-9]+)$')
3686
3687
3688
class HorovitzClassic(HorovitzComics):
    """Fetch the 'classic' series of the Horovitz comics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # Archive links of the 'classic' series; the group is the comic number.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
3693
3694
3695
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com."""
    # Comic urls end with three numeric parts read as year, month and day.
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'beginning' anchor found on the page of the comic."""
        front_page = get_soup_at_url(cls.url)
        return front_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (or 'prev') anchor whose href ends with a date."""
        wanted_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=wanted_class, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the url of a link, resolved against the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description: date from the url, image and author from the page."""
        date_match = cls.url_date_re.match(cls.get_url_from_link(link))
        year, month, day = (int(part) for part in date_match.groups())
        strips = soup.find_all('img', class_='strip')
        author_meta = soup.find('meta', attrs={'name': 'author'})
        return {
            'day': day,
            'month': month,
            'year': year,
            # Only the last 'strip' image of the page is kept.
            'img': [strips[-1]['src']],
            'author': author_meta['content']
        }
3724
3725
3726
class PearlsBeforeSwine(GenericGoComic):
    """Fetch the Pearls Before Swine comics hosted on gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
3731
3732
3733
class Peanuts(GenericGoComic):
    """Fetch the Peanuts comics hosted on gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
3738
3739
3740
class MattWuerker(GenericGoComic):
    """Fetch the Matt Wuerker comics hosted on gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
3745
3746
3747
class TomToles(GenericGoComic):
    """Fetch the Tom Toles comics hosted on gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
3752
3753
3754
class BreakOfDay(GenericGoComic):
    """Fetch the Break Of Day comics hosted on gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
3759
3760
3761
class Brevity(GenericGoComic):
    """Fetch the Brevity comics hosted on gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
3766
3767
3768
class MichaelRamirez(GenericGoComic):
    """Fetch the Michael Ramirez comics hosted on gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
3773
3774
3775
class MikeLuckovich(GenericGoComic):
    """Fetch the Mike Luckovich comics hosted on gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
3780
3781
3782
class JimBenton(GenericGoComic):
    """Fetch the Jim Benton comics hosted on gocomics.com."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
3788
3789
3790
class TheArgyleSweater(GenericGoComic):
    """Fetch the Argyle Sweater comics hosted on gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
3795
3796
3797
class SunnyStreet(GenericGoComic):
    """Fetch the Sunny Street comics hosted on gocomics.com."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
3803
3804
3805
class OffTheMark(GenericGoComic):
    """Fetch the Off The Mark comics hosted on gocomics.com."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
3811
3812
3813
class WuMo(GenericGoComic):
    """Fetch the WuMo comics hosted on gocomics.com."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
3819
3820
3821
class LunarBaboon(GenericGoComic):
    """Fetch the Lunar Baboon comics hosted on gocomics.com."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
3828
3829
3830
class SandersenGocomic(GenericGoComic):
    """Fetch the Sarah Andersen comics hosted on gocomics.com."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
3837
3838
3839
class CalvinAndHobbesGoComic(GenericGoComic):
    """Fetch the Calvin and Hobbes comics hosted on gocomics.com."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
3845
3846
3847
class RallGoComic(GenericGoComic):
    """Fetch the Ted Rall comics hosted on gocomics.com."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/tedrall"
3853
3854
3855
class TheAwkwardYetiGoComic(GenericGoComic):
    """Fetch The Awkward Yeti comics hosted on gocomics.com."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
3863
3864
3865
class BerkeleyMewsGoComics(GenericGoComic):
    """Fetch the Berkeley Mews comics hosted on gocomics.com."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
3872
3873
3874
class SheldonGoComics(GenericGoComic):
    """Fetch the Sheldon comics hosted on gocomics.com."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
3880
3881
3882
class FowlLanguageGoComics(GenericGoComic):
    """Fetch the Fowl Language comics hosted on gocomics.com."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
3890
3891
3892
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, as published on GoComics.

    Only the identifiers and the series url are declared here.
    """
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
3897
3898
3899
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, as published on GoComics.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
3905
3906
3907
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, as published on GoComics.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
3913
3914
3915
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me comics, as published on GoComics.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
3922
3923
3924
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    The three hooks below work on the entries of the episode list embedded
    in the series page found at `cls.url`; the entries are indexed with the
    keys 'id', 'title' and 'publishDate'.
    """

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Describe one episode.

        `soup` is searched for the episode images (<img> tags with the
        'art-image' class) and `archive_elt` is the corresponding entry of
        the episode list.

        Returns a dict with the keys 'day', 'month', 'year', 'img' (list of
        image urls) and 'title', or None when the page has no image yet.
        """
        # 'publishDate' is divided by 1000 before being handed to
        # fromtimestamp (which expects seconds) - presumably it is a number
        # of milliseconds since the epoch.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url of the episode described by `archive_elt`."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page.

        The information is stored in the javascript part of the page: the
        first line (once stripped) starting with 'episodeList : ' and ending
        with ',' is located and the text in between is parsed as JSON.
        There may be a cleaner way to get it; this is the ugly way.

        Raises IndexError when no such line is found.
        """
        pref, suff = 'episodeList : ', ','
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as the former `[...][0]` lookup, with a
        # message that tells what went wrong.
        raise IndexError("No '%s' line found in %s" % (pref.strip(), cls.url))
3955
3956
3957
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
3963
3964
3965
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
3973
3974
3975
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
3980
3981
3982
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
3987
3988
3989
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
3996
3997
3998
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://tubeytoons.com
    #  - http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
4005
4006
4007
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4013
4014
4015
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://unearthedcomics.com
    #  - http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
4022
4023
4024
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4031
4032
4033
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4039
4040
4041
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
4047
4048
4049
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4055
4056
4057
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://goneintorapture.tumblr.com
    #  - http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4064
4065
4066
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4072
4073
4074
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://www.cheerupemokid.com
    #  - http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4081
4082
4083
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4089
4090
4091
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4097
4098
4099
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4105
4106
4107
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry At Nothing comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4113
4114
4115
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4121
4122
4123
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4131
4132
4133
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4139
4140
4141
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
4148
4149
4150
class TumbleDryTapa(GenericTapasticComic):
    """Tumble Dry comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is called "Tumble Dry" (it used to be spelled "Tumblr Dry").
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4156
4157
4158
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4164
4165
4166
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Chris Hallbeck's Maximumble comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://chrishallbeck.tumblr.com
    #  - http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author's name is "Hallbeck" (it used to be misspelled "Hallback").
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4173
4174
4175
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Chris Hallbeck's Minimumble comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://chrishallbeck.tumblr.com
    #  - http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author's name is "Hallbeck" (it used to be misspelled "Hallback").
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4182
4183
4184
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Chris Hallbeck's The Book of Biff comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://chrishallbeck.tumblr.com
    #  - http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author's name is "Hallbeck" (it used to be misspelled "Hallback").
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4191
4192
4193
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4199
4200
4201
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4207
4208
4209
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4216
4217
4218
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4224
4225
4226
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4233
4234
4235
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales Of Absurdity comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4242
4243
4244
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at:
    #  - http://bfgfs.com
    #  - http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4251
4252
4253
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, as published on Tapastic.

    Only the identifiers and the series url are declared here.
    """
    # Also available at http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4259
4260
4261
def get_subclasses(klass):
    """Return the list of classes deriving from `klass`, directly or not.

    The direct subclasses come first, followed by the descendants of each
    of them in turn. A class reachable through several inheritance paths
    is listed once per path.
    """
    children = klass.__subclasses__()
    descendants = list(children)
    for child in children:
        descendants += get_subclasses(child)
    return descendants
4267
4268
4269
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    The ordinal suffix ("st", "nd", "rd" or "th") is dropped only when it
    directly follows a digit and ends a word, so that the rest of the text
    (e.g. "August", "Monday", "Saturday") is left untouched.

    >>> remove_st_nd_rd_th_from_date('Monday 2nd August 2015')
    'Monday 2 August 2015'
    """
    # Blindly removing the two-letter suffixes everywhere would corrupt any
    # word containing them ("Monday" -> "Moay"), hence the digit look-behind.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
4277
4278
4279
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: `string` is parsed according
    to `date_format` while the locale `local` is active, and the
    corresponding datetime.date is returned.

    The locale in place before the call is restored afterwards, including
    when the parsing fails (the exception raised by strptime is propagated
    unchanged).
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch nor to restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Never leave the process in the temporary locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
4290
4291
4292
# All the classes deriving (directly or not) from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Only the classes with a name are kept as valid comics.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Mapping from name to class; the assert fails if two classes share a name.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Same check for the names of the classes themselves.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES)
4298