Completed
Push — master ( 95f075...3d1429 )
by De
35s
created

comics.py (40 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        `last_comic` is the last comic already retrieved (a dict with at
        least a 'num' key) or a falsy value when nothing was retrieved yet.
        Yields one dict per comic number after `last_comic['num']` (or
        starting at 1) up to the 'num' reported by `info.0.json`.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is skipped on purpose: nothing is requested for it.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # 'img' is wrapped in a list; day/month/year are converted to int.
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            comic['day'] = int(comic['day'])
            comic['month'] = int(comic['month'])
            comic['year'] = int(comic['year'])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the value stored under the 'href' key of `link`. Meant to be
    assigned as a class attribute, hence the module-level `@classmethod`.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return `link['href']` joined to the class-level `url` through
    `urljoin_wrapper`. Meant to be assigned as a class attribute, hence
    the module-level `@classmethod`.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comic where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic['url']` when `last_comic` is truthy,
        otherwise from the link given by `get_first_comic_link`. Yields
        the dicts returned by `get_comic_info` (skipping None results)
        with their 'url' key set, until no next link is found or the
        next link points to the url just handled.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would otherwise loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                # The 'url' key is owned by this method, not by get_comic_info.
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return False (after printing a message) when there is no first
        link or when fetching it raises an HTTP error, True otherwise.
        """
        # A bare `import urllib` does not guarantee that the `urllib.error`
        # submodule is loaded, so import it explicitly where it is used.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from
        `url` is expected to lead back to `url`. Problems are printed and
        make the method return False.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in the order given by
        `get_archive_elements`. When `last_comic` is truthy, elements are
        ignored up to and including the one whose url equals
        `last_comic['url']`; every element after that is fetched and
        yielded (unless `get_comic_info` returns None).
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        # Materialised so that the number of elements can be reported below.
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    # The 'url' key is owned by this method, not by get_comic_info.
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last known comic reached: start yielding from the next element.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of `last_soup.find` for a 'link' element whose
    `rel` is 'next' when `next_` is truthy, 'prev' otherwise.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of `last_soup.find` for an 'a' element whose
    `rel` is 'next' when `next_` is truthy, 'prev' otherwise.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of `last_soup.find` for an 'a' element whose class
    is 'navi navi-next' when `next_` is truthy, 'navi navi-prev' otherwise.
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of `last_soup.find` for an 'a' element whose class
    is 'navi comic-nav-next navi-next' when `next_` is truthy,
    'navi comic-nav-previous navi-prev' otherwise.
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of `last_soup.find` for an 'a' element whose class
    is 'comic-nav-base comic-nav-next' when `next_` is truthy,
    'comic-nav-base comic-nav-previous' otherwise.
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the soup at the class-level `url` and return the result of
    looking up an 'a' element with class 'navi navi-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the soup at the class-level `url`, look up the 'div' element
    with class "nav-first" and return the 'a' element found inside it.
    Return None when there is no such div, so that callers see "no first
    link" instead of an AttributeError.
    """
    nav_first = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if nav_first is None:
        return None
    return nav_first.find('a')
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the soup at the class-level `url` and return the result of
    looking up an 'a' element with class 'comic-nav-base comic-nav-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The class is expected to define a `first_url` attribute; the returned
    dict only has an 'href' key holding that value.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.

    The starting URL is `cls.first_url` when the class defines it,
    otherwise it is asked interactively. Every URL visited is printed.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    # Follow "previous" links until a page has none: that page is the first.
    while comic:
        url = cls.get_url_from_link(comic)
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
358
359
360
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' elements whose src starts with the site's
        'wp-content/uploads/' path; title and date come from 'meta' elements.
        """
        # The url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
386
387
388
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each concrete blog class provides its own starting URL.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the short link ('url2'), the images
        (from the 'og:image' meta elements) and the day/month/year parsed
        from the "entry-date" span.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # NOTE(review): third argument is presumably the locale used to
        # parse the month name - confirm against string_to_date.
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
411
412
413
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
419
420
421
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
428
429
430
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
436
437
438
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
444
445
446
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
452
453
454
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
460
461
462
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
468
469
470
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    # Starting point of the navigation.
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
476
477
478
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with title, author, description, day/month/year and
        the src of the images found in the 'entry-content' div.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
509
510
511
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the 'a' element inside the relevant 'li' element, or None
        when that 'li' is not found.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the short url, the title, the images (from the
        'og:image' meta elements) and the day/month/year of the comic.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
545
546
547
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is hardcoded as a dict with the 'href' and 'title' keys
        read by `get_comic_info`.
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the link itself and the date from the three
        numeric path components of the comic url.
        """
        url_date_re = re.compile(r'.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
575
576
577
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with title, author, day/month/year and the image
        urls (each src joined to the class url).
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page layout: at least one image, and each
        # image's alt/title agree and are either the post title or empty.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
610
611
612
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with title, day/month/year, tags and the image urls:
        the 'og:image' ones followed by the bonus images not already
        listed, minus the generic bonus-panel placeholder.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
646
647
648 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with title, day/month/year and the src of the
        images found in the 'entry-body' div.
        """
        date_str = soup.find('h2', class_='date-header').string
        # NOTE(review): third argument is presumably the locale used to
        # parse day and month names - confirm against string_to_date.
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
672
673
674 View Code Duplication
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with title, day/month/year and the images taken
        from the 'og:image' meta elements.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
699
700
701 View Code Duplication
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with title, day/month/year and the images taken
        from the 'og:image' meta elements.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
725
726
727
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the `?p=` parameter of the short
        link; exactly one image is expected in the 'comic' div and its
        alt/title attributes provide the titles.
        """
        # The url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
753
754
755
class Garfield(GenericNavigableComic):
    """Retriever for the Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    # NOTE(review): simulate_first_link is defined elsewhere; presumably it
    # builds the first link from first_url - confirm.
    first_url = 'https://garfield.com/comic/1978/06/19'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation arrow of a comic page.

        Args:
            last_soup: parsed page of the current comic.
            next_: truthy to get the "right" arrow, falsy for the "left" one.

        Returns:
            Whatever ``last_soup.find`` returns for the matching anchor.
        """
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        The date is read from the comic URL (``<url>/comic/<y>/<m>/<d>``).

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: link to the comic, converted with ``get_url_from_link``.

        Returns:
            A dict with the keys 'month', 'year', 'day' and 'img'.
        """
        url = cls.get_url_from_link(link)
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, date_re.match(url).groups())
        display = soup.find('div', class_='comic-display')
        pictures = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [pic['src'] for pic in pictures],
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
783
784
785
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    first_url = 'http://dilbert.com/strip/1989-04-16'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next or previous comic.

        Args:
            last_soup: parsed page of the current comic.
            next_: truthy for the next comic, falsy for the previous one.

        Returns:
            The first anchor of the navigation div, or None when the div
            is not found.
        """
        if next_:
            div_class = 'nav-comic nav-right'
        else:
            div_class = 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=div_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary from the page meta tags.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'title', 'description', 'img', 'author',
            'tags', 'day', 'month' and 'year'.
        """
        def meta_content(prop):
            """Return the 'content' of the first meta tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        og_images = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        # Publication date is parsed with the "%B %d, %Y" format.
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in og_images],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
821
822
823
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'title', 'description' and 'img'.
        """
        # Date is on the archive page
        # The last og:title / og:description meta tag of the page is used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: every picture repeats the page title in both attributes.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
846
847
848
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns:
            The parent element of the '/firstlink.gif' image found on the
            home page, or None when that image is missing (instead of
            failing with AttributeError on ``None.parent``).
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Args:
            last_soup: parsed page of the current comic.
            next_: truthy for the next comic, falsy for the previous one.

        Returns:
            The parent element of the navigation image, or None when the
            image is missing or its parent has no 'href' attribute.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # No navigation button at all: nothing to follow.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'title' (None when the page has no
            <title>), 'title2' (non-empty 'alt' texts joined by two
            spaces) and 'img' (absolute image URLs).
        """
        title = soup.find('title')
        # Images whose 'src' ends with one of these are site furniture
        # (navigation buttons, ads, header...), not part of the comic.
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        imgs = []
        for img in soup.find_all('img'):
            src = img.get('src')
            # An <img> without 'src' cannot be downloaded: skip it rather
            # than raising KeyError.
            if src is not None and not src.endswith(ignored_suffixes):
                imgs.append(img)
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
880
881
882
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image URLs of one comic page.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the single key 'img'.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check on the scraped markup: 'alt' and 'title' must agree.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
900
901
902 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for the The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'img', 'title', 'author', 'month', 'year'
            and 'day'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        # Post date is parsed with the "%B %d, %Y" format.
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
926
927
928 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'day', 'month', 'year', 'img', 'title',
            'title2' and 'author'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        # Post date is parsed with the '%B %d, %Y' format.
        published = string_to_date(date_span.string, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # Sanity check on the scraped markup: 'alt' and 'title' must agree.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Secondary title comes from the first picture.
        title2 = pictures[0]['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'title2': title2,
            'author': author,
        }
956
957
958
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'title', 'img', 'day', 'month' and 'year'.
        """
        title = soup.find("h1", class_="comic_title").string
        date_span = soup.find("span", class_="comic_date")
        # Comic date is parsed with the "%B %d, %Y" format.
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: every picture repeats the comic title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Pictures with an empty 'src' are left out.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
985
986
987
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with rel='start' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'title', 'img', 'day', 'month' and 'year'.
            'img' holds the main image and, when present and non-empty,
            the image of the 'aftercomic' div.
        """
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            bonus_src = after_div.find('img')['src']
            if bonus_src:
                sources.append(bonus_src)
        publish_div = soup.find('div', class_='cc-publishtime')
        # First child of the div is the date, parsed with "%B %d, %Y".
        published = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1019
1020
1021
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the thumbnail anchors of the home page, in reverse order."""
        home = get_soup_at_url(cls.url)
        anchors = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary from the page meta tags.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'name' and 'img'.
        """
        name = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        # Sanity check on the scraped markup: exactly one image expected.
        assert len(og_images) == 1
        return {
            'name': name,
            'img': [meta['content'] for meta in og_images],
        }
1044
1045
1046
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary from the page meta tags.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'img', 'title', 'desc' (empty string when
            the page has no og:description), 'day', 'month' and 'year'.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept and parsed as "%Y-%m-%d".
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1072
1073
1074
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number ("p" parameter).
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        archive = get_soup_at_url(archive_url)
        return reversed(archive.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        The date is read from the image URL, the number from the comic URL.

        Args:
            soup: parsed comic page, queried through ``find``.
            link: archive anchor of the comic; its string is the title.

        Returns:
            A dict with the keys 'num', 'title', 'title2', 'img', 'year',
            'month' and 'day'.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).group(1))
        picture = soup.find('div', id='comic').find('img')
        # Sanity check on the scraped markup: 'alt' and 'title' must agree.
        assert picture['alt'] == picture['title']
        title2 = picture['title']
        img_url = picture['src']
        year, month, day = map(int, comic_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1110
1111
1112
class GenericBouletCorp(GenericNavigableComic):
    """Common retriever for the BouletCorp comics in several languages."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        The date is read from the comic URL (``<url>/<y>/<m>/<d>/``).

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: link to the comic, converted with ``get_url_from_link``.

        Returns:
            A dict with the keys 'img', 'title', 'texts', 'year', 'month'
            and 'day'.
        """
        url = cls.get_url_from_link(link)
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_re.match(url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        # Non-empty 'title' attributes of the pictures, two-space separated.
        picture_titles = (pic.get('title') for pic in pictures)
        texts = '  '.join(text for text in picture_titles if text)
        title = soup.find('title').string
        return {
            # Pictures without a 'src' attribute are left out.
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1140
1141
1142
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics (www.bouletcorp.com)."""
    # Retrieval logic is inherited; only the identification differs.
    url = 'http://www.bouletcorp.com'
    name = 'boulet'
    long_name = 'Boulet Corp'
    _categories = ('FRANCAIS', )
1148
1149
1150
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics (english.bouletcorp.com)."""
    # Retrieval logic is inherited; only the identification differs.
    url = 'http://english.bouletcorp.com'
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
1155
1156
1157 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'title' (picture titles joined by a
            space), 'author', 'img', 'day', 'month' and 'year'.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        # Post date is parsed with the "%B %d, %Y" format.
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        # Sanity check on the scraped markup: 'alt' and 'title' must agree.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1182
1183
1184
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'short_url', 'title' (empty string when
            the page has no picture), 'month', 'year', 'day' and 'img'.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        # First child of the div, stripped, parsed with "%B %d, %Y".
        published = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            first_pic = pictures[0]
            title = first_pic['alt']
            # Sanity check on the scraped markup: 'alt' and 'title' agree.
            assert first_pic['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1214
1215
1216
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_url_from_link = join_cls_url_to_href
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched too and its 'extrapanelimage' pictures are appended.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'url_extra' (None without extra panel),
            'title', 'author', 'month', 'year', 'day' and 'img'.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        # Post date is parsed with the '%Y/%m/%d' format.
        published = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1250
1251
1252
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns:
            The anchor titled 'Oldest comic' found on the home page.
        """
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Args:
            last_soup: parsed page of the current comic.
            next_: truthy for the next comic, falsy for the previous one.

        Returns:
            The navigation anchor, or None when the anchor is missing or
            has no 'href' attribute.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept
        # as found; it looks like a typo that may prevent any match -
        # confirm against the site markup.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() yields None when nothing matches: treat that like an
        # anchor without target instead of failing on ``None.get``.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'num', 'author', 'month', 'year', 'day',
            'prefix' and 'img'.
        """
        url2 = soup.find('meta', property='og:url')['content']
        # The comic number is the second-to-last '/'-separated part of
        # the og:url value.
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        # The credit reads "by <author>": check it, then drop the prefix.
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1291
1292
1293
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following ``last_comic``, by increasing number.

        The range of comic numbers is derived from the '/comic/<num>'
        links of the home page; each comic page is then fetched in turn.

        Args:
            last_comic: information dict of the last known comic (its
                'num' entry is read), or a falsy value to start from the
                lowest number found.

        Yields:
            A dict with the keys 'url', 'num', 'texts', 'img' and
            'description'.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1))
                for anchor in home_links]
        lowest = min(nums)
        highest = max(nums)
        start = last_comic['num'] + 1 if last_comic else lowest
        for num in range(start, highest + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(url)
            # Pictures are kept in reverse document order.
            pictures = page.find_all('img', src=re.compile('^/images/comics/'))[::-1]
            description = page.find('meta', attrs={'name': 'description'})['content']
            picture_titles = (pic.get('title') for pic in pictures)
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in picture_titles if text),
                'img': [urljoin_wrapper(url, pic['src']) for pic in pictures],
                'description': description,
            }
1323
1324
1325
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        anchors = get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Args:
            soup: parsed comic page, queried through ``find``.
            link: archive anchor of the comic; its string is the date and
                the string of its next sibling is kept as 'text'.

        Returns:
            A dict with the keys 'month', 'year', 'day', 'img', 'title',
            'text' and 'num'.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        # The date is cleaned by remove_st_nd_rd_th_from_date (defined
        # elsewhere) before being parsed with the "%B %d, %Y" format.
        cleaned_date = remove_st_nd_rd_th_from_date(date_str)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=comic_img_re)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1358
1359
1360
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures year, month and day.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        archive = get_soup_at_url(archive_url)
        return reversed(archive.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        The date is read from the comic URL.

        Args:
            soup: parsed comic page, queried through ``find``.
            link: archive anchor of the comic; its string is the title.

        Returns:
            A dict with the keys 'title', 'day', 'month', 'year' and 'img'.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(url).groups())
        picture = soup.find('div', id='comic').find('img')
        # Sanity check: the picture 'alt' repeats the archive title.
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1388
1389
1390
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ('<year>/<month>/');
        each month page lists images whose name starts with
        '<year><month><day>'.

        Args:
            last_comic: information dict of the last known comic (passed
                to ``get_date_for_comic``), or a falsy value to start
                from 1985-11-01.

        Yields:
            A dict with the keys 'url', 'year', 'month', 'day' and 'img'
            for every comic dated strictly after the last one seen.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The textual forms are kept: they are reused verbatim in the
            # image pattern and in the image URL.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months that start more than 31 days before the last
            # known comic.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).group(1))
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1424
1425
1426
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs (capturing the comic number) and strip images.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        archive = get_soup_at_url(archive_url)
        return archive.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the information dictionary for one comic page.

        Args:
            soup: parsed comic page, queried through ``find``.
            archive_elt: archive anchor of the comic; its string is the
                title and its URL gives the comic number.

        Returns:
            A dict with the keys 'num', 'title' and 'img'.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1449
1450
1451
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the "first" button image, or None without it."""
        page = get_soup_at_url(cls.url)
        button = page.find('img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the "next"/"prev" button image.

        Args:
            last_soup: parsed page of the current comic.
            next_: truthy for the next comic, falsy for the previous one.

        Returns:
            The parent element of the button image, or None when the
            image is not found.
        """
        direction = 'next' if next_ else 'prev'
        button_url = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_url)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary from the page meta tags.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``.
            link: unused by this implementation.

        Returns:
            A dict with the keys 'img' and 'title'.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
        }
1480
1481
1482 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the image whose src matches
        '.*/First.png' on the page at `cls.url`, or None when the page
        contains no such image.
        """
        img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        # find() gives None on a miss; never dereference .parent on it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Looks for the 'Next.png' image (when `next_` is true) or the
        'Back.png' image and returns its parent element. Returns None when
        the image is missing or when the parent carries no href attribute.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the keys 'img' (href of every
        <link rel="image_src">), 'title' (text of the post title heading)
        and 'day'/'month'/'year' taken from the date header, which is parsed
        by string_to_date with the format "%A, %B %d, %Y".
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1514
1515
1516
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its id)."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the 'og:title' meta content; the images are every <img>
        of the 'single-article' div, with src joined to `cls.url`.
        """
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        img_tags = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags],
        }
1540
1541
1542
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor whose href ends with 'comic=1')."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its title)."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the trailing 'comic=<digits>' part of the comic
        URL; the image is the first <img> whose src starts with '/oc/comics/'.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        strip = soup.find('img', src=re.compile('^/oc/comics/.*'))
        info = {'num': num}
        info['img'] = [urljoin_wrapper(comic_url, strip['src'])]
        info['title'] = strip.get('title')
        return info
1572
1573
1574
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the <div id="st"> found on the page at
        `cls.url`, or None when that div is absent (same guard as
        get_navi_link).
        """
        div = get_soup_at_url(cls.url).find("div", id="st")
        return div.parent if div else None

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of <div id="nx"> (next) or
        <div id="pvs"> (previous), or None when the div is absent.
        """
        div = last_soup.find("div", id="nx" if next_ else "pvs")
        return div.parent if div else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image (inside <div id="tt">) and one strip image
        (<img id="strip">) are expected; this is checked by assertions.
        Returns a dict with the keys 'title' (page <title> text), 'img' (src
        of both images, title image first) and 'description' (the images'
        title attributes joined with a space).
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        imgs = title_imgs + strip_imgs
        desc = ' '.join(i['title'] for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'description': desc,
        }
1608
1609
1610
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its accesskey)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title comes from the 'twitter:label1' meta tag, description from
        'og:description', images from every <img itemprop="image">.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        info = {'title': title_meta['content']}
        info['description'] = soup.find('meta', property='og:description')['content']
        info['img'] = [tag['src'] for tag in soup.find_all('img', itemprop="image")]
        return info
1634
1635
1636
class SomethingOfThatIlk(GenericEmptyComic):
    """Class to retrieve the Something Of That Ilk comics.

    The comic does not exist anymore, hence the GenericEmptyComic base.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1641
1642
1643
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title is the 'og:title' meta content; images are every <img> inside
        <div id="comic">.
        """
        og_title = soup.find('meta', property='og:title')['content']
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {'title': og_title, 'img': [tag['src'] for tag in comic_imgs]}
1661
1662
1663
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's rel="bookmark" anchors in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has no <div id="comic">, the image list is empty and
        both title and alt text are empty strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            img_src, alt, title = [], '', ''
        else:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        info = {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
        }
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        info['tags'] = ' '.join(t.string for t in tag_links)
        return info
1700
1701
1702 View Code Duplication
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the 'post-title' / 'post-date' elements;
        images are every <img> inside <div id="comic">.
        """
        title = soup.find('h2', class_='post-title').string
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        info = {
            'img': [tag['src'] for tag in comic_div.find_all('img')],
            'title': title,
        }
        info.update(day=published.day, month=published.month, year=published.year)
        return info
1724
1725
1726
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every comic image must have identical 'alt' and 'title' attributes
        (asserted); the alt text of the first image is reported as 'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1747
1748
1749 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every comic image is asserted to carry the post title as both its
        'alt' and its 'title' attribute.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find("span", class_="post-date").string, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == title for tag in comic_imgs)
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        info['img'] = [tag['src'] for tag in comic_imgs]
        info['title'] = title
        info['author'] = author
        return info
1775
1776
1777
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is built from the images' 'title' attributes joined with a
        space; each image's 'title' must equal its 'alt' (asserted).
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in comic_imgs)
        joined_title = ' '.join(tag['title'] for tag in comic_imgs)
        return {'img': [tag['src'] for tag in comic_imgs], 'title': joined_title}
1796
1797
1798
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: a stray space would become part of the URL.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the keys 'title' ('og:title' meta), 'author'
        ('shareaholic:article_author_name' meta), 'day'/'month'/'year' (the
        first 10 characters of the 'shareaholic:article_published_time'
        meta, parsed by string_to_date with "%Y-%m-%d") and 'img' (every
        'og:image' meta content that is not listed in `skip_imgs`).
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # Images excluded from the result even when advertised via og:image.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1830
1831
1832 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The 'alt' value is the alt text of the first comic image; all images
        must have matching 'alt' and 'title' attributes (asserted).
        """
        title = soup.find('h2', class_='post-title').string
        published = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        info['img'] = [tag['src'] for tag in comic_imgs]
        info['title'] = title
        info['alt'] = first_alt
        return info
1859
1860
1861 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is required inside the 'comicpane' div, and each
        must use the post title as both 'title' and 'alt' (asserted).
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        assert all(tag['title'] == tag['alt'] == title for tag in pane_imgs)
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        info['img'] = [tag['src'] for tag in pane_imgs]
        info['title'] = title
        info['author'] = author
        return info
1889
1890
1891 View Code Duplication
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is the first 10 characters of the <time> element's
        'datetime' attribute, parsed with "%Y-%m-%d".
        """
        info = {'title': soup.find('title').string}
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        info['short_url'] = soup.find('link', rel='shortlink')['href']
        info['img'] = [tag['src'] for tag in content_imgs]
        info['tags'] = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
1918
1919
1920
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor with id 'firstlink')."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its id)."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the last '/'-separated component of the comic
        URL; the image is the first <img> whose src starts with 'dhdcomics/'.
        """
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        info = {'title': soup.find('h2', id='titleheader').string}
        info['title2'] = soup.find('div', id='subtext').string
        info['alt'] = strip.get('title')
        info['img'] = [urljoin_wrapper(comic_url, strip['src'].strip())]
        info['num'] = int(comic_url.split('/')[-1])
        return info
1949
1950
1951
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's 'archive-title' cells in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archives/'))
        cells = archive_soup.find_all('td', class_='archive-title')
        return reversed(cells)

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the first anchor inside the archive cell."""
        anchor = td.find('a')
        return anchor['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The year is taken from the comic URL and combined with the text of
        the cell preceding `td` (month and day) to build the date.
        """
        comic_url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        year = re.compile('^%s/([0-9]+)/' % cls.url).match(comic_url).group(1)
        published = string_to_date(month_and_day + ' ' + year, '%b %d %Y')
        comic_div = soup.find('div', id='comic')
        imgs = [comic_div.find('img')]
        assert len(imgs) == 1
        assert all(tag['title'] == tag['alt'] == title for tag in imgs)
        info = {
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        info['img'] = [urljoin_wrapper(cls.url, tag['src']) for tag in imgs]
        info['title'] = title
        return info
1987
1988
1989
class DiscoBleach(GenericEmptyComic):
    """Class to retrieve Disco Bleach Comics.

    Retrieval does not work anymore, hence the GenericEmptyComic base.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1994
1995
1996
class TubeyToons(GenericEmptyComic):
    """Class to retrieve TubeyToons comics.

    Retrieval does not work anymore, hence the GenericEmptyComic base.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; other
    # classes may use the same spelling, so confirm before renaming.
    _categories = ('TUNEYTOONS', )
2004
2005
2006 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The author is the text of the second child of the 'post-author'
        span. At least one image is required, and all images must share the
        first image's 'title' as both 'title' and 'alt' (asserted).
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        published = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt = pane_imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == alt for tag in pane_imgs)
        info = {
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        info['img'] = [tag['src'] for tag in pane_imgs]
        info['title'] = title
        info['alt'] = alt
        info['author'] = author
        return info
2034
2035
2036
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected inside the 'post' div (asserted); the
        title is that image's 'title' attribute, or "" when there is no
        image or no such attribute.
        """
        post_imgs = soup.find('div', class_='post').find_all('img')
        assert len(post_imgs) <= 1
        srcs = [tag['src'] for tag in post_imgs]
        if post_imgs:
            title = post_imgs[0].get('title', "")
        else:
            title = ""
        return {'img': srcs, 'title': title}

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors pointing under '<url>/comic/', reversed."""
        comic_href_re = re.compile('^%s/comic/.' % cls.url)
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return reversed(archive_soup.find_all('a', href=comic_href_re))
2060
2061
2062 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor titled "First")."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor found by its title)."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only images of the 'comic' div whose 'alt' and 'title' are both
        empty strings are collected.
        """
        info = {'title': soup.find('h1').string}
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
        info['img'] = [tag['src'] for tag in comic_imgs]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
2092
2093
2094 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        A page without <div id="comic"> (or without images in it) yields an
        empty image list and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        info = {
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        info['img'] = [tag['src'] for tag in comic_imgs]
        info['title'] = title
        info['author'] = author
        return info
2120
2121
2122
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the <img name="beginArrow"> found on
        the page at `cls.url`, or None when no such image exists.
        """
        img = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        # find() gives None on a miss; never dereference .parent on it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the <img name="rightArrow"> (next) or
        <img name="leftArrow"> (previous), or None when the arrow image is
        not present on the page.
        """
        img = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the keys 'title' (content of the 'twitter:title'
        meta tag) and 'img' (content of every 'og:image' meta tag).
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2148 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2149
2150
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the 'post-list' <ul>, reversed."""
        listing_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        links = listing_soup.find('ul', class_='post-list').find_all('a', class_='post-link')
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Title is the 'og:title' meta content; images are the content of
        every 'og:image' meta tag.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {'title': og_title, 'img': [meta['content'] for meta in og_images]}
2172
2173
2174
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        The archive row must hold exactly three cells: the second carries
        the title link, the third the date in "%m.%d.%y" format. The
        description is '' when the page has no 'og:description' meta tag.
        """
        _, title_cell, date_cell = tr.find_all('td')
        anchor = title_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
        }
        info['img'] = [tag['src'] for tag in content_imgs]
        info['alt'] = ' '.join(tag['alt'] for tag in content_imgs)
        return info

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the second cell of the row."""
        _, title_cell, __ = tr.find_all('td')
        anchor = title_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's <tbody> in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2216
2217
2218 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are taken from the 'comic' div; title, author and date are
        taken from the 'post-content' div.
        """
        images = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        post_date = post.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        # Only 'alt' is stored: check that 'title' carries the same text.
        for image in images:
            assert image['alt'] == image['title']
        return {
            'title': title,
            'img': [image['src'] for image in images],
            'alt': ''.join([image['alt'] for image in images]),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2245
2246
2247
class RockPaperScissors(GenericNavigableComic):
    """Class to retrieve Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns the page title, the transcript text, the short link and the
        image urls advertised in the og:image meta tags.
        """
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript = transcript_div.string
        return {
            'title': page_title,
            'transcript': transcript,
            'short_url': short_url,
            'img': [meta['content'] for meta in image_metas],
        }
2268
2269
2270
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Metadata is read from the page's <meta> tags; the comic image is the
        single <img> carrying data-recalc-dims="1".
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        desc_meta = soup.find('meta', attrs={'name': 'description'})
        description = desc_meta['content']
        # The tag meta is optional: fall back on an empty string.
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        images = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(images) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join([image['alt'] for image in images]),
            # Drop the query string from the image url.
            'img': [image['src'].rsplit('?', 1)[0] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2301
2302
2303
class JuliasDrawings(GenericListableComic):
    """Class to retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the first link of each 'li post' article, in reversed page order."""
        front_page = get_soup_at_url(cls.url)
        articles = front_page.find_all('article', class_='li post')
        links = []
        for article in reversed(articles):
            links.append(article.find('a'))
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Image urls are joined onto the class url.
        """
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published_time = soup.find('meta', property='og:article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        images = soup.find('section', class_='post-content').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2329
2330
2331
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the <tr> rows of the archive's chapter table that hold comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # A row is expected to hold exactly 4 cells; only the second is used.
        cells = tr.find_all('td')
        _, comic_cell, __, ___ = cells
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        Number, title and date come from the archive row `tr`; the image
        comes from the comic page `soup`.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        images = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(images) == 1
        for image in images:
            assert image.get('alt') == image.get('title')
        return {
            'num': num,
            'title': title,
            'alt': images[0].get('alt', ''),
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2372
2373
2374
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the page; author, date and images are read
        from inside the 'post-content' div.
        """
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = post.find("span", class_="post-date").string
        published = string_to_date(post_date, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        images = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2400
2401
2402
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the page's <meta> tags.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2428
2429
2430
class ThorsThundershack(GenericNavigableComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first matching link whose href is not '/comic', or None
        when there is no such link.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Image urls are joined onto the class url.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        for image in images:
            assert image['title'] == image['alt']
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        published_time = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(published_time, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2473
2474
2475
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, the title, the alt text shared by
        the images, the author and the publication date. When the 'comic'
        div holds no image, 'img' is empty and 'alt' is an empty string.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard against pages without image (imgs[0] would raise IndexError),
        # as done by the other comics using the same page layout.
        alt = imgs[0]['alt'] if imgs else ""
        # A single alt text is stored: all images must agree on it.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2502
2503
2504 View Code Duplication
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed with the "de_DE.utf8" locale.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        published = string_to_date(post_date, "%d. %B %Y", "de_DE.utf8")
        images = soup.find("div", id="comic").find_all("img")
        # Image texts are not stored: they must just repeat the post title.
        for image in images:
            assert image['alt'] == image['title'] == title
        assert len(images) <= 1
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2530
2531
2532
class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one image is expected in the 'comic' div.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        entry_date = soup.find("span", class_="entry-date").string
        published = string_to_date(entry_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        first_image = images[0]
        alt = first_image['alt']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2560
2561
2562
class TheAwkwardYeti(GenericNavigableComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns the image urls of the 'comic' div, the post title and the
        publication date.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find("span", class_="post-date").string
        published = string_to_date(post_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Only the first image (if any) is checked for matching alt/title.
        assert not images or images[0]['alt'] == images[0]['title']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2589
2590
2591
class PleasantThoughts(GenericNavigableComic):
    """Class to retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and images are both looked up inside the 'post-content' div.
        """
        post = soup.find('div', class_='post-content')
        heading = post.find('h2', class_='post-title')
        title = heading.string
        entry = post.find("div", class_="entry")
        images = entry.find_all("img")
        return {
            'title': title,
            'img': [image['src'] for image in images],
        }
2609
2610
2611
class MisterAndMe(GenericNavigableComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'comic' div; 'alt' is an empty
        string when there is none.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        published = string_to_date(post_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2641
2642
2643
class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'comic' div; 'alt' is an empty
        string when there is none.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        alt = "" if not images else images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2671
2672
2673
class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        'alt' is the alt text of the first image of the 'comic' div, or an
        empty string when the div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        published = string_to_date(post_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2703
2704
2705
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Endless Origami Comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        'alt' is the alt text of the first image of the 'comic' div, or an
        empty string when the div holds no image.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        alt = "" if not images else images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2732
2733
2734
class PlanC(GenericNavigableComic):
    """Class to retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns the post title, the image urls of the 'comic' div and the
        publication date.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': title,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2756
2757
2758
class BuniComic(GenericNavigableComic):
    """Class to retrieve Buni Comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one image is expected in the 'comic' div; its title
        attribute is used as the comic title.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        first_image = images[0]
        return {
            'img': [image['src'] for image in images],
            'title': first_image['title'],
        }
2776
2777
2778
class GenericCommitStrip(GenericNavigableComic):
    """Generic class to retrieve Commit Strips in different languages."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: the language-specific subclasses set the actual value.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        'title2' joins the title attributes of the images (an empty string
        stands in for a missing attribute). Image urls are converted to
        plain ascii and joined onto the class url.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='entry-content').find_all('img')
        image_titles = [image.get('title', '') for image in images]
        title2 = ' '.join(image_titles)
        image_urls = []
        for image in images:
            ascii_src = convert_iri_to_plain_ascii_uri(image['src'])
            image_urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': title2,
            'description': description,
            'img': image_urls,
        }
2797
2798
2799
class CommitStripFr(GenericCommitStrip):
    """Class to retrieve Commit Strips in French."""
    # Identification of the comic
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    # Where to find the comics
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2806
2807
2808
class CommitStripEn(GenericCommitStrip):
    """Class to retrieve Commit Strips in English."""
    # Identification of the comic
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    # Where to find the comics
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2814
2815
2816
class GenericBoumerie(GenericNavigableComic):
    """Generic class to retrieve Boumeries comics in different languages."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: the language-specific subclasses set the actual values.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The post date is parsed with the class's `date_format` and `lang`.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        return {
            'short_url': short_url,
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2842
2843
2844
class BoumerieEn(GenericBoumerie):
    """Class to retrieve Boumeries comics in English."""
    # Identification of the comic
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Format and locale used to parse the post dates
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2851
2852
2853
class BoumerieFr(GenericBoumerie):
    """Class to retrieve Boumeries comics in French."""
    # Identification of the comic
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    _categories = ('FRANCAIS', )
    url = 'http://bd.boumerie.com'
    # Format and locale used to parse the post dates
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2861
2862
2863
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (empty string when the page has
        neither <h1> nor <h2>), the og:description text (empty string when
        the meta tag is missing), the short link, the image urls of the post
        and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc = soup.find('meta', property='og:description')
        # Keep the text of the meta tag, not the tag object itself, so that
        # 'description' is always a plain string.
        description = desc['content'] if desc else ''
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': description,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2895
2896
2897
class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Pages without a 'comic' div, or without image in it, give an empty
        image list and an empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            alt = images[0]['title']
        else:
            alt = ""
        # A single alt text is stored: all images must agree on it.
        for image in images:
            assert image['alt'] == image['title'] == alt
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2925
2926
2927
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the post id found in the short link, which is
        expected to look like '<cls.url>/?p=<num>'.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # A single alt text is stored: all images must agree on it.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2957
2958
2959
class MoonBeard(GenericNavigableComic):
    """Retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD',)
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of the comic page held in ``soup``.

        Returns a dict with the short url, the post number, the image
        urls, the publication date (day/month/year), the title, the
        space-separated tags, the alt text and the author.
        """
        post_title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        # The post number is the "p" argument of the short link.
        id_match = re.match('^%s/\\?p=([0-9]*)' % cls.url, shortlink)
        post_num = int(id_match.group(1))
        pictures = soup.find('div', id='comic').find_all('img')
        hover_text = pictures[0]['title']
        # Sanity check: every picture carries the same alt and title text.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == hover_text
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        tag_metas = soup.find_all('meta', property='article:tag')
        joined_tags = ' '.join(meta['content'] for meta in tag_metas)
        writer = soup.find('span', class_='post-author').string
        return {
            'short_url': shortlink,
            'num': post_num,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'title': post_title,
            'tags': joined_tags,
            'alt': hover_text,
            'author': writer,
        }
2996
2997
2998
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the ``<a>`` element found inside the relevant ``<li>``,
        or None when that list item is not present in ``last_soup``.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # Guard against a missing list item instead of raising
        # AttributeError on None.
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, the title, the author and
        the publication date (day/month/year) read from ``soup``.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3029
3030
3031
class LittleLifeLines(GenericNavigableComic):
    """Retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next/previous comic, or None."""
        # The site labels are reversed: its "prev" item is our next comic.
        item_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=item_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of the comic page held in ``soup``.

        Returns a dict with the title, the alt text of the first image,
        the description, the author, the publication date
        (day/month/year) and the image urls.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        published = string_to_date(
            soup.find('time', class_='published')['datetime'], "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Only keep the images that actually have a source.
        pictures = [pic for pic in body.find_all('img')
                    if pic.get('src') is not None]
        first_alt = pictures[0]['alt']
        return {
            'title': og_title,
            'alt': first_alt,
            'description': og_desc,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
3070
3071
3072
class GenericWordPressInkblot(GenericNavigableComic):
    """Generic retriever for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" anchor found on the page at ``cls.url``."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of the comic page held in ``soup``.

        Returns a dict with the title, the publication date
        (day/month/year) and the image urls.
        """
        og_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        return {
            'title': og_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
3095
3096
3097
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve Everything's Stupid comics (WordPress/Inkblot site)."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3105
3106
3107
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics (WordPress/Inkblot site)."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = 'The Ism'
    url = 'http://www.theism-comics.com'
3113
3114
3115
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve Wooden Plank Studios comics (WordPress/Inkblot site)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3120
3121
3122
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the "First" navigation image."""
        first_button = get_soup_at_url(cls.url).find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the "Next"/"Back" image, or None if absent."""
        button_alt = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=button_alt)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image urls read from the Open Graph metas."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
3150
3151
3152
class SheldonComics(GenericNavigableComic):
    """Retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id "nav-first" from the page at ``cls.url``."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous anchor, or None.

        Anchors whose href is exactly 'http://www.sheldoncomics.com' are
        skipped; the first remaining one is returned.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=wanted_id)
            if anchor['href'] != 'http://www.sheldoncomics.com')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image url of the single comic image."""
        pictures = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks: alt and title agree, and there is exactly one image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        return {
            'title': pictures[0]['title'],
            'img': [pic['src'] for pic in pictures],
        }
3183
3184
3185
class Ubertool(GenericNavigableComic):
    """Retrieve Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL',)
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, title and publication date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3210
3211
3212
class EarthExplodes(GenericNavigableComic):
    """Retrieve The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id "next" or "prev" from ``last_soup``."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, page title and alt text of a comic page."""
        page_title = soup.find('title').string
        pictures = soup.find('div', id='image').find_all('img')
        # The title attribute of the first image is used as alt text.
        hover_text = pictures[0].get('title', '')
        # Image sources are joined to the site url.
        picture_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': picture_urls,
            'title': page_title,
            'alt': hover_text,
        }
3237
3238
3239
class PomComics(GenericNavigableComic):
    """Retrieve Pom Comics (Piece of Me)."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the "btn-first" anchor from the page at ``cls.url``."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the "btn-next" or "btn-prev" anchor from ``last_soup``."""
        button_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description, tags and image urls of a comic page."""
        heading = soup.find('h1').string
        og_desc = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        pictures = soup.find('div', class_='comic').find_all('img')
        # Image sources are joined to the site url.
        picture_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': heading,
            'desc': og_desc,
            'tags': keywords,
            'img': picture_urls,
        }
3269
3270
3271
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the "backward" glyph icon."""
        backward_icon = get_soup_at_url(cls.url).find(
            'span', class_='glyphicon glyphicon-backward')
        return backward_icon.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent element of the right/left chevron icon."""
        direction = 'right' if next_ else 'left'
        chevron = last_soup.find(
            'span', class_='glyphicon glyphicon-chevron-' + direction)
        return chevron.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return urls, titles, alt text and image urls of a comic page."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        twitter_url = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # Date extraction is currently disabled:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        pictures = soup.find_all('img', class_='comic img-responsive')
        first_picture = pictures[0]
        hover_title = first_picture['title']
        alt_text = first_picture['alt']
        return {
            'url2': twitter_url,
            'title': twitter_title,
            'title2': hover_title,
            'alt': alt_text,
            'img': [pic['src'] for pic in pictures],
        }
3306
3307
3308
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of ``soup`` as a list.

        The "cnav" elements are split into the first five and the rest,
        which are asserted to be equal. Elements without an href are
        replaced by None in the returned list.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Expected order: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None
                for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the page at ``cls.url``."""
        navigation = cls.get_nav(get_soup_at_url(cls.url))
        return navigation[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title (taken from ``link``) and the image urls."""
        pictures = soup.find_all('img', id='comicimg')
        link_title = link['title']
        return {
            'title': link_title,
            'img': [pic['src'] for pic in pictures],
        }
3342
3343
3344
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the entry title and the urls of the images
        found in the entry content of ``soup``. ``link`` is not used.
        """
        # The stray debugging print of `link` was removed: it wrote to
        # stdout for every comic retrieved.
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3366
3367
3368
class MarketoonistComics(GenericNavigableComic):
    """Retrieve Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, publication date and title of a comic page."""
        og_images = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': og_title,
        }
3391
3392
3393
class ConsoliaComics(GenericNavigableComic):
    """Retrieve Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor of class "first" from the page at ``cls.url``."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor of class "next" or "prev" from ``last_soup``."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls and publication date of a comic page."""
        og_title = soup.find('meta', property='og:title')['content']
        published = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3424
3425
3426
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS',)
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the "newer" (next) or "older" (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, author and page title of a comic page."""
        page_title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        pictures = article.find_all('img')
        writer = soup.find('span', itemprop='author').string
        return {
            'img': [pic['src'] for pic in pictures],
            'author': writer,
            'title': page_title,
        }
3451
3452
3453
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The site labels are reversed: 'prev-item' is used for the next comic.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the alt text of the first image
        (an empty string when the page has no image), the description,
        the author, the publication date (day/month/year) and the image
        urls joined to the site url.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Two page layouts are handled: fall back on the second container.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        # Only keep the images that actually have a source.
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # NOTE(review): `in` on a BeautifulSoup Tag appears to test its
        # children rather than its attributes, which would make this check
        # always pass - confirm whether i.has_attr('title') was intended.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Use an empty string (not a list) when there is no image so that
        # 'alt' always has the same type.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3491
3492
3493
class GloryOwlComix(GenericNavigableComic):
    """Retrieve Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the "newer" (next) or "older" (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, author and page title of a comic page."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        writer = soup.find('a', rel='author').string
        return {
            'img': [image_link['href'] for image_link in image_links],
            'author': writer,
            'title': page_title,
        }
3518
3519
3520
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the info dict of each post returned by get_posts, skipping
        the posts for which get_comic_info returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post.

        A warning is printed when the url does not start with cls.url;
        the url is returned unchanged in every case.
        """
        url = post['url']
        if not url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_api_url(cls):
        """Get the url of the V1 API endpoint ('/api/read/') for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Get the API url for the post with the given (integer) id."""
        return cls.get_api_url() + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo', otherwise a
        dict with the urls, the date (day/month/year), the title, the
        space-separated tags, the image urls, the tumblr id and the API
        url of the post.
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        # The timestamp is converted with fromtimestamp, i.e. in local time.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When last_comic is provided, only the posts newer than it are
        returned. An empty list is returned (after printing a message)
        when the previous post cannot be fetched or is not found among
        the posts listed by the API.
        """
        # Imported locally: a plain "import urllib" does not guarantee that
        # the "urllib.error" submodule is loaded.
        import urllib.error

        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        posts_acc = []
        if waiting_for_id is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return []
        api_url = cls.get_api_url()
        # A first call is made to know the total number of posts.
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                tumblr_id = int(p['id'])
                # Stop as soon as the last known post is reached.
                if waiting_for_id and waiting_for_id == tumblr_id:
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_id is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_id)
        return []
3621
3622
3623
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal comics from Tumblr."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC',)
3631
3632
3633
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics from Tumblr."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3638
3639
3640
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics from Tumblr."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3645
3646
3647
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics from Tumblr."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ('TIE',)
3655
3656
3657
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics from Tumblr."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3663
3664
3665
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics from Tumblr."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'https://picturesinboxescomic.tumblr.com'
3671
3672
3673
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve Tubey Toons comics from Tumblr."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'https://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - check
    # the other Tubey Toons classes before changing it.
    _categories = ('TUNEYTOONS',)
3681
3682
3683
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed Comics from Tumblr."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED',)
3691
3692
3693
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics from Tumblr."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
3698
3699
3700
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics from Tumblr."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3705
3706
3707
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics from Tumblr."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3712
3713
3714
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics from Tumblr."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3720
3721
3722
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, fetched through the generic Tumblr V1 logic."""

    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3727
3728
3729
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN',)
3736
3737
3738
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, fetched through the generic Tumblr V1 logic."""

    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3743
3744
3745
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, fetched through the generic Tumblr V1 logic."""

    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3750
3751
3752
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3758
3759
3760
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD',)
3767
3768
3769
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3775
3776
3777
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
3784
3785
3786
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3792
3793
3794
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3800
3801
3802
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, fetched through the generic Tumblr V1 logic."""

    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3807
3808
3809
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
3816
3817
3818
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, fetched through the generic Tumblr V1 logic."""

    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3824
3825
3826
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS',)
3834
3835
3836
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
3842
3843
3844
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3850
3851
3852
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3858
3859
3860
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3866
3867
3868
class BouletCorpTumblr(GenericTumblrV1):
    """Boulet Corp comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET',)
3875
3876
3877
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI',)
3886
3887
3888
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, fetched through the generic Tumblr V1 logic."""

    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3893
3894
3895
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.downtheupwardspiral.com
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3902
3903
3904
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, fetched through the generic Tumblr V1 logic."""

    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Was spelled `categories` (no leading underscore), unlike every other
    # comic class in this file, which declares its labels as `_categories`.
    _categories = ('DAMILEE', )
3911
3912
3913
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Hot Comics For Cool People, fetched through the generic Tumblr V1 logic."""

    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Was spelled `categories` (no leading underscore), unlike every other
    # comic class in this file, which declares its labels as `_categories`.
    _categories = ('DAMILEE', )
3922
3923
3924
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE',)
3932
3933
3934
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3940
3941
3942
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY',)
3950
3951
3952
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3958
3959
3960
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, fetched through the generic Tumblr V1 logic."""

    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
3966
3967
3968
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Chris Hallbeck comics, fetched through the generic Tumblr V1 logic."""

    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name previously read 'Chris Hallback'; the class name, the
    # short name and every URL above spell it 'Hallbeck'.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category label keeps its historical 'HALLBACK'
    # spelling because other classes outside this view may share it.
    _categories = ('HALLBACK', )
3978
3979
3980
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, fetched through the generic Tumblr V1 logic."""

    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3985
3986
3987
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, fetched through the generic Tumblr V1 logic."""

    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3993
3994
3995
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
4001
4002
4003
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE',)
4012
4013
4014
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
4021
4022
4023
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
4029
4030
4031
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
4037
4038
4039
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
4045
4046
4047
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, fetched through the generic Tumblr V1 logic."""

    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
4053
4054
4055
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4061
4062
4063
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, declared on top of the generic Tumblr V1 logic.

    NOTE(review): GenericEmptyComic comes first in the bases, which presumably
    deactivates retrieval for this comic - confirm against that class.
    """

    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
4069
4070
4071
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://squireseses.tumblr.com'
    _categories = ('MOONBEARD',)
4079
4080
4081
class AComik(GenericTumblrV1):
    """A Comik comics, fetched through the generic Tumblr V1 logic."""

    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4086
4087
4088
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, fetched through the generic Tumblr V1 logic."""

    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4093
4094
4095
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4101
4102
4103
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, fetched through the generic Tumblr V1 logic."""

    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('LINS',)
4111
4112
4113
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, fetched through the generic Tumblr V1 logic."""

    # Was on https://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS',)
4120
4121
4122
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, fetched through the generic Tumblr V1 logic."""

    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4127
4128
4129
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, fetched through the generic Tumblr V1 logic."""

    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4134
4135
4136
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, fetched through the generic Tumblr V1 logic."""

    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4141
4142
4143
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY',)
4151
4152
4153
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4159
4160
4161
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4167
4168
4169
class Hoomph(GenericTumblrV1):
    """Hoomph comics, fetched through the generic Tumblr V1 logic."""

    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4174
4175
4176
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, fetched through the generic Tumblr V1 logic."""

    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4183
4184
4185
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, fetched through the generic Tumblr V1 logic."""

    # Also on https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4191
4192
4193
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4200
4201
4202
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, fetched through the generic Tumblr V1 logic."""

    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4207
4208
4209
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, fetched through the generic Tumblr V1 logic."""

    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4214
4215
4216
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, via the generic Tumblr V1 logic."""

    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://marcoandco.tumblr.com'
4221
4222
4223
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, fetched through the generic Tumblr V1 logic."""

    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4228
4229
4230
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, fetched through the generic Tumblr V1 logic."""

    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4235
4236
4237
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4243
4244
4245
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4251
4252
4253
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics, fetched through the generic Tumblr V1 logic."""

    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4261
4262
4263
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, fetched through the generic Tumblr V1 logic."""

    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4268
4269
4270
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake comics, fetched through the generic Tumblr V1 logic."""

    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
4275
4276
4277
class Robospunk(GenericTumblrV1):
    """Robospunk comics, fetched through the generic Tumblr V1 logic."""

    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4282
4283
4284
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, fetched through the generic Tumblr V1 logic."""

    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4289
4290
4291
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4298
4299
4300
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, fetched through the generic Tumblr V1 logic."""

    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4305
4306
4307
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL',)
4315
4316
4317
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4323
4324
4325
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, fetched through the generic Tumblr V1 logic."""

    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4330
4331
4332
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, fetched through the generic Tumblr V1 logic."""

    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4337
4338
4339
class Sephko(GenericTumblrV1):
    """Sephko comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4345
4346
4347
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn comics, fetched through the generic Tumblr V1 logic."""

    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4352
4353
4354
# Deactivated because it downloads too many things
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Art By Moga comics, declared on top of the generic Tumblr V1 logic.

    GenericEmptyComic is listed first in the bases to deactivate this comic.
    """

    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4359
4360
4361
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4367
4368
4369
class LibraryComic(GenericTumblrV1):
    """LibraryComic comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4375
4376
4377
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4385
4386
4387
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Victims Of Circumsolar comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4393
4394
4395
class RockPaperCynicTumblr(GenericTumblrV1):
    """Rock Paper Cynic comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4402
4403
4404
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4411
4412
4413
class CatanaComics(GenericTumblrV1):
    """Catana comics, fetched through the generic Tumblr V1 logic."""

    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4418
4419
4420
class AngryAtNothingTumblr(GenericTumblrV1):
    """Angry At Nothing comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.angryatnothing.net
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4427
4428
4429
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comic, fetched through the generic Tumblr V1 logic."""

    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4434
4435
4436
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT',)
4444
4445
4446
class ImogenQuestTumblr(GenericTumblrV1):
    """Imogen Quest comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4452
4453
4454
class Shitfest(GenericTumblrV1):
    """Shitfest comics, fetched through the generic Tumblr V1 logic."""

    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4459
4460
4461
class IceCreamSandwichComics(GenericTumblrV1):
    """Ice Cream Sandwich Comics, fetched through the generic Tumblr V1 logic."""

    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4466
4467
4468
class Dustinteractive(GenericTumblrV1):
    """Dustinteractive comics, fetched through the generic Tumblr V1 logic."""

    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4473
4474
4475
class StickyCinemaFloor(GenericTumblrV1):
    """Sticky Cinema Floor comics, fetched through the generic Tumblr V1 logic."""

    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4480
4481
4482
class IncidentalComicsTumblr(GenericTumblrV1):
    """Incidental Comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.incidentalcomics.com
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
    url = 'http://incidentalcomics.tumblr.com'
4488
4489
4490
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """A Pleasant Waste Of Time comics, fetched through the generic Tumblr V1 logic."""

    # Also on https://tapas.io/series/A-Pleasant-
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    url = 'https://artjcf.tumblr.com'
    _categories = ('WASTE',)
4497
4498
4499
class HorovitzComicsTumblr(GenericTumblrV1):
    """Horovitz comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ',)
4506
4507
4508
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Deep Dark Fears comics, fetched through the generic Tumblr V1 logic."""

    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4513
4514
4515
class ExtraFabulousComicsTumblr(GenericTumblrV1):
    """Extra Fabulous Comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://extrafabulouscomics.com
    name = 'efc-tumblr'
    long_name = 'Extra Fabulous Comics (from Tumblr)'
    url = 'https://extrafabulouscomics.tumblr.com'
    _categories = ('EFC',)
4522
4523
4524
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """James Of No Trades comics, fetched through the generic Tumblr V1 logic."""

    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES',)
4533
4534
4535
class HorovitzComics(GenericEmptyComic, GenericListableComic):
    """Shared base for the comics hosted on www.horovitzcomics.com.

    Concrete subclasses provide ``name``, ``long_name`` and a compiled
    ``link_re`` whose first group captures the comic number.
    """

    # Also on https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ',)
    # The three numeric path segments of the image URL are read as
    # year, month and day (in that order) by get_comic_info.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder value; subclasses replace it with a compiled pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for one comic.

        ``link`` is the archive anchor (its ``href`` yields the comic number,
        its text the title); ``soup`` is the parsed comic page, which must
        contain exactly one ``<img id="comic">`` element.
        """
        target = link['href']
        num = int(cls.link_re.match(target).group(1))
        title = link.string
        pictures = soup.find_all('img', id='comic')
        assert len(pictures) == 1
        # The publication date is encoded in the image path.
        date_match = cls.img_re.match(pictures[0]['src'])
        year, month, day = (int(part) for part in date_match.groups())
        info = {}
        info['title'] = title
        info['day'] = day
        info['month'] = month
        info['year'] = year
        info['img'] = [picture['src'] for picture in pictures]
        info['num'] = num
        return info

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching ``link_re``, in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4567
4568
4569
class HorovitzNew(HorovitzComics):
    """The 'new' series of Horovitz comics (archive links under /comics/new/)."""

    name = 'horovitznew'
    long_name = 'Horovitz New'
    # First group captures the comic number.
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
4574
4575
4576
class HorovitzClassic(HorovitzComics):
    """The 'classic' series of Horovitz comics (archive links under /comics/classic/)."""

    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # First group captures the comic number.
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
4581
4582
4583
class GenericGoComic(GenericNavigableComic):
    """Base class sharing the scraping logic of the comics on gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, found on the page at `cls.url`."""
        first_button = 'fa btn btn-outline-default btn-circle fa-backward sm '
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_=first_button)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next comic if `next_` is true, else the previous one."""
        if next_:
            button = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            button = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined to the gocomics.com root url."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, author, tags) from the page."""
        published = soup.find('meta', property='article:published_time')['content']
        pub_date = string_to_date(published, "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        pictures = picture.find_all('img')
        author = soup.find('meta', property='article:author')['content']
        tags = soup.find('meta', property='article:tag')['content']
        return {
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in pictures],
            'author': author,
            'tags': tags,
        }
4620
4621
4622
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve the Pearls Before Swine comics published on gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4627
4628
4629
class Peanuts(GenericGoComic):
    """Retrieve the Peanuts comics published on gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4634
4635
4636
class MattWuerker(GenericGoComic):
    """Retrieve the Matt Wuerker comics published on gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4641
4642
4643
class TomToles(GenericGoComic):
    """Retrieve the Tom Toles comics published on gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4648
4649
4650
class BreakOfDay(GenericGoComic):
    """Retrieve the Break Of Day comics published on gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4655
4656
4657
class Brevity(GenericGoComic):
    """Retrieve the Brevity comics published on gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4662
4663
4664
class MichaelRamirez(GenericGoComic):
    """Retrieve the Michael Ramirez comics published on gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4669
4670
4671
class MikeLuckovich(GenericGoComic):
    """Retrieve the Mike Luckovich comics published on gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4676
4677
4678
class JimBenton(GenericGoComic):
    """Retrieve the Jim Benton comics published on gocomics.com."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4684
4685
4686
class TheArgyleSweater(GenericGoComic):
    """Retrieve the Argyle Sweater comics published on gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4691
4692
4693
class SunnyStreet(GenericGoComic):
    """Retrieve the Sunny Street comics published on gocomics.com."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4699
4700
4701
class OffTheMark(GenericGoComic):
    """Retrieve the Off The Mark comics published on gocomics.com."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4707
4708
4709
class WuMo(GenericGoComic):
    """Retrieve the WuMo comics published on gocomics.com."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4715
4716
4717
class LunarBaboon(GenericGoComic):
    """Retrieve the Lunar Baboon comics published on gocomics.com."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4724
4725
4726
class SandersenGocomic(GenericGoComic):
    """Retrieve the Sarah Andersen comics published on gocomics.com."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4733
4734
4735
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retrieve the Saturday Morning Breakfast Cereal comics published on gocomics.com."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4743
4744
4745
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retrieve the Calvin and Hobbes comics published on gocomics.com."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4751
4752
4753
class RallGoComic(GenericGoComic):
    """Retrieve the Ted Rall comics published on gocomics.com."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL', )
4760
4761
4762
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retrieve The Awkward Yeti comics published on gocomics.com."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4771
4772
4773
class BerkeleyMewsGoComics(GenericGoComic):
    """Retrieve the Berkeley Mews comics published on gocomics.com."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4781
4782
4783
class SheldonGoComics(GenericGoComic):
    """Retrieve the Sheldon comics published on gocomics.com."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4789
4790
4791
class FowlLanguageGoComics(GenericGoComic):
    """Retrieve the Fowl Language comics published on gocomics.com."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4800
4801
4802
class NickAnderson(GenericGoComic):
    """Retrieve the Nick Anderson comics published on gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4807
4808
4809
class GarfieldGoComics(GenericGoComic):
    """Retrieve the Garfield comics published on gocomics.com."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4816
4817
4818
class DorrisMcGoComics(GenericGoComic):
    """Retrieve the Dorris Mc comics published on gocomics.com."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4824
4825
4826
class FoxTrot(GenericGoComic):
    """Retrieve the FoxTrot comics published on gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4831
4832
4833
class FoxTrotClassics(GenericGoComic):
    """Retrieve the FoxTrot Classics comics published on gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4838
4839
4840
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retrieve the Mister & Me comics published on gocomics.com."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4847
4848
4849
class NonSequitur(GenericGoComic):
    """Retrieve the Non Sequitur (Wiley Miller) comics published on gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4854
4855
4856
class GenericTapasticComic(GenericListableComic):
    """Base class sharing the scraping logic of the comics on tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dictionary for the episode described by `archive_elt`.

        Returns None, after printing a notice, when the page contains no
        image with the 'art-image' class.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably it is expressed in milliseconds - TODO confirm).
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        pictures = soup.find_all('img', class_='art-image')
        if not pictures:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [picture['src'] for picture in pictures],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode url built from the 'id' of the archive element."""
        episode_id = str(archive_elt['id'])
        return 'http://tapastic.com/episode/' + episode_id

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list decoded from the javascript of the series page."""
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        raw_lines = urlopen_wrapper(cls.url).readlines()
        text_lines = (raw.decode('utf-8').strip() for raw in raw_lines)
        payloads = [
            line[len(pref):-len(suff)]
            for line in text_lines
            if line.startswith(pref) and line.endswith(suff)
        ]
        # Only the first matching line is used.
        return json.loads(payloads[0])
4889
4890
4891
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve the Vegetables For Dessert comics from their Tapastic series."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4897
4898
4899
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve the Fowl Language comics from their Tapastic series."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4908
4909
4910
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve the Oscillating Profundities comics from their Tapastic series."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4915
4916
4917
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve the Znoflats comics from their Tapastic series."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4922
4923
4924
class SandersenTapastic(GenericTapasticComic):
    """Retrieve the Sarah Andersen comics from their Tapastic series."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4931
4932
4933
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve the TubeyToons comics from their Tapastic series."""
    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    _categories = ('TUNEYTOONS', )
4941
4942
4943
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve the Anything Comic comics from their Tapastic series."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4949
4950
4951
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve the Unearthed comics from their Tapastic series."""
    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4959
4960
4961
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve the Everything's Stupid comics from their Tapastic series."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4968
4969
4970
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve the Just Say Eh comics from their Tapastic series."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4976
4977
4978
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve the Thor's Thundershack comics from their Tapastic series."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4985
4986
4987
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve the Owl Turd comics from their Tapastic series."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4994
4995
4996
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve the Gone Into Rapture comics from their Tapastic series."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
5003
5004
5005
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve the Heck If I Know comics from their Tapastic series."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
5011
5012
5013
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve the Cheer Up Emo Kid comics from their Tapastic series."""
    # Also on http://www.cheerupemokid.com
    # Also on https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
5020
5021
5022
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve the Big Foot Justice comics from their Tapastic series."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
5028
5029
5030
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve the Up & Out comics from their Tapastic series."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
5036
5037
5038
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve the Toon Hole comics from their Tapastic series."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
5044
5045
5046
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve the Angry at Nothing comics from their Tapastic series."""
    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
5053
5054
5055
class LeleozTapa(GenericTapasticComic):
    """Retrieve the Leleoz comics from their Tapastic series."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
5061
5062
5063
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from their Tapastic series."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
5072
5073
5074
class AsPerUsualTapa(GenericTapasticComic):
    """Retrieve the As Per Usual comics from their Tapastic series."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled with the leading underscore used by every other comic class,
    # so the category is declared under the same attribute name as theirs.
    _categories = ('DAMILEE', )
5081
5082
5083
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retrieve the Hot Comics For Cool People comics from their Tapastic series."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled with the leading underscore used by every other comic class,
    # so the category is declared under the same attribute name as theirs.
    _categories = ('DAMILEE', )
5092
5093
5094
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve the 1111 Comics from their Tapastic series."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
5102
5103
5104
class TumbleDryTapa(GenericTapasticComic):
    """Retrieve the Tumble Dry comics from their Tapastic series."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is called "Tumble Dry" (see the url below), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5110
5111
5112
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve the Deadly Panel comics from their Tapastic series."""
    # Also on http://www.deadlypanel.com
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5119
5120
5121
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from their Tapastic series."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK', )
5129
5130
5131
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from their Tapastic series."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK', )
5139
5140
5141
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from their Tapastic series."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK', )
5149
5150
5151
class RandoWisTapa(GenericTapasticComic):
    """Retrieve the RandoWis comics from their Tapastic series."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5157
5158
5159
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from their Tapastic series."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5165
5166
5167
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from their Tapastic series."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5174
5175
5176
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from their Tapastic series."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5182
5183
5184
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve the Mister & Me comics from their Tapastic series."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5191
5192
5193
class TalesOfAbsurdityTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve the Tales Of Absurdity comics from their Tapastic series."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
5201
5202
5203
class BFGFSTapa(GenericTapasticComic):
    """Retrieve the BFGFS comics from their Tapastic series."""
    # Also on http://bfgfs.com
    # Also on https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5210
5211
5212
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve the Doodle For Food comics from their Tapastic series."""
    # Also on http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5218
5219
5220
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve the Mr. Lovenstein comics from their Tapastic series."""
    # Also on  https://tapastic.com/series/MrLovenstein
    # NOTE(review): the "Also on" link above is the same as `url` below;
    # another site was presumably intended - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5226
5227
5228
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve the C. Cassandra comics from their Tapastic series."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5235
5236
5237
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve the Waffles And Pancakes comics from their Tapastic series."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5243
5244
5245
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve the Yesterday's Popcorn comics from their Tapastic series."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5252
5253
5254
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
    """Retrieve the Our Super Adventure comics from their Tapastic series."""
    # Also on http://www.oursuperadventure.com
    # http://sarahssketchbook.tumblr.com
    # http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5262
5263
5264
class NamelessPCs(GenericTapasticComic):
    """Retrieve the Nameless PCs comics from their Tapastic series."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5270
5271
5272
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve the Down The Upward Spiral comics from their Tapastic series."""
    # Also on http://www.downtheupwardspiral.com
    # Also on http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5279
5280
5281
class UbertoolTapa(GenericTapasticComic):
    """Retrieve the Ubertool comics from their Tapastic series."""
    # Also on http://ubertoolcomic.com
    # Also on https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL', )
5289
5290
5291
class BarteNerdsTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve the BarteNerds comics from their Tapastic series."""
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5297
5298
5299
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve the Small Blue Yonder comics from their Tapastic series."""
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5305
5306
5307
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve the Tizzy Stitch Bird comics from their Tapastic series."""
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5315
5316
5317
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve the Rock Paper Cynic comics from their Tapastic series."""
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5324
5325
5326
class IsItCanonTapa(GenericTapasticComic):
    """Retrieve the Is It Canon comics from their Tapastic series."""
    # Also on http://www.isitcanon.com
    name = 'canon-tapa'
    long_name = 'Is It Canon (from Tapastic)'
    url = 'http://tapastic.com/series/isitcanon'
5332
5333
5334
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve the It's the tie comics from their Tapastic series."""
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = "https://tapastic.com/series/itsthetie"
    _categories = ('TIE', )
5342
5343
5344
class JamesOfNoTradesTapa(GenericTapasticComic):
    """Retrieve the James Of No Trades comics from their Tapas series."""
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    long_name = 'James Of No Trades (from Tapastic)'
    url = 'https://tapas.io/series/James-of-No-Trades'
    _categories = ('JAMESOFNOTRADES', )
5353
5354
5355
class MomentumTapa(GenericTapasticComic):
    """Retrieve the Momentum comics from their Tapastic series."""
    # Also on http://www.momentumcomic.com
    name = 'momentum-tapa'
    long_name = 'Momentum (from Tapastic)'
    url = 'https://tapastic.com/series/momentum'
5361
5362
5363
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """Retrieve the A Pleasant Waste Of Time comics from their Tapas series."""
    # Also on https://artjcf.tumblr.com
    name = 'pleasant-waste-tapa'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    url = 'https://tapas.io/series/A-Pleasant-'
    _categories = ('WASTE', )
5370
5371
5372
def get_subclasses(klass):
    """Get the list of direct/indirect subclasses of a class.

    Each subclass is listed exactly once, even when it can be reached
    through several base classes (multiple inheritance); `klass` itself is
    not part of the result.
    """
    found = []
    seen = set()
    pending = [klass]
    while pending:
        current = pending.pop()
        for derived in current.__subclasses__():
            # A class inheriting from several bases is reachable more than
            # once: record it and walk its own subclasses a single time.
            if derived not in seen:
                seen.add(derived)
                found.append(derived)
                pending.append(derived)
    return found
5378
5379
5380
def remove_st_nd_rd_th_from_date(string):
    """Transform 1st/2nd/3rd/4th into a parsable date format.

    Only the ordinal suffixes that directly follow a digit are removed, so
    the rest of the string (e.g. 'August', 'Monday', 'Saturday') is left
    untouched.
    """
    # The look-behind keeps the digit in place while dropping its suffix.
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)', '', string)
5388
5389
5390
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which switches the locale to
    `local` while parsing and restores the previous locale afterwards, even
    when the parsing fails.

    Raises ValueError (from strptime) when `string` does not match
    `date_format`.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Without this, a parsing error would leave the whole process in
        # the temporary locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
5401
5402
5403
# Every class deriving (directly or not) from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Only the classes with a name are kept as usable comics.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup by comic name; the assert checks that no name is used twice.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Same uniqueness check for the names of the classes themselves.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
5409