Completed
Push — master ( ce2402...48aeb4 )
by De
01:29
created

comics.py (49 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Locale identifier. It is not referenced in the visible part of this file;
# presumably it is the default locale for date parsing -- TODO confirm.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        `last_comic` is the last comic already retrieved (a dict with a
        'num' key) or a falsy value to start from the beginning.

        Each yielded comic is the JSON document found at
        `<url>/<num>/info.0.json`, modified as follows:
            - 'img' is wrapped in a single-element list,
            - 'prefix', 'json_url' and 'url' are added,
            - 'day', 'month' and 'year' are converted to int.
        """
        first_num = last_comic['num'] if last_comic else 0
        # The JSON document at the site root gives the number of the latest comic.
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            # 404 is deliberately skipped (presumably that comic number
            # does not exist on the site - TODO confirm).
            if num != 404:
                json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
                comic = load_json_at_url(json_url)
                comic['img'] = [comic['img']]
                comic['prefix'] = '%d-' % num
                comic['json_url'] = json_url
                comic['url'] = urljoin_wrapper(cls.url, str(num))
                comic['day'] = int(comic['day'])
                comic['month'] = int(comic['month'])
                comic['year'] = int(comic['year'])
                # Sanity check: the document must describe the requested comic.
                assert comic['num'] == num
                yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' value of `link` unchanged.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' value of `link` joined to the class url `cls.url`
    with `urljoin_wrapper`.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comic where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True to get the next link and False to get the
        previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The result is either None (the comic is then skipped by
        `get_next_comic`) or a dict which must not contain a 'url' key.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Start from the comic following `last_comic` (or from the first
        comic when `last_comic` is falsy) and follow the "next" links
        until there is none left or until a link leads to the page it
        was found on. Yield the dicts returned by `get_comic_info` with
        the 'url' key added.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            # A "next" link pointing to the current page means we are done.
            if prev_url == url:
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return True if a first link is found and its url can be fetched
        without HTTP error, False otherwise.
        """
        # The file only does `import urllib`, which does not guarantee that
        # the `urllib.error` submodule is loaded: import the name explicitly.
        from urllib.error import HTTPError
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from
        `url` is expected to lead back to `url`. Return True if no
        problem is found, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both checks are always performed; return True only if both pass.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The result is either None (the comic is then skipped by
        `get_next_comic`) or a dict which must not contain a 'url' key.
        """
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Go through the archive elements in order. When `last_comic` is
        provided, elements are skipped up to and including the one whose
        url is `last_comic['url']`; every element after it is fetched
        and yielded with the 'url' key added. A warning is printed if
        that url is never found.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        # Materialised because the length is needed for the final message.
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last retrieved comic found: start yielding from the next one.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the first 'link' tag with rel='next' (or rel='prev' when
    `next_` is false) found in `last_soup`.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the first 'a' tag with rel='next' (or rel='prev' when
    `next_` is false) found in `last_soup`.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the first 'a' tag with class 'navi navi-next' (or
    'navi navi-prev' when `next_` is false) found in `last_soup`.
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the first 'a' tag with class 'navi comic-nav-next navi-next'
    (or 'navi comic-nav-previous navi-prev' when `next_` is false)
    found in `last_soup`.
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the first 'a' tag with class 'comic-nav-base comic-nav-next'
    (or 'comic-nav-base comic-nav-previous' when `next_` is false)
    found in `last_soup`.
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return its first 'a' tag with class
    'navi navi-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the first 'a' tag found in
    its first 'div' with class 'nav-first'. Return None when there is
    no such 'div' instead of failing with an AttributeError.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return its first 'a' tag with class
    'comic-nav-base comic-nav-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The class is expected to define a `first_url` attribute; the result
    is a dict with that URL as its only 'href' entry.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.

    The starting URL is `cls.first_url` if the class defines it and is
    asked interactively otherwise. Every URL visited is printed.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    while comic:
        prev_url = cls.get_url_from_link(comic)
        # Same guard as in get_next_comic: a "previous" link pointing to the
        # current page would otherwise make this loop run forever.
        if prev_url == url:
            break
        url = prev_url
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
358
359
360 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the publication date (day, month,
        year), the sources of the images located under
        `<url>/wp-content/uploads/` and a prefix built from the title.
        """
        # The url is escaped so that its dots are matched literally.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
384
385
386 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to override `first_url`, which is used by
    `simulate_first_link` as the starting point of the navigation.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the short link ('url2'), the
        publication date (day, month, year) and the 'og:image' urls
        converted to plain ASCII URIs.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # The date is parsed with the "fr_FR.utf8" locale argument.
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
409
410
411
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
417
418
419
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
426
427
428
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
434
435
436
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
442
443
444
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
450
451
452
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
458
459
460
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
466
467
468
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
474
475
476 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the author, the publication date
        (day, month, year), the description and the sources of the
        images of the entry.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
507
508
509
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the 'a' tag found in the relevant 'li' tag, or None if
        there is no such 'li' tag.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the short url, the title, the 'og:image' urls
        and the publication date (day, month, year).
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (YYYY-MM-DD) are used.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
543
544
545
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is hardcoded as a dict with 'href' and 'title' entries.
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is taken from the link and the date (year, month, day)
        is extracted from the url of the comic. Return a dict with the
        title, the date and the sources of the images of the entry.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
573
574
575
class ZenPencils(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # GenericEmptyComic comes first in the bases: its get_next_comic
    # (returning no comics) is the one being used.
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the description, the author, the
        publication date (day, month, year) and the urls of the images
        (joined to the class url).
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the images found.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
610
611
612
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # GenericEmptyComic comes first in the bases: its get_next_comic
    # (returning no comics) is the one being used.
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the publication date (day, month,
        year), the image urls (regular images first, then bonus images
        not already listed) and the tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        # This particular image is filtered out of the result.
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
646
647
648 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the sources of the images of the
        entry and the publication date (day, month, year).
        """
        date_str = soup.find('h2', class_='date-header').string
        # The date is parsed with the "fr_FR.utf8" locale argument.
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
672
673
674 View Code Duplication
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # GenericEmptyComic comes first in the bases: its get_next_comic
    # (returning no comics) is the one being used.
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the publication date (day, month,
        year) and the 'og:image' urls.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
699
700
701 View Code Duplication
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # GenericEmptyComic comes first in the bases: its get_next_comic
    # (returning no comics) is the one being used.
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the publication date (day, month,
        year) and the 'og:image' urls.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
725
726
727
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the short url
        (`<url>/?p=<num>`). Exactly one image is expected in the 'comic'
        div; its 'alt' and 'title' attributes give the two titles.
        Return a dict with the short url, both titles, the image url
        (joined to the class url) and the number.
        """
        # The url is escaped so that its dots are matched literally.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
753
754
755
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is extracted from the comic url (`.../comic/YYYY/MM/DD`).
        """
        comic_url = cls.get_url_from_link(link)
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, date_re.match(comic_url).groups())
        display = soup.find('div', class_='comic-display')
        pictures = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [pic['src'] for pic in pictures],
        }
783
784
785
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation div is not found.
        """
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        container = last_soup.find('div', class_=nav_class)
        if not container:
            return None
        return container.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the page's `<meta>` tags.
        """
        def meta_content(prop):
            """Return the `content` of the first meta tag with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        day = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
821
822
823
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        No date is returned: it is only available on the archive page.
        """
        def last_meta_content(prop):
            """Return the `content` of the last meta tag with that property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        desc = last_meta_content('og:description')
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
846
847
848
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation image is absent from the page or
        when the element wrapping it has no `href`.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # No navigation image at all: report "no link" instead of
            # failing with an AttributeError on `.parent`.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except those whose `src` ends with
        one of the known site-decoration file names.
        """
        title = soup.find('title')
        ignored_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(ignored_suffixes)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
880
881
882
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only the image urls of the comic div are returned.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
900
901
902 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date are read from the post header; images from
        the comic div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
926
927
928 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `title2` is the `title` attribute of the first image in the comic
        pane.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_picture = pictures[0]
        title2 = first_picture['title']
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'title2': title2,
            'author': author,
        }
956
957
958 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images with an empty `src` are left out of the result.
        """
        title = soup.find("h1", class_="comic_title").string
        date_span = soup.find("span", class_="comic_date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
985
986
987
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is always returned; the image of the `aftercomic`
        div is appended when that div exists and its image has a non-empty
        `src`.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        aftercomic = soup.find('div', id='aftercomic')
        if aftercomic:
            bonus_url = aftercomic.find('img')['src']
            if bonus_url:
                img_urls.append(bonus_url)
        publish_div = soup.find('div', class_='cc-publishtime')
        day = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1019
1020
1021
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Get the links of the thumbnails div of the home page, reversed."""
        home = get_soup_at_url(cls.url)
        thumbnail_links = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(thumbnail_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Name and image are read from the `og:` meta tags; exactly one
        image is expected.
        """
        name = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        assert len(image_metas) == 1
        return {
            'name': name,
            'img': [meta['content'] for meta in image_metas],
        }
1044
1045
1046 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The description defaults to an empty string when the page has no
        `og:description` meta tag.
        """
        title = soup.find('meta', property='og:title')['content']
        metadesc = soup.find('meta', property='og:description')
        if metadesc:
            desc = metadesc['content']
        else:
            desc = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first ten characters (the `YYYY-MM-DD` part) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1072
1073
1074
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic url and the date from the three
        dash-separated numbers in the image url.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        title2 = picture['title']
        img_url = picture['src']
        year, month, day = map(int, comic_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1110
1111
1112
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        centered_nav = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return centered_nav.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is taken from the comic url (`<url>/YYYY/MM/DD/`); `texts`
        joins the non-empty `title` attributes of the images.
        """
        comic_url = cls.get_url_from_link(link)
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_re.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        img_titles = [pic.get('title') for pic in pictures]
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1140
1141
1142
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics (French site)."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # NOTE(review): this assigns a new tuple rather than extending the
    # parent's `_categories`; how categories are combined across the class
    # hierarchy is not visible here - confirm 'BOULET' is still reported.
    _categories = ('FRANCAIS', )
1148
1149
1150
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve the English version of BouletCorp comics."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1155
1156
1157 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-joined `title` attributes of the comic
        images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1182
1183
1184
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the `alt` text of the first comic image, or an empty
        string when the comic div holds no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        entry_meta = soup.find('div', class_='entry-meta')
        day = string_to_date(entry_meta.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            first_picture = pictures[0]
            title = first_picture['alt']
            assert first_picture['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1214
1215
1216
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has an `extrapanelbutton` div, the page it links to
        is fetched as well and its `extrapanelimage` images are appended to
        the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        extra_div = soup.find('div', id='extrapanelbutton')
        extra_url = None
        if extra_div:
            extra_url = extra_div.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1250
1251
1252
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is absent from the page or
        has no `href`.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept
        # exactly as found; confirm it really matches the site's markup.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None:
            # No anchor found: report "no link" instead of failing with an
            # AttributeError on `.get`.
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last `/`-separated component of
        the `og:url` meta tag; the author credit is expected to start with
        'by ', which is stripped.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1291
1292
1293
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the `/comic/<num>` links
        of the home page; iteration resumes right after `last_comic` when
        one is given.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1))
                for anchor in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(url)
            # Images are handled in reverse page order.
            pictures = list(page.find_all('img', src=re.compile('^/images/comics/')))
            pictures.reverse()
            description = page.find('meta', attrs={'name': 'description'})['content']
            img_titles = [pic.get('title') for pic in pictures]
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(url, pic['src']) for pic in pictures],
                'description': description,
            }
1323
1324
1325
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        comic_links = get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Date and text come from the archive link and the node following
        it; image and title come from the comic page itself.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1358
1359
1360
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is taken from the comic url; the title is the text of the
        archive link and is expected to equal the image's `alt`.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1388
1389
1390
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the `<year>/<month>/` links of the index page and, for each
        month that may hold comics newer than the last retrieved one,
        yields the images whose date is strictly after it.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The textual forms are kept as found: they are reused verbatim
            # in the image pattern and in the image url.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months lying entirely before the last retrieved comic
            # (31 days covers the longest month).
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1424
1425
1426
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        archive_soup = get_soup_at_url(archive_url)
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The number comes from the comic url, the title from the archive
        link and the image from the first `<img>` under `/strips/`.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        strip_img = soup.find('img', src=cls.comic_img_re)
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [strip_img['src']]
        }
1449
1450
1451
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns None when the "first" button image is not on the page.
        """
        home = get_soup_at_url(cls.url)
        button = home.find('img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the corresponding button image is not on the
        page.
        """
        direction = 'next' if next_ else 'prev'
        button_url = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_url)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and images are read from the page's meta tags.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1480
1481
1482 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the 'First' image)."""
        first_img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The parent of the navigation image is returned only if it has an href.
        """
        pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        post_title = soup.find('h3', class_='post-title entry-title').string
        header_text = soup.find('h2', class_='date-header').string
        published = string_to_date(header_text, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [lnk['href'] for lnk in image_links],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1514
1515
1516
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        og_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': og_title,
            # Image sources are resolved against the class url.
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1540
1541
1542
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor whose href ends with 'comic=1')."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        page_url = cls.get_url_from_link(link)
        # The comic number is the trailing 'comic=<digits>' part of the url.
        number = int(re.compile('.*comic=([0-9]*)$').match(page_url).group(1))
        strip = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, strip['src'])],
            'title': strip.get('title')
        }
1572
1573
1574
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the div with id 'st', or None when the page
        has no such div (same convention as get_navi_link).
        """
        div = get_soup_at_url(cls.url).find("div", id="st")
        return div.parent if div else None

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the div with id 'nx' (next) or 'pvs'
        (previous), or None when the div is missing.
        """
        div = last_soup.find("div", id="nx" if next_ else "pvs")
        return div.parent if div else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image (in the div with id 'tt') and one strip image
        (id 'strip') are expected; their titles are joined as the description.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        imgs = title_imgs + strip_imgs
        desc = ' '.join(i['title'] for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'description': desc,
        }
1608
1609
1610
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor with accesskey 'n' or 'p')."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        og_description = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': og_description,
            'img': [pic['src'] for pic in pictures],
        }
1634
1635
1636
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Class to retrieve the Something Of That Ilk comics.

    Only the identification attributes are defined here.
    """
    url = 'http://www.somethingofthatilk.com'
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
1641
1642
1643
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        og_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        return {
            'title': og_title,
            'img': [pic['src'] for pic in comic_div.find_all('img')],
        }
1661
1662
1663
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive 'bookmark' anchors, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has no div with id 'comic', the image list, alt text
        and title are left empty.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1700
1701
1702 View Code Duplication
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1724
1725
1726
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The alt text is taken from the first image of the div with id 'comic'.
        """
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1747
1748
1749 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every comic image is expected to carry the post title as both its
        alt and title attributes.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title'] == post_title
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
        }
1775
1776
1777 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is built by joining the title attributes of the images.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['title'] == pic['alt']
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1796
1797
1798
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # Fixed: the literal used to end with a stray space, which made it an
    # invalid url.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date come from meta tags; images are the
        'og:image' meta contents minus the urls listed in skip_imgs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as "%Y-%m-%d".
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1830
1831
1832 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The alt text is taken from the first image of the div with id 'comic'.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1859
1860
1861 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected in the 'comicpane' div, each with
        alt and title attributes equal to the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        for pic in pictures:
            assert pic['title'] == pic['alt'] == post_title
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
        }
1889
1890
1891 View Code Duplication
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        page_title = soup.find('title').string
        pictures = soup.find('div', class_='entry-content').find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_names = ' '.join(tag.string for tag in soup.find_all('a', rel='tag'))
        # Only the first 10 characters of the datetime attribute are parsed.
        iso_day = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'tags': tag_names,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1918
1919
1920
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor with id 'firstlink')."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the last '/'-separated part of the comic url.
        """
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': strip.get('title'),
            'img': [urljoin_wrapper(page_url, strip['src'].strip())],
            'num': int(page_url.split('/')[-1]),
        }
1949
1950
1951
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get the comic url from the first anchor of an archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The date is rebuilt from the month/day text of the cell preceding td
        and from the year found at the beginning of the comic url path.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site url so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # A single image is used: the first one in the div with id 'comic'.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1987
1988
1989
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve Disco Bleach Comics.

    Only the identification attributes are defined here.
    """
    url = 'http://discobleach.com'
    name = 'discobleach'
    long_name = 'Disco Bleach'
1994
1995
1996
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve TubeyToons comics.

    Only the identification attributes are defined here.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    url = 'http://tubeytoons.com'
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    # NOTE(review): category is spelled 'TUNEY', not 'TUBEY' - confirm whether
    # other classes share this spelling before changing it.
    _categories = ('TUNEYTOONS', )
2004
2005
2006 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The alt text is the title attribute of the first image; all images
        are expected to share it as both alt and title.
        """
        post_title = soup.find('h2', class_='post-title').string
        # The author is read from the second child of the 'post-author' span.
        post_author = soup.find('span', class_='post-author').contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        first_title = pictures[0]['title']
        for pic in pictures:
            assert pic['title'] == pic['alt'] == first_title
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_title,
            'author': post_author,
        }
2034
2035
2036
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; when there is none,
        the image list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive anchors pointing to '<url>/comic/...', in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site url so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2060
2061
2062 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor titled 'First')."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        # Only images with empty alt and title attributes are kept.
        pictures = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2092
2093
2094 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has no div with id 'comic' (or no image in it), the
        image list and the title are empty.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        post_author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        first_title = pictures[0]['title'] if pictures else ""
        for pic in pictures:
            assert pic['title'] == pic['alt'] == first_title
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': first_title,
            'author': post_author,
        }
2120
2121
2122
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the image named 'beginArrow', or None when the
        page has no such image.
        """
        img = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous), or None when the image is missing.
        """
        img = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2148
2149
2150 View Code Duplication
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Get the 'post-link' anchors of the 'post-list' list, in reverse page order."""
        listing_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        posts = listing_soup.find('ul', class_='post-list')
        return reversed(posts.find_all('a', class_='post-link'))

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        og_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in image_metas],
        }
2172
2173
2174
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        The archive row is expected to hold exactly three cells: the second
        one contains the titled link, the third one the date.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        row_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': row_title,
            'title2': og_title,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get the comic url from the link in the second cell of an archive row."""
        _, link_cell, __ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the archive table body, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2216
2217
2218 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, image urls and alt text from a comic page."""
        pictures = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        published = string_to_date(
            post.find('span', class_='post-date').string, "%B %d, %Y")
        # Every image is expected to carry the same text in 'alt' and 'title'.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        alt_text = ''.join(pic['alt'] for pic in pictures)
        return {
            'title': title,
            'img': sources,
            'alt': alt_text,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2245
2246
2247
class RockPaperScissors(GenericNavigableComic):
    """Class to retrieve Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect page title, transcript, short link and og:image urls of a comic."""
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')['href']
        transcript_text = soup.find('div', id='transcript-content').string
        info = {}
        info['title'] = page_title
        info['transcript'] = transcript_text
        info['short_url'] = shortlink
        info['img'] = [meta['content'] for meta in image_metas]
        return info
2268
2269
2270
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read a comic's metadata from the page's meta tags and its single image."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        # The tag meta is optional; only the first one is used.
        first_tag = soup.find('meta', property='article:tag')
        if first_tag:
            tags = first_tag['content']
        else:
            tags = ""
        # Keep only the leading YYYY-MM-DD part of the publication timestamp.
        timestamp = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        alt_text = "".join(pic['alt'] for pic in pictures)
        # Drop whatever follows the last '?' in each image address.
        sources = [pic['src'].rsplit('?', 1)[0] for pic in pictures]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt_text,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2301
2302
2303
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive's chapter table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic linked from an archive row."""
        # A row must hold exactly four cells; only the second one is needed here.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the info dict of a comic from its page and its archive row.

        The archive row provides the number, the title and the date; the
        page provides the single image with id 'comic_image'.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        info = {'num': number, 'title': title}
        info['alt'] = pictures[0].get('alt', '')
        info['img'] = [pic['src'] for pic in pictures]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
2344
2345
2346 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and date read from a comic's post."""
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = post.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = post.find("div", class_="entry").find_all("img")
        info = {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2372
2373
2374 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, og:image urls and publication date taken from meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        sources = [meta['content'] for meta in image_metas]
        return {
            'title': title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2400
2401
2402
class ThorsThundershack(GenericNavigableComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic anchor, or None if there is none.

        Anchors whose href is exactly '/comic' are skipped.
        """
        wanted_rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=wanted_rel)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict of a comic, read from the page's itemprop markup."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        timestamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(timestamp, "%Y-%m-%d %H:%M:%S")
        # Image sources are joined with the site url before being stored.
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt_text,
            'description': description,
        }
2445
2446
2447
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict (images, title, alt, author, date) of a comic.

        `soup` is the parsed comic page; `link` is not used here.
        A page whose comic div holds no image yields an empty image list
        and an empty alt text instead of raising IndexError.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard against pages without any image, like the sibling classes do.
        alt = imgs[0]['alt'] if imgs else ""
        # All images are expected to share one text for both 'alt' and 'title'.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2474
2475
2476 View Code Duplication
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and date of a comic page.

        The date text is parsed with the "de_DE.utf8" locale.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Each image must repeat the post title in both 'alt' and 'title'.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        info = {'img': [pic['src'] for pic in pictures]}
        info['title'] = title
        info['author'] = author
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
2502
2503
2504 View Code Duplication
class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date of a comic page."""
        title = soup.find("h1", class_="entry-title").string
        author = soup.find("span", class_="author vcard").find("a").string
        published = string_to_date(
            soup.find("span", class_="entry-date").string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected; its alt text describes the comic.
        assert len(pictures) == 1
        only_picture = pictures[0]
        alt_text = only_picture['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2532
2533
2534 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching 'alt' and 'title'.
        if pictures:
            first_picture = pictures[0]
            assert first_picture['alt'] == first_picture['title']
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2561
2562
2563
class PleasantThoughts(GenericNavigableComic):
    """Class to retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image urls found in the post content."""
        post = soup.find('div', class_='post-content')
        info = {'title': post.find('h2', class_='post-title').string}
        entry = post.find("div", class_="entry")
        info['img'] = [pic['src'] for pic in entry.find_all("img")]
        return info
2581
2582
2583 View Code Duplication
class MisterAndMe(GenericNavigableComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date of a comic page.

        A page may hold no image at all, in which case the alt text is empty.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2613
2614
2615
class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict of a comic page holding at most one image."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        # No image means no alt text.
        alt_text = pictures[0]['alt'] if pictures else ""
        info = {'img': [pic['src'] for pic in pictures]}
        info['title'] = title
        info['alt'] = alt_text
        info['author'] = author
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
2643
2644
2645 View Code Duplication
class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date of a comic page.

        The alt text is the one of the first image, or "" without images.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt_text = pictures[0]['alt'] if pictures else ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2675
2676
2677 View Code Duplication
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Endless Origami Comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict (images, title, alt, author, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # The first image provides the alt text; pages without image get "".
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2704
2705
2706
class PlanC(GenericNavigableComic):
    """Class to retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        info = {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
2728
2729
2730 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Class to retrieve Buni Comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the url of the single comic image and its title attribute."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        sources = [pic['src'] for pic in pictures]
        # The comic title is taken from the image itself.
        return {
            'img': sources,
            'title': pictures[0]['title'],
        }
2748
2749
2750 View Code Duplication
class GenericCommitStrip(GenericNavigableComic):
    """Generic class to retrieve Commit Strips in different languages."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each language-specific subclass provides its own value.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, description and image urls of a strip.

        'title' and 'description' come from the Open Graph meta tags,
        'title2' joins the title attributes of the images in the entry
        content (an image without title contributes an empty string).
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        image_titles = ' '.join(pic.get('title', '') for pic in pictures)
        sources = []
        for pic in pictures:
            # Convert the address to plain ascii, then join it with the site url.
            ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
            sources.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': image_titles,
            'description': description,
            'img': sources,
        }
2769
2770
2771
class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip; retrieval logic lives in GenericCommitStrip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    # Overrides the NotImplemented placeholder of GenericCommitStrip;
    # presumably the page simulate_first_link starts from (to be confirmed).
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2778
2779
2780
class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip; retrieval logic lives in GenericCommitStrip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Overrides the NotImplemented placeholder of GenericCommitStrip;
    # presumably the page simulate_first_link starts from (to be confirmed).
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2786
2787
2788 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Generic class to retrieve Boumeries comics in different languages."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: subclasses set the date format and the locale passed
    # to string_to_date when parsing the post date.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short link, images, title, author and date of a comic page.

        The post date is parsed with the class-level `date_format` and `lang`.
        """
        title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        info = {'short_url': shortlink}
        info['img'] = [pic['src'] for pic in pictures]
        info['title'] = title
        info['author'] = author
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
2814
2815
2816
class BoumerieEn(GenericBoumerie):
    """English edition of the Boumeries comics; logic lives in GenericBoumerie."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Format and locale used by GenericBoumerie.get_comic_info to parse dates.
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2823
2824
2825
class BoumerieFr(GenericBoumerie):
    """French edition of the Boumeries comics; logic lives in GenericBoumerie."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Format and locale used by GenericBoumerie.get_comic_info to parse dates.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2833
2834
2835 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, short link, images and date of a comic.

        `soup` is the parsed comic page; `link` is not used here.
        'description' is the text of the og:description meta tag, or an
        empty string when the page has no such tag.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is in the first <h1>, or failing that in the first <h2>.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the tag's content, not the tag object itself, so that the
        # description is a plain string as in the other comic classes.
        desc_tag = soup.find('meta', property='og:description')
        description = desc_tag['content'] if desc_tag else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': description,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2867
2868
2869 View Code Duplication
class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, author, images and date of a comic page.

        A page without comic div (or without image in it) gives an empty
        image list and an empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        alt_text = pictures[0]['title'] if pictures else ""
        # All images must share the same text for 'alt' and 'title'.
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt_text,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2897
2898
2899
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short link, number, images, date, alt text and title of a comic.

        `soup` is the parsed comic page; `link` is not used here.
        The comic number is the `p` value of the page's short link, which
        must start with the site url. A page without image yields an empty
        image list and an empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site url so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against pages without any image, like the sibling classes do.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2929
2930
2931
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title, shortlink, images, post date, tags and author are read
        from `soup`; `link` is not used. The comic number is the `p`
        parameter of the shortlink. Returns a dict with the keys
        'short_url', 'num', 'img', 'month', 'year', 'day', 'title', 'tags',
        'alt' and 'author'.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site url: left unescaped, its dots match any character.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image is expected to carry the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Tags are exposed as one <meta property="article:tag"> per tag.
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2967
2968
2969
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The publication date, author, title and images are read from
        `soup`; `link` is not used. Returns a dict with the keys 'img',
        'title', 'author', 'day', 'month' and 'year'.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3000
3001
3002
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next or previous comic, None if absent."""
        # prev is next / next is prev
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`.

        `link` is not used. The alt text comes from the first image having
        a 'src' attribute.
        """
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        content = soup.find('div', class_="body entry-content")
        # Images without a 'src' attribute are ignored.
        pictures = [pic for pic in content.find_all('img') if pic.get('src') is not None]
        alt = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': description,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
3041
3042
3043
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the home page."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`.

        `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
3066
3067
3068
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics (WordPress/Inkblot site)."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/EverythingsStupid
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3076
3077
3078
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics (WordPress/Inkblot site)."""
    # Possibly also on https://tapastic.com/series/TheIsm
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
3084
3085
3086
class WoodenPlankStudios(GenericEmptyComic, GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics (WordPress/Inkblot site)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3091
3092
3093
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, None if absent."""
        arrow = last_soup.find('img', alt='Next' if next_ else 'Back')
        if not arrow:
            return None
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic from the page's og: metadata.

        `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3121
3122
3123
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'nav-first' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first navigation anchor not pointing to the home page.

        None is returned when no such anchor exists.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=nav_id)
        return next(
            (anchor for anchor in candidates
             if anchor['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`.

        `link` is not used. The title is taken from the single image found
        in the 'comic-foot' div.
        """
        pictures = soup.find("div", id="comic-foot").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        title = pictures[0]['title']
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
3154
3155
3156 View Code Duplication
class Ubertool(GenericNavigableComic):
    """Retriever for the Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`.

        `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3181
3182
3183 View Code Duplication
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' or 'prev'."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`.

        `link` is not used. Image urls are joined to the site url.
        """
        title = soup.find('title').string
        pictures = soup.find('div', id='image').find_all('img')
        # The alt text is the first image's title, or '' when it has none.
        alt = pictures[0].get('title', '')
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'title': title,
            'alt': alt,
        }
3208
3209
3210 View Code Duplication
class PomComics(GenericNavigableComic):
    """Retriever for the Pom Comics / Piece of Me comics."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn_first' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn_next' or 'btn_prev' anchor."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`.

        `link` is not used. Image urls are joined to the site url.
        """
        title = soup.find('h1', id="comic-name").string
        description = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        pictures = soup.find('div', class_='comic').find_all('img')
        return {
            'title': title,
            'desc': description,
            'tags': keywords,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
3240
3241
3242
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching chevron icon.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # The link is the element wrapping the chevron icon.
        return span.parent if span else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title and url come from the twitter: metadata, the second title
        and alt text from the first comic image. `link` is not used.
        Returns a dict with the keys 'url2', 'title', 'title2', 'alt' and
        'img'.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3277
3278
3279
class MakeItStoopid(GenericNavigableComic):
    """Retriever for the Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page.

        The 'cnav' elements are split into the first five and the rest,
        and both groups are asserted to be equal. Elements without an
        'href' are replaced by None in the returned list.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Order in a bar: begin, prev, archive, next, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the home page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic; the title comes from `link`."""
        title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
3313
3314
3315 View Code Duplication
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title and images are read from `soup`; `link` is not used.
        Returns a dict with the keys 'title' and 'img'.
        """
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3337
3338
3339
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic from the page's metadata.

        `link` is not used.
        """
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }
3362
3363
3364
class ConsoliaComics(GenericNavigableComic):
    """Retriever for the Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' anchor."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`.

        `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('time')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3395
3396
3397
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retriever for the Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`.

        `link` is not used.
        """
        title = soup.find('title').string
        pictures = soup.find('div', itemprop='description articleBody').find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [pic['src'] for pic in pictures],
            'author': author,
            'title': title,
        }
3422
3423
3424
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # prev is next / next is prev
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title, description, publication date, author and images are
        read from `soup`; `link` is not used. Returns a dict with the keys
        'title', 'alt', 'description', 'author', 'day', 'month', 'year' and
        'img'. 'alt' is the alt text of the first image, or "" when there is
        no image or no alt attribute.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The comic lives in one of two possible containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a bs4 Tag, `in` appears to test the tag's children
        # rather than its attributes, so this check is probably always
        # satisfied; `has_attr('title')` may have been intended. Confirm
        # before tightening, as it could start failing on existing pages.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string, so that 'alt' has one type whether or not images exist.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3462
3463
3464
class GloryOwlComix(GenericNavigableComic):
    """Retriever for the Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic shown on the page `soup`.

        `link` is not used. Images come from the <link rel="image_src"> tags.
        """
        title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [img_link['href'] for img_link in image_links],
            'author': author,
            'title': title,
        }
3489
3490
3491
class GenericTumblrV1(GenericComic):
    """Base class for comics retrieved from Tumblr through the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics built from the posts returned by get_posts.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the post's url, printing a message if it is not under cls.url."""
        post_url = post['url']
        if not post_url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (post_url, cls.url))
        return post_url

    @classmethod
    def get_api_url(cls):
        """Return the url of the '/api/read/' endpoint of the blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API url for the post with the given integer id."""
        return '%s?id=%d' % (cls.get_api_url(), tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Build the dict describing a post, or return None for non-photo posts."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_holders = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order. Only the posts newer than
        last_comic are returned; nothing is returned when the post of
        last_comic cannot be found."""
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Tumblr posts get deleted from time to time. Looking for a
            # deleted previous post could take a long time for nothing, so
            # its existence is checked first to fail early and clearly.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        summary = get_soup_at_url(api_url).find('posts')
        start, total = int(summary['start']), int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                tumblr_id = int(post['id'])
                if waiting_for_id and waiting_for_id == tumblr_id:
                    # Reached the last known post: everything newer is collected.
                    return reversed(collected)
                collected.append(post)
        if waiting_for_id is not None:
            print("Did not find %s : there might be a problem" % waiting_for_id)
            return []
        return reversed(collected)
3592
3593
3594
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retriever for Saturday Morning Breakfast Cereal, from its Tumblr blog."""
    # Other places where the comic is published:
    #  - http://www.gocomics.com/saturday-morning-breakfast-cereal
    #  - http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3602
3603
3604
class IrwinCardozo(GenericTumblrV1):
    """Retriever for the Irwin Cardozo comics, from their Tumblr blog."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3609
3610
3611
class AccordingToDevin(GenericTumblrV1):
    """Retriever for the According To Devin comics, from their Tumblr blog."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3616
3617
3618
class ItsTheTieTumblr(GenericTumblrV1):
    """Retriever for the It's the tie comics, from their Tumblr blog."""
    # Other places where the comic is published:
    #  - http://itsthetie.com
    #  - https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3626
3627
3628
class OctopunsTumblr(GenericTumblrV1):
    """Retriever for the Octopuns comics, from their Tumblr blog."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3634
3635
3636
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retriever for the Pictures In Boxes comics, from their Tumblr blog."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'https://picturesinboxescomic.tumblr.com'
3642
3643
3644
class TubeyToonsTumblr(GenericTumblrV1):
    """Retriever for the TubeyToons comics, from their Tumblr blog."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/Tubey-Toons
    #  - http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'https://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' may be a typo for 'TUBEYTOONS' - check the
    # category used by any other Tubey Toons class before changing it.
    _categories = ('TUNEYTOONS', )
3652
3653
3654
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retriever for the Unearthed comics, from their Tumblr blog."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/UnearthedComics
    #  - http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3662
3663
3664
class PieComic(GenericTumblrV1):
    """Retriever for the Pie Comic comics, from their Tumblr blog."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3669
3670
3671
class MrEthanDiamond(GenericTumblrV1):
3672
    """Class to retrieve Mr Ethan Diamond comics."""
3673
    name = 'diamond'
3674
    long_name = 'Mr Ethan Diamond'
3675
    url = 'http://mrethandiamond.tumblr.com'
3676
3677
3678
class Flocci(GenericTumblrV1):
3679
    """Class to retrieve floccinaucinihilipilification comics."""
3680
    name = 'flocci'
3681
    long_name = 'floccinaucinihilipilification'
3682
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3683
3684
3685
class UpAndOut(GenericTumblrV1):
3686
    """Class to retrieve Up & Out comics."""
3687
    # Also on http://tapastic.com/series/UP-and-OUT
3688
    name = 'upandout'
3689
    long_name = 'Up And Out (from Tumblr)'
3690
    url = 'http://upandoutcomic.tumblr.com'
3691
3692
3693
class Pundemonium(GenericTumblrV1):
3694
    """Class to retrieve Pundemonium comics."""
3695
    name = 'pundemonium'
3696
    long_name = 'Pundemonium'
3697
    url = 'http://monstika.tumblr.com'
3698
3699
3700
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
3701
    """Class to retrieve Poorly Drawn Lines comics."""
3702
    # Also on http://poorlydrawnlines.com
3703
    name = 'poorlydrawn-tumblr'
3704
    long_name = 'Poorly Drawn Lines (from Tumblr)'
3705
    url = 'http://pdlcomics.tumblr.com'
3706
    _categories = ('POORLYDRAWN', )
3707
3708
3709
class PearShapedComics(GenericTumblrV1):
3710
    """Class to retrieve Pear Shaped Comics."""
3711
    name = 'pearshaped'
3712
    long_name = 'Pear-Shaped Comics'
3713
    url = 'http://pearshapedcomics.com'
3714
3715
3716
class PondScumComics(GenericTumblrV1):
3717
    """Class to retrieve Pond Scum Comics."""
3718
    name = 'pond'
3719
    long_name = 'Pond Scum'
3720
    url = 'http://pondscumcomic.tumblr.com'
3721
3722
3723
class MercworksTumblr(GenericTumblrV1):
3724
    """Class to retrieve Mercworks comics."""
3725
    # Also on http://mercworks.net
3726
    name = 'mercworks-tumblr'
3727
    long_name = 'Mercworks (from Tumblr)'
3728
    url = 'http://mercworks.tumblr.com'
3729
3730
3731
class OwlTurdTumblr(GenericTumblrV1):
3732
    """Class to retrieve Owl Turd comics."""
3733
    # Also on http://tapastic.com/series/Owl-Turd-Comix
3734
    name = 'owlturd-tumblr'
3735
    long_name = 'Owl Turd (from Tumblr)'
3736
    url = 'http://owlturd.com'
3737
    _categories = ('OWLTURD', )
3738
3739
3740
class VectorBelly(GenericTumblrV1):
3741
    """Class to retrieve Vector Belly comics."""
3742
    # Also on http://vectorbelly.com
3743
    name = 'vector'
3744
    long_name = 'Vector Belly'
3745
    url = 'http://vectorbelly.tumblr.com'
3746
3747
3748
class GoneIntoRapture(GenericTumblrV1):
3749
    """Class to retrieve Gone Into Rapture comics."""
3750
    # Also on http://goneintorapture.tumblr.com
3751
    # Also on http://tapastic.com/series/Goneintorapture
3752
    name = 'rapture'
3753
    long_name = 'Gone Into Rapture'
3754
    url = 'http://goneintorapture.com'
3755
3756
3757
class TheOatmealTumblr(GenericTumblrV1):
3758
    """Class to retrieve The Oatmeal comics."""
3759
    # Also on http://theoatmeal.com
3760
    name = 'oatmeal-tumblr'
3761
    long_name = 'The Oatmeal (from Tumblr)'
3762
    url = 'http://oatmeal.tumblr.com'
3763
3764
3765
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
3766
    """Class to retrieve Heck If I Know Comics."""
3767
    # Also on http://tapastic.com/series/Regular
3768
    name = 'heck-tumblr'
3769
    long_name = 'Heck if I Know comics (from Tumblr)'
3770
    url = 'http://heckifiknowcomics.com'
3771
3772
3773
class MyJetPack(GenericTumblrV1):
3774
    """Class to retrieve My Jet Pack comics."""
3775
    name = 'jetpack'
3776
    long_name = 'My Jet Pack'
3777
    url = 'http://myjetpack.tumblr.com'
3778
3779
3780
class CheerUpEmoKidTumblr(GenericTumblrV1):
3781
    """Class to retrieve CheerUpEmoKid comics."""
3782
    # Also on http://www.cheerupemokid.com
3783
    # Also on http://tapastic.com/series/CUEK
3784
    name = 'cuek-tumblr'
3785
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
3786
    url = 'https://enzocomics.tumblr.com'
3787
3788
3789
class ForLackOfABetterComic(GenericTumblrV1):
3790
    """Class to retrieve For Lack Of A Better Comics."""
3791
    # Also on http://forlackofabettercomic.com
3792
    name = 'lack'
3793
    long_name = 'For Lack Of A Better Comic'
3794
    url = 'http://forlackofabettercomic.tumblr.com'
3795
3796
3797
class ZenPencilsTumblr(GenericTumblrV1):
3798
    """Class to retrieve ZenPencils comics."""
3799
    # Also on http://zenpencils.com
3800
    # Also on http://www.gocomics.com/zen-pencils
3801
    name = 'zenpencils-tumblr'
3802
    long_name = 'Zen Pencils (from Tumblr)'
3803
    url = 'http://zenpencils.tumblr.com'
3804
    _categories = ('ZENPENCILS', )
3805
3806
3807
class ThreeWordPhraseTumblr(GenericTumblrV1):
3808
    """Class to retrieve Three Word Phrase comics."""
3809
    # Also on http://threewordphrase.com
3810
    name = 'threeword-tumblr'
3811
    long_name = 'Three Word Phrase (from Tumblr)'
3812
    url = 'http://threewordphrase.tumblr.com'
3813
3814
3815
class TimeTrabbleTumblr(GenericTumblrV1):
3816
    """Class to retrieve Time Trabble comics."""
3817
    # Also on http://timetrabble.com
3818
    name = 'timetrabble-tumblr'
3819
    long_name = 'Time Trabble (from Tumblr)'
3820
    url = 'http://timetrabble.tumblr.com'
3821
3822
3823
class SafelyEndangeredTumblr(GenericTumblrV1):
3824
    """Class to retrieve Safely Endangered comics."""
3825
    # Also on http://www.safelyendangered.com
3826
    name = 'endangered-tumblr'
3827
    long_name = 'Safely Endangered (from Tumblr)'
3828
    url = 'http://tumblr.safelyendangered.com'
3829
3830
3831
class MouseBearComedyTumblr(GenericTumblrV1):
3832
    """Class to retrieve Mouse Bear Comedy comics."""
3833
    # Also on http://www.mousebearcomedy.com
3834
    name = 'mousebear-tumblr'
3835
    long_name = 'Mouse Bear Comedy (from Tumblr)'
3836
    url = 'http://mousebearcomedy.tumblr.com'
3837
3838
3839
class BouletCorpTumblr(GenericTumblrV1):
3840
    """Class to retrieve BouletCorp comics."""
3841
    # Also on http://www.bouletcorp.com
3842
    name = 'boulet-tumblr'
3843
    long_name = 'Boulet Corp (from Tumblr)'
3844
    url = 'https://bouletcorp.tumblr.com'
3845
    _categories = ('BOULET', )
3846
3847
3848
class TheAwkwardYetiTumblr(GenericTumblrV1):
3849
    """Class to retrieve The Awkward Yeti comics."""
3850
    # Also on http://www.gocomics.com/the-awkward-yeti
3851
    # Also on http://theawkwardyeti.com
3852
    # Also on https://tapastic.com/series/TheAwkwardYeti
3853
    name = 'yeti-tumblr'
3854
    long_name = 'The Awkward Yeti (from Tumblr)'
3855
    url = 'http://larstheyeti.tumblr.com'
3856
    _categories = ('YETI', )
3857
3858
3859
class NellucNhoj(GenericTumblrV1):
3860
    """Class to retrieve NellucNhoj comics."""
3861
    name = 'nhoj'
3862
    long_name = 'Nelluc Nhoj'
3863
    url = 'http://nellucnhoj.com'
3864
3865
3866
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
3867
    """Class to retrieve Down The Upward Spiral comics."""
3868
    # Also on http://www.downtheupwardspiral.com
3869
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
3870
    name = 'spiral-tumblr'
3871
    long_name = 'Down the Upward Spiral (from Tumblr)'
3872
    url = 'http://downtheupwardspiral.tumblr.com'
3873
3874
3875
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Category tags are declared as `_categories` (leading underscore) by the
    # other comic classes in this module; the un-prefixed spelling did not
    # follow that convention.
    _categories = ('DAMILEE', )
3882
3883
3884
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Category tags are declared as `_categories` (leading underscore) by the
    # other comic classes in this module; the un-prefixed spelling did not
    # follow that convention.
    _categories = ('DAMILEE', )
3893
3894
3895
class OneOneOneOneComicTumblr(GenericTumblrV1):
3896
    """Class to retrieve 1111 Comics."""
3897
    # Also on http://www.1111comics.me
3898
    # Also on https://tapastic.com/series/1111-Comics
3899
    name = '1111-tumblr'
3900
    long_name = '1111 Comics (from Tumblr)'
3901
    url = 'http://comics1111.tumblr.com'
3902
    _categories = ('ONEONEONEONE', )
3903
3904
3905
class JhallComicsTumblr(GenericTumblrV1):
3906
    """Class to retrieve Jhall Comics."""
3907
    # Also on http://jhallcomics.com
3908
    name = 'jhall-tumblr'
3909
    long_name = 'Jhall Comics (from Tumblr)'
3910
    url = 'http://jhallcomics.tumblr.com'
3911
3912
3913
class BerkeleyMewsTumblr(GenericTumblrV1):
3914
    """Class to retrieve Berkeley Mews comics."""
3915
    # Also on http://www.gocomics.com/berkeley-mews
3916
    # Also on http://www.berkeleymews.com
3917
    name = 'berkeley-tumblr'
3918
    long_name = 'Berkeley Mews (from Tumblr)'
3919
    url = 'http://mews.tumblr.com'
3920
    _categories = ('BERKELEY', )
3921
3922
3923
class JoanCornellaTumblr(GenericTumblrV1):
3924
    """Class to retrieve Joan Cornella comics."""
3925
    # Also on http://joancornella.net
3926
    name = 'cornella-tumblr'
3927
    long_name = 'Joan Cornella (from Tumblr)'
3928
    url = 'http://cornellajoan.tumblr.com'
3929
3930
3931
class RespawnComicTumblr(GenericTumblrV1):
3932
    """Class to retrieve Respawn Comic."""
3933
    # Also on http://respawncomic.com
3934
    name = 'respawn-tumblr'
3935
    long_name = 'Respawn Comic (from Tumblr)'
3936
    url = 'https://respawncomic.tumblr.com'
3937
3938
3939
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name spelled "Hallbeck", matching the class name, `name` and `url`.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the tag is spelled 'HALLBACK'. Left untouched because other
    # classes may rely on this exact value -- confirm before renaming.
    _categories = ('HALLBACK', )
3949
3950
3951
class ComicNuggets(GenericTumblrV1):
3952
    """Class to retrieve Comic Nuggets."""
3953
    name = 'nuggets'
3954
    long_name = 'Comic Nuggets'
3955
    url = 'http://comicnuggets.com'
3956
3957
3958
class PigeonGazetteTumblr(GenericTumblrV1):
3959
    """Class to retrieve The Pigeon Gazette comics."""
3960
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
3961
    name = 'pigeon-tumblr'
3962
    long_name = 'The Pigeon Gazette (from Tumblr)'
3963
    url = 'http://thepigeongazette.tumblr.com'
3964
3965
3966
class CancerOwl(GenericTumblrV1):
3967
    """Class to retrieve Cancer Owl comics."""
3968
    # Also on http://cancerowl.com
3969
    name = 'cancerowl-tumblr'
3970
    long_name = 'Cancer Owl (from Tumblr)'
3971
    url = 'http://cancerowl.tumblr.com'
3972
3973
3974
class FowlLanguageTumblr(GenericTumblrV1):
3975
    """Class to retrieve Fowl Language comics."""
3976
    # Also on http://www.fowllanguagecomics.com
3977
    # Also on http://tapastic.com/series/Fowl-Language-Comics
3978
    # Also on http://www.gocomics.com/fowl-language
3979
    name = 'fowllanguage-tumblr'
3980
    long_name = 'Fowl Language Comics (from Tumblr)'
3981
    url = 'http://fowllanguagecomics.tumblr.com'
3982
    _categories = ('FOWLLANGUAGE', )
3983
3984
3985
class TheOdd1sOutTumblr(GenericTumblrV1):
3986
    """Class to retrieve The Odd 1s Out comics."""
3987
    # Also on http://theodd1sout.com
3988
    # Also on https://tapastic.com/series/Theodd1sout
3989
    name = 'theodd-tumblr'
3990
    long_name = 'The Odd 1s Out (from Tumblr)'
3991
    url = 'http://theodd1sout.tumblr.com'
3992
3993
3994
class TheUnderfoldTumblr(GenericTumblrV1):
3995
    """Class to retrieve The Underfold comics."""
3996
    # Also on http://theunderfold.com
3997
    name = 'underfold-tumblr'
3998
    long_name = 'The Underfold (from Tumblr)'
3999
    url = 'http://theunderfold.tumblr.com'
4000
4001
4002
class LolNeinTumblr(GenericTumblrV1):
4003
    """Class to retrieve Lol Nein comics."""
4004
    # Also on http://lolnein.com
4005
    name = 'lolnein-tumblr'
4006
    long_name = 'Lol Nein (from Tumblr)'
4007
    url = 'http://lolneincom.tumblr.com'
4008
4009
4010
class FatAwesomeComicsTumblr(GenericTumblrV1):
4011
    """Class to retrieve Fat Awesome Comics."""
4012
    # Also on http://fatawesome.com/comics
4013
    name = 'fatawesome-tumblr'
4014
    long_name = 'Fat Awesome (from Tumblr)'
4015
    url = 'http://fatawesomecomedy.tumblr.com'
4016
4017
4018
class TheWorldIsFlatTumblr(GenericTumblrV1):
4019
    """Class to retrieve The World Is Flat Comics."""
4020
    # Also on https://tapastic.com/series/The-World-is-Flat
4021
    name = 'flatworld-tumblr'
4022
    long_name = 'The World Is Flat (from Tumblr)'
4023
    url = 'http://theworldisflatcomics.com'
4024
4025
4026
class DorrisMc(GenericTumblrV1):
    """Class to retrieve Dorris Mc comics."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4032
4033
4034
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
4035
    """Class to retrieve Leleoz comics."""
4036
    # Also on https://tapastic.com/series/Leleoz
4037
    name = 'leleoz-tumblr'
4038
    long_name = 'Leleoz (from Tumblr)'
4039
    url = 'http://leleozcomics.tumblr.com'
4040
4041
4042
class MoonBeardTumblr(GenericTumblrV1):
4043
    """Class to retrieve MoonBeard comics."""
4044
    # Also on http://moonbeard.com
4045
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
4046
    name = 'moonbeard-tumblr'
4047
    long_name = 'Moon Beard (from Tumblr)'
4048
    url = 'http://blog.squiresjam.es'
4049
4050
4051
class AComik(GenericTumblrV1):
    """Class to retrieve A Comik comics."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4056
4057
4058
class ClassicRandy(GenericTumblrV1):
4059
    """Class to retrieve Classic Randy comics."""
4060
    name = 'randy'
4061
    long_name = 'Classic Randy'
4062
    url = 'http://classicrandy.tumblr.com'
4063
4064
4065
class DagssonTumblr(GenericTumblrV1):
4066
    """Class to retrieve Dagsson comics."""
4067
    # Also on http://www.dagsson.com
4068
    name = 'dagsson-tumblr'
4069
    long_name = 'Dagsson Hugleikur (from Tumblr)'
4070
    url = 'https://hugleikurdagsson.tumblr.com'
4071
4072
4073
class LinsEditionsTumblr(GenericTumblrV1):
4074
    """Class to retrieve L.I.N.S. Editions comics."""
4075
    # Also on https://linsedition.com
4076
    # Now on http://warandpeas.tumblr.com
4077
    name = 'lins-tumblr'
4078
    long_name = 'L.I.N.S. Editions (from Tumblr)'
4079
    url = 'https://linscomics.tumblr.com'
4080
    _categories = ('LINS', )
4081
4082
4083
class WarAndPeasTumblr(GenericTumblrV1):
4084
    """Class to retrieve War And Peas comics."""
4085
    # Was on https://linscomics.tumblr.com
4086
    name = 'warandpeas-tumblr'
4087
    long_name = 'War And Peas (from Tumblr)'
4088
    url = 'http://warandpeas.tumblr.com'
4089
    _categories = ('WARANDPEAS', )
4090
4091
4092
class OrigamiHotDish(GenericTumblrV1):
4093
    """Class to retrieve Origami Hot Dish comics."""
4094
    name = 'origamihotdish'
4095
    long_name = 'Origami Hot Dish'
4096
    url = 'http://origamihotdish.com'
4097
4098
4099
class HitAndMissComicsTumblr(GenericTumblrV1):
4100
    """Class to retrieve Hit and Miss Comics."""
4101
    name = 'hitandmiss'
4102
    long_name = 'Hit and Miss Comics'
4103
    url = 'https://hitandmisscomics.tumblr.com'
4104
4105
4106
class HMBlanc(GenericTumblrV1):
4107
    """Class to retrieve HM Blanc comics."""
4108
    name = 'hmblanc'
4109
    long_name = 'HM Blanc'
4110
    url = 'http://hmblanc.tumblr.com'
4111
4112
4113
class TalesOfAbsurdityTumblr(GenericTumblrV1):
4114
    """Class to retrieve Tales Of Absurdity comics."""
4115
    # Also on http://talesofabsurdity.com
4116
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
4117
    name = 'absurdity-tumblr'
4118
    long_name = 'Tales of Absurdity (from Tumblr)'
4119
    url = 'http://talesofabsurdity.tumblr.com'
4120
    _categories = ('ABSURDITY', )
4121
4122
4123
class RobbieAndBobby(GenericTumblrV1):
4124
    """Class to retrieve Robbie And Bobby comics."""
4125
    # Also on http://robbieandbobby.com
4126
    name = 'robbie-tumblr'
4127
    long_name = 'Robbie And Bobby (from Tumblr)'
4128
    url = 'http://robbieandbobby.tumblr.com'
4129
4130
4131
class ElectricBunnyComicTumblr(GenericTumblrV1):
4132
    """Class to retrieve Electric Bunny Comics."""
4133
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
4134
    name = 'bunny-tumblr'
4135
    long_name = 'Electric Bunny Comic (from Tumblr)'
4136
    url = 'http://electricbunnycomics.tumblr.com'
4137
4138
4139
class Hoomph(GenericTumblrV1):
4140
    """Class to retrieve Hoomph comics."""
4141
    name = 'hoomph'
4142
    long_name = 'Hoomph'
4143
    url = 'http://hoom.ph'
4144
4145
4146
class BFGFSTumblr(GenericTumblrV1):
4147
    """Class to retrieve BFGFS comics."""
4148
    # Also on https://tapastic.com/series/BFGFS
4149
    # Also on http://bfgfs.com
4150
    name = 'bfgfs-tumblr'
4151
    long_name = 'BFGFS (from Tumblr)'
4152
    url = 'https://bfgfs.tumblr.com'
4153
4154
4155
class DoodleForFood(GenericTumblrV1):
4156
    """Class to retrieve Doodle For Food comics."""
4157
    # Also on https://tapastic.com/series/Doodle-for-Food
4158
    name = 'doodle'
4159
    long_name = 'Doodle For Food'
4160
    url = 'http://www.doodleforfood.com'
4161
4162
4163
class CassandraCalinTumblr(GenericTumblrV1):
4164
    """Class to retrieve C. Cassandra comics."""
4165
    # Also on http://cassandracalin.com
4166
    # Also on https://tapastic.com/series/C-Cassandra-comics
4167
    name = 'cassandra-tumblr'
4168
    long_name = 'Cassandra Calin (from Tumblr)'
4169
    url = 'http://c-cassandra.tumblr.com'
4170
4171
4172
class DougWasTaken(GenericTumblrV1):
4173
    """Class to retrieve Doug Was Taken comics."""
4174
    name = 'doug'
4175
    long_name = 'Doug Was Taken'
4176
    url = 'https://dougwastaken.tumblr.com'
4177
4178
4179
class MandatoryRollerCoaster(GenericTumblrV1):
4180
    """Class to retrieve Mandatory Roller Coaster comics."""
4181
    name = 'rollercoaster'
4182
    long_name = 'Mandatory Roller Coaster'
4183
    url = 'http://mandatoryrollercoaster.com'
4184
4185
4186
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
4187
    """Class to retrieve C'Est Pas En Regardant Ses Pompes (...)  comics."""
4188
    name = 'cperspqccltt'
4189
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
4190
    url = 'http://marcoandco.tumblr.com'
4191
4192
4193
class TheGrohlTroll(GenericTumblrV1):
4194
    """Class to retrieve The Grohl Troll comics."""
4195
    name = 'grohltroll'
4196
    long_name = 'The Grohl Troll'
4197
    url = 'http://thegrohltroll.com'
4198
4199
4200
class WebcomicName(GenericTumblrV1):
4201
    """Class to retrieve Webcomic Name comics."""
4202
    name = 'webcomicname'
4203
    long_name = 'Webcomic Name'
4204
    url = 'http://webcomicname.com'
4205
4206
4207
class BooksOfAdam(GenericTumblrV1):
4208
    """Class to retrieve Books of Adam comics."""
4209
    # Also on http://www.booksofadam.com
4210
    name = 'booksofadam'
4211
    long_name = 'Books of Adam'
4212
    url = 'http://booksofadam.tumblr.com'
4213
4214
4215
class HarkAVagrant(GenericTumblrV1):
4216
    """Class to retrieve Hark A Vagrant comics."""
4217
    # Also on http://www.harkavagrant.com
4218
    name = 'hark-tumblr'
4219
    long_name = 'Hark A Vagrant (from Tumblr)'
4220
    url = 'http://beatonna.tumblr.com'
4221
4222
4223
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Class to retrieve Our Super Adventure comics."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4231
4232
4233
class JakeLikesOnions(GenericTumblrV1):
4234
    """Class to retrieve Jake Likes Onions comics."""
4235
    name = 'jake'
4236
    long_name = 'Jake Likes Onions'
4237
    url = 'http://jakelikesonions.com'
4238
4239
4240
class InYourFaceCake(GenericTumblrV1):
4241
    """Class to retrieve In Your Face Cake comics."""
4242
    name = 'inyourfacecake-tumblr'
4243
    long_name = 'In Your Face Cake (from Tumblr)'
4244
    url = 'https://in-your-face-cake.tumblr.com'
4245
4246
4247
class Robospunk(GenericTumblrV1):
4248
    """Class to retrieve Robospunk comics."""
4249
    name = 'robospunk'
4250
    long_name = 'Robospunk'
4251
    url = 'http://robospunk.com'
4252
4253
4254
class BananaTwinky(GenericTumblrV1):
4255
    """Class to retrieve Banana Twinky comics."""
4256
    name = 'banana'
4257
    long_name = 'Banana Twinky'
4258
    url = 'https://bananatwinky.tumblr.com'
4259
4260
4261
class YesterdaysPopcornTumblr(GenericTumblrV1):
4262
    """Class to retrieve Yesterday's Popcorn comics."""
4263
    # Also on http://www.yesterdayspopcorn.com
4264
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
4265
    name = 'popcorn-tumblr'
4266
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
4267
    url = 'http://yesterdayspopcorn.tumblr.com'
4268
4269
4270
class TwistedDoodles(GenericTumblrV1):
4271
    """Class to retrieve Twisted Doodles comics."""
4272
    name = 'twisted'
4273
    long_name = 'Twisted Doodles'
4274
    url = 'http://www.twisteddoodles.com'
4275
4276
4277
class UbertoolTumblr(GenericTumblrV1):
4278
    """Class to retrieve Ubertool comics."""
4279
    # Also on http://ubertoolcomic.com
4280
    # Also on https://tapastic.com/series/ubertool
4281
    name = 'ubertool-tumblr'
4282
    long_name = 'Ubertool (from Tumblr)'
4283
    url = 'https://ubertool.tumblr.com'
4284
    _categories = ('UBERTOOL', )
4285
4286
4287
class LittleLifeLinesTumblr(GenericTumblrV1):
4288
    """Class to retrieve Little Life Lines comics."""
4289
    # Also on http://www.littlelifelines.com
4290
    name = 'life-tumblr'
4291
    long_name = 'Little Life Lines (from Tumblr)'
4292
    url = 'https://little-life-lines.tumblr.com'
4293
4294
4295
class TheyCanTalk(GenericTumblrV1):
4296
    """Class to retrieve They Can Talk comics."""
4297
    name = 'theycantalk'
4298
    long_name = 'They Can Talk'
4299
    url = 'http://theycantalk.com'
4300
4301
4302
class Will5NeverCome(GenericTumblrV1):
4303
    """Class to retrieve Will 5:00 Never Come comics."""
4304
    name = 'will5'
4305
    long_name = 'Will 5:00 Never Come ?'
4306
    url = 'http://will5nevercome.com'
4307
4308
4309
class Sephko(GenericTumblrV1):
    """Class to retrieve Sephko comics."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4315
4316
4317
class BlazersAtDawn(GenericTumblrV1):
4318
    """Class to retrieve Blazers At Dawn Comics."""
4319
    name = 'blazers'
4320
    long_name = 'Blazers At Dawn'
4321
    url = 'http://blazersatdawn.tumblr.com'
4322
4323
4324
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Art By Moga comics."""
    # Deactivated because it downloads too many things
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4329
4330
4331
class VerbalVomitTumblr(GenericTumblrV1):
4332
    """Class to retrieve Verbal Vomit comics."""
4333
    # Also on http://www.verbal-vomit.com
4334
    name = 'vomit-tumblr'
4335
    long_name = 'Verbal Vomit (from Tumblr)'
4336
    url = 'http://verbalvomits.tumblr.com'
4337
4338
4339
class LibraryComic(GenericTumblrV1):
4340
    """Class to retrieve LibraryComic."""
4341
    # Also on http://librarycomic.com
4342
    name = 'library-tumblr'
4343
    long_name = 'LibraryComic (from Tumblr)'
4344
    url = 'https://librarycomic.tumblr.com'
4345
4346
4347
class TizzyStitchBirdTumblr(GenericTumblrV1):
4348
    """Class to retrieve Tizzy Stitch Bird comics."""
4349
    # Also on http://tizzystitchbird.com
4350
    # Also on https://tapastic.com/series/TizzyStitchbird
4351
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
4352
    name = 'tizzy-tumblr'
4353
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
4354
    url = 'http://tizzystitchbird.tumblr.com'
4355
4356
4357
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
4358
    """Class to retrieve VictimsOfCircumsolar comics."""
4359
    # Also on http://www.victimsofcircumsolar.com
4360
    name = 'circumsolar-tumblr'
4361
    long_name = 'Victims Of Circumsolar (from Tumblr)'
4362
    url = 'https://victimsofcomics.tumblr.com'
4363
4364
4365
class RockPaperCynicTumblr(GenericTumblrV1):
4366
    """Class to retrieve RockPaperCynic comics."""
4367
    # Also on http://www.rockpapercynic.com
4368
    # Also on https://tapastic.com/series/rockpapercynic
4369
    name = 'rpc-tumblr'
4370
    long_name = 'Rock Paper Cynic (from Tumblr)'
4371
    url = 'http://rockpapercynic.tumblr.com'
4372
4373
4374
class DeadlyPanelTumblr(GenericTumblrV1):
4375
    """Class to retrieve Deadly Panel comics."""
4376
    # Also on http://www.deadlypanel.com
4377
    # Also on https://tapastic.com/series/deadlypanel
4378
    name = 'deadly-tumblr'
4379
    long_name = 'Deadly Panel (from Tumblr)'
4380
    url = 'https://deadlypanel.tumblr.com'
4381
4382
4383
class CatanaComics(GenericTumblrV1):
4384
    """Class to retrieve Catana comics."""
4385
    name = 'catana'
4386
    long_name = 'Catana'
4387
    url = 'http://www.catanacomics.com'
4388
4389
4390
class AngryAtNothingTumblr(GenericTumblrV1):
4391
    """Class to retrieve Angry at Nothing comics."""
4392
    # Also on http://www.angryatnothing.net
4393
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
4394
    name = 'angry-tumblr'
4395
    long_name = 'Angry At Nothing (from Tumblr)'
4396
    url = 'http://angryatnothing.tumblr.com'
4397
4398
4399
class ShanghaiTango(GenericTumblrV1):
4400
    """Class to retrieve Shanghai Tango comic."""
4401
    name = 'tango'
4402
    long_name = 'Shanghai Tango'
4403
    url = 'http://tango2010weibo.tumblr.com'
4404
4405
4406
class OffTheLeashDogTumblr(GenericTumblrV1):
4407
    """Class to retrieve Off The Leash Dog comics."""
4408
    # Also on http://offtheleashdogcartoons.com
4409
    # Also on http://www.rupertfawcettcartoons.com
4410
    name = 'offtheleash-tumblr'
4411
    long_name = 'Off The Leash Dog (from Tumblr)'
4412
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
4413
    _categories = ('FAWCETT', )
4414
4415
4416
class ImogenQuestTumblr(GenericTumblrV1):
4417
    """Class to retrieve Imogen Quest comics."""
4418
    # Also on http://imogenquest.net
4419
    name = 'imogen-tumblr'
4420
    long_name = 'Imogen Quest (from Tumblr)'
4421
    url = 'http://imoquest.tumblr.com'
4422
4423
4424
class Shitfest(GenericTumblrV1):
4425
    """Class to retrieve Shitfest comics."""
4426
    name = 'shitfest'
4427
    long_name = 'Shitfest'
4428
    url = 'http://shitfestcomic.com'
4429
4430
4431
class IceCreamSandwichComics(GenericTumblrV1):
4432
    """Class to retrieve Ice Cream Sandwich Comics."""
4433
    name = 'icecream'
4434
    long_name = 'Ice Cream Sandwich Comics'
4435
    url = 'http://icecreamsandwichcomics.com'
4436
4437
4438
class Dustinteractive(GenericTumblrV1):
4439
    """Class to retrieve Dustinteractive comics."""
4440
    name = 'dustinteractive'
4441
    long_name = 'Dustinteractive'
4442
    url = 'http://dustinteractive.com'
4443
4444
4445
class StickyCinemaFloor(GenericTumblrV1):
4446
    """Class to retrieve Sticky Cinema Floor comics."""
4447
    name = 'stickycinema'
4448
    long_name = 'Sticky Cinema Floor'
4449
    url = 'https://stickycinemafloor.tumblr.com'
4450
4451
4452
class IncidentalComicsTumblr(GenericTumblrV1):
4453
    """Class to retrieve Incidental Comics."""
4454
    # Also on http://www.incidentalcomics.com
4455
    name = 'incidental-tumblr'
4456
    long_name = 'Incidental Comics (from Tumblr)'
4457
    url = 'http://incidentalcomics.tumblr.com'
4458
4459
4460
class HorovitzComics(GenericListableComic):
    """Base class holding the logic shared by the comics from Horovitz.

    Subclasses supply `name`, `long_name` and a `link_re` pattern whose first
    group captures the comic number found in archive links.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # The three numeric path components following 'comics/' in the image URL
    # are read as (year, month, day) by get_comic_info.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: concrete subclasses must override this with a compiled regex.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic.

        `link` is the archive anchor (its 'href' gives the comic number and
        its text the title); `soup` is the parsed comic page, which must
        contain exactly one <img id="comic"> whose 'src' encodes the date.
        """
        number_match = cls.link_re.match(link['href'])
        comic_num = int(number_match.groups()[0])
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = (int(part) for part in date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links matching `link_re`, in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4491
4492
4493
class HorovitzNew(HorovitzComics):
    """Class to retrieve the 'new' series of Horovitz comics."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # Group 1 is the comic number taken from the archive link.
    link_re = re.compile('^/comics/new/([0-9]+)$')
4498
4499
4500
class HorovitzClassic(HorovitzComics):
    """Class to retrieve the 'classic' series of Horovitz comics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # Group 1 is the comic number taken from the archive link.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4505
4506
4507
class GenericGoComic(GenericNavigableComic):
    """Base class holding the logic shared by the comics from gocomics.com.

    The concrete comics visible below only define `name`, `long_name` and `url`.
    """
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic, looked up on the page at `cls.url`."""
        first_button_class = 'fa btn btn-outline-default btn-circle fa-backward sm '
        landing_soup = get_soup_at_url(cls.url)
        return landing_soup.find('a', class_=first_button_class)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (if `next_` is true) or previous comic."""
        # The class strings (trailing space included) are matched verbatim.
        if next_:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Get the comic url by joining the link's href onto the gocomics root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its page `soup`.

        The date, author and tags come from the page's `article:*` meta tags;
        the images come from the <picture class="img-fluid item-comic-image">
        element. `link` is not used.
        """
        def meta_content(prop):
            """Return the 'content' of the first <meta> with the given property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        comic_author = meta_content('article:author')
        comic_tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
            'author': comic_author,
            'tags': comic_tags,
        }
4544
4545
4546
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine, retrieved from GoComics."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4551
4552
4553
class Peanuts(GenericGoComic):
    """Peanuts, retrieved from GoComics."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4558
4559
4560
class MattWuerker(GenericGoComic):
    """Matt Wuerker cartoons, retrieved from GoComics."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4565
4566
4567
class TomToles(GenericGoComic):
    """Tom Toles cartoons, retrieved from GoComics."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4572
4573
4574
class BreakOfDay(GenericGoComic):
    """Break Of Day, retrieved from GoComics."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4579
4580
4581
class Brevity(GenericGoComic):
    """Brevity, retrieved from GoComics."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4586
4587
4588
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez cartoons, retrieved from GoComics."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4593
4594
4595
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich cartoons, retrieved from GoComics."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4600
4601
4602
class JimBenton(GenericGoComic):
    """Jim Benton cartoons, retrieved from GoComics."""
    # Also published at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4608
4609
4610
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater, retrieved from GoComics."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4615
4616
4617
class SunnyStreet(GenericGoComic):
    """Sunny Street, retrieved from GoComics."""
    # Also published at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4623
4624
4625
class OffTheMark(GenericGoComic):
    """Off The Mark, retrieved from GoComics."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4631
4632
4633
class WuMo(GenericGoComic):
    """WuMo, retrieved from GoComics."""
    # Also published at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4639
4640
4641
class LunarBaboon(GenericGoComic):
    """Lunar Baboon, retrieved from GoComics."""
    # Also published at:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4648
4649
4650
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from GoComics."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4657
4658
4659
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal, retrieved from GoComics."""
    # Also published at:
    #  - http://smbc-comics.tumblr.com
    #  - http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4667
4668
4669
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes, retrieved from GoComics."""
    # This is the GoComics source, as opposed to
    # http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4675
4676
4677
class RallGoComic(GenericGoComic):
    """Ted Rall cartoons, retrieved from GoComics."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL', )
4684
4685
4686
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti, retrieved from GoComics."""
    # Also published at:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4695
4696
4697
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews, retrieved from GoComics."""
    # Also published at:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4705
4706
4707
class SheldonGoComics(GenericGoComic):
    """Sheldon, retrieved from GoComics."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4713
4714
4715
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language, retrieved from GoComics."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4724
4725
4726
class NickAnderson(GenericGoComic):
    """Nick Anderson cartoons, retrieved from GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4731
4732
4733
class GarfieldGoComics(GenericGoComic):
    """Garfield, retrieved from GoComics."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4740
4741
4742
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc Comics, retrieved from GoComics."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4748
4749
4750
class FoxTrot(GenericGoComic):
    """FoxTrot, retrieved from GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4755
4756
4757
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics, retrieved from GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4762
4763
4764
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Mister & Me, GoComics edition.

    GenericEmptyComic comes first in the bases; the original author's note
    suggests the comic may have been removed from GoComics (to be confirmed).
    """
    # Also published at:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4771
4772
4773
class NonSequitur(GenericGoComic):
    """Non Sequitur (by Wiley Miller), retrieved from GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4778
4779
4780
class GenericTapasticComic(GenericListableComic):
    """Shared scraping logic for the comics hosted on tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dictionary for one episode.

        Returns None (after printing a notice) when the page has no
        'art-image' picture yet.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp.
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        pictures = soup.find_all('img', class_='art-image')
        if not pictures:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [picture['src'] for picture in pictures],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the archive element's id."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list parsed from the series page.

        The list is embedded in the page's javascript: the first line looking
        like ``episodeList : <json>,`` is extracted and decoded as JSON.
        An IndexError is raised when no such line exists.
        """
        prefix, suffix = 'episodeList : ', ','
        candidates = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(prefix) and line.endswith(suffix):
                # Keep only the JSON payload between prefix and suffix.
                candidates.append(line[len(prefix):-len(suffix)])
        return json.loads(candidates[0])
4813
4814
4815
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert, retrieved from Tapastic."""
    # Also published at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4821
4822
4823
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4832
4833
4834
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities, retrieved from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4839
4840
4841
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats Comics, retrieved from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4846
4847
4848
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4855
4856
4857
class TubeyToonsTapastic(GenericTapasticComic):
    """Tubey Toons, retrieved from Tapastic."""
    # Also published at:
    #  - http://tubeytoons.com
    #  - https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept
    # unchanged because other classes may share this exact category key.
    _categories = ('TUNEYTOONS', )
4865
4866
4867
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic, retrieved from Tapastic."""
    # Also published at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4873
4874
4875
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://unearthedcomics.com
    #  - https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4883
4884
4885
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4892
4893
4894
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh, retrieved from Tapastic."""
    # Also published at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4900
4901
4902
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack, retrieved from Tapastic."""
    # Also published at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # The series slug really ends in 'Thundershac' in the original code.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4909
4910
4911
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd, retrieved from Tapastic."""
    # Also published at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4918
4919
4920
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture, retrieved from Tapastic."""
    # Also published at:
    #  - http://goneintorapture.tumblr.com
    #  - http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4927
4928
4929
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know Comics, retrieved from Tapastic."""
    # Also published at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4935
4936
4937
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.cheerupemokid.com
    #  - https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4944
4945
4946
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice, retrieved from Tapastic."""
    # Also published at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4952
4953
4954
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out, retrieved from Tapastic."""
    # Also published at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4960
4961
4962
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole, retrieved from Tapastic."""
    # Also published at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4968
4969
4970
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry at Nothing, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.angryatnothing.net
    #  - http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4977
4978
4979
class LeleozTapa(GenericTapasticComic):
    """Leleoz, retrieved from Tapastic."""
    # Also published at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4985
4986
4987
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
4996
4997
4998
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics (Tapastic edition)."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled with the leading underscore used by every other comic class in
    # this file; the previous bare `categories` name did not follow that
    # convention.
    _categories = ('DAMILEE', )
5005
5006
5007
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People (Tapastic edition)."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled with the leading underscore used by every other comic class in
    # this file; the previous bare `categories` name did not follow that
    # convention.
    _categories = ('DAMILEE', )
5016
5017
5018
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
5026
5027
5028
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics (Tapastic edition)."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it previously read 'Tumblr Dry', which contradicts
    # the comic's actual title used everywhere else in this class.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5034
5035
5036
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.deadlypanel.com
    #  - https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5043
5044
5045
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Chris Hallbeck's Maximumble, retrieved from Tapastic."""
    # Also published at:
    #  - https://chrishallbeck.tumblr.com
    #  - http://maximumble.com
    # NOTE(review): 'Hallback' below (long_name and category) looks like a
    # misspelling of 'Hallbeck'; kept as-is since other classes may share it.
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK', )
5053
5054
5055
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
    """Chris Hallbeck's Minimumble, Tapastic edition.

    GenericEmptyComic is listed first in the bases.
    """
    # Also published at:
    #  - https://chrishallbeck.tumblr.com
    #  - http://minimumble.com
    # NOTE(review): 'Hallback' below (long_name and category) looks like a
    # misspelling of 'Hallbeck'; kept as-is since other classes may share it.
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK', )
5063
5064
5065
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
    """Chris Hallbeck's The Book of Biff, Tapastic edition.

    GenericEmptyComic is listed first in the bases.
    """
    # Also published at:
    #  - https://chrishallbeck.tumblr.com
    #  - http://thebookofbiff.com
    # NOTE(review): 'Hallback' below (long_name and category) looks like a
    # misspelling of 'Hallbeck'; kept as-is since other classes may share it.
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK', )
5073
5074
5075
class RandoWisTapa(GenericTapasticComic):
    """RandoWis, retrieved from Tapastic."""
    # Also published at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5081
5082
5083
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette, retrieved from Tapastic."""
    # Also published at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5089
5090
5091
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out, retrieved from Tapastic."""
    # Also published at:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5098
5099
5100
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat, retrieved from Tapastic."""
    # Also published at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5106
5107
5108
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5115
5116
5117
class TalesOfAbsurdityTapa(GenericEmptyComic, GenericTapasticComic):
    """Tales Of Absurdity, Tapastic edition.

    GenericEmptyComic is listed first in the bases.
    """
    # Also published at:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
5125
5126
5127
class BFGFSTapa(GenericTapasticComic):
    """BFGFS, retrieved from Tapastic."""
    # Also published at:
    #  - http://bfgfs.com
    #  - https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5134
5135
5136
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food, retrieved from Tapastic."""
    # Also published at http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5142
5143
5144
class MrLovensteinTapa(GenericTapasticComic):
    """Mr. Lovenstein, retrieved from Tapastic."""
    # NOTE(review): the original "Also on" comment pointed back at this very
    # Tapastic URL (https://tapastic.com/series/MrLovenstein); presumably the
    # comic's own website was meant - to be confirmed.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5150
5151
5152
class CassandraCalinTapa(GenericTapasticComic):
    """C. Cassandra (Cassandra Calin) comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5159
5160
5161
class WafflesAndPancakes(GenericTapasticComic):
    """Waffles And Pancakes, retrieved from Tapastic."""
    # Also published at http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5167
5168
5169
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Yesterday's Popcorn, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.yesterdayspopcorn.com
    #  - http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5176
5177
5178
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
    """Our Super Adventure, Tapastic edition.

    GenericEmptyComic is listed first in the bases.
    """
    # Also published at http://www.oursuperadventure.com
    # Related links kept from the original notes:
    #  - http://sarahssketchbook.tumblr.com
    #  - http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5186
5187
5188
class NamelessPCs(GenericTapasticComic):
    """Nameless PCs, retrieved from Tapastic."""
    # Also published at http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5194
5195
5196
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Down The Upward Spiral, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.downtheupwardspiral.com
    #  - http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5203
5204
5205
class UbertoolTapa(GenericTapasticComic):
    """Ubertool, retrieved from Tapastic."""
    # Also published at:
    #  - http://ubertoolcomic.com
    #  - https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL', )
5213
5214
5215
class BarteNerdsTapa(GenericTapasticComic):
    """BarteNerds, retrieved from Tapastic."""
    # Also published at http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5221
5222
5223
class SmallBlueYonderTapa(GenericTapasticComic):
    """Small Blue Yonder, retrieved from Tapastic."""
    # Also published at http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5229
5230
5231
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Tizzy Stitch Bird, retrieved from Tapastic."""
    # Also published at:
    #  - http://tizzystitchbird.com
    #  - http://tizzystitchbird.tumblr.com
    #  - http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5239
5240
5241
class RockPaperCynicTapa(GenericTapasticComic):
    """Rock Paper Cynic, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.rockpapercynic.com
    #  - http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5248
5249
5250
class ItsTheTieTapa(GenericTapasticComic):
    """It's the tie, retrieved from Tapastic."""
    # Also published at:
    #  - http://itsthetie.com
    #  - http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE', )
5258
5259
5260
class MomentumTapa(GenericTapasticComic):
    """Momentum, retrieved from Tapastic."""
    # Also published at http://www.momentumcomic.com
    name = 'momentum-tapa'
    long_name = 'Momentum (from Tapastic)'
    url = 'https://tapastic.com/series/momentum'
5266
5267
5268
def get_subclasses(klass):
    """Return every class deriving from `klass`, directly or not.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several paths appears once per
    path.
    """
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
5274
5275
5276
def remove_st_nd_rd_th_from_date(string):
    """Strip ordinal suffixes (1st/2nd/3rd/4th) to get a parsable date string.

    Only a 'st', 'nd', 'rd' or 'th' that directly follows a digit is removed,
    so words that merely contain those letters ('August', 'Monday',
    'Saturday', 'month'...) are left untouched.

    :param string: date text such as 'Monday 2nd March 2015'.
    :return: the same text without the ordinal suffixes ('Monday 2 March 2015').
    """
    # The digit is captured and put back; only the suffix is dropped.
    return re.sub(r'(\d)(?:st|nd|rd|th)', r'\1', string)
5284
5285
5286
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which temporarily switches the
    process locale to `local` so that locale-dependent directives (month and
    day names) are parsed consistently.

    :param string: text to parse.
    :param date_format: strptime format, as described in
        https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    :param local: locale name to use while parsing.
    :return: the parsed datetime.date.
    :raises ValueError: when `string` does not match `date_format`.
    :raises locale.Error: when `local` is not available on the system.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the previous locale even when parsing fails, so an
        # unparsable date does not leave the process locale changed.
        locale.setlocale(locale.LC_ALL, prev_locale)
5297
5298
5299
# Registry of the comic classes, built from GenericComic's subclass tree.
COMICS = set(get_subclasses(GenericComic))
# Only classes whose `name` is set are kept.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
# A smaller mapping would mean two classes share the same `name`.
assert len(COMIC_NAMES) == len(VALID_COMICS)
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
# Likewise, class names are expected to be unique.
assert len(CLASS_NAMES) == len(VALID_COMICS)
5305