Completed
Push — master ( 4a060e...b8d65b )
by De
32s
created

SystemComic   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 27
Duplicated Lines 14.81 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
dl 4
loc 27
rs 10
c 1
b 0
f 0
wmc 3

2 Methods

Rating   Name   Duplication   Size   Complexity  
A get_first_comic_link() 0 4 1
A get_comic_info() 0 15 2

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        `last_comic` is the last comic already retrieved (a dict with at
        least a 'num' key) or a falsy value to start from the beginning.
        Comics are yielded in increasing number order, up to the number
        reported by the site's 'info.0.json' document.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            # NOTE(review): number 404 is skipped on purpose - presumably
            # because no such comic exists on the site; confirm.
            if num == 404:
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # Check the document is the one requested before reshaping it.
            assert comic['num'] == num
            comic['img'] = [comic['img']]  # callers get a list of images
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # The date fields are converted to integers.
            comic['day'] = int(comic['day'])
            comic['month'] = int(comic['month'])
            comic['year'] = int(comic['year'])
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' value of the link-like object `link` unchanged.
    Defined at module level so that classes can reuse it by assigning it
    as a class attribute.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' value of the link-like object `link` joined to the
    class `url` attribute with `urljoin_wrapper`.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next link, falsy to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Implementations return a dict without a 'url' key (it is added by
        `get_next_comic`) or None when the page is to be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Start after `last_comic` (a dict with a 'url' key) when provided,
        from the first comic otherwise, and yield comic dicts by following
        the "next" links until there is none or until a link leads back to
        the page it was found on.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # For development, navigation can be validated at this point with
        # cls.check_navigation(url).
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return True when a first link is found and its page can be
        retrieved, False otherwise (a message is printed in that case).
        """
        # Importing the `urllib` package alone does not guarantee that the
        # `urllib.error` submodule is loaded.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going to the previous comic then forward (and to the next comic
        then backward) is expected to lead back to `url`. Return True when
        it does, False otherwise (a message is printed in that case).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both checks are always performed; the result is True only when
        both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Implementations return a dict without a 'url' key (it is added by
        `get_next_comic`) or None when the element is to be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to and including the one whose url
        is the url of `last_comic`; the following ones are retrieved and
        yielded. Without `last_comic`, every element is retrieved.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        # Materialised because the number of elements is reported below.
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last known comic found: retrieval starts at the next one.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of looking up a 'link' tag with rel='next' (when
    `next_` is truthy) or rel='prev' (otherwise) in `last_soup`.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of looking up an 'a' tag with rel='next' (when
    `next_` is truthy) or rel='prev' (otherwise) in `last_soup`.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of looking up an 'a' tag with class
    'navi navi-next' (when `next_` is truthy) or 'navi navi-prev'
    (otherwise) in `last_soup`.
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of looking up an 'a' tag with class
    'navi comic-nav-next navi-next' (when `next_` is truthy) or
    'navi comic-nav-previous navi-prev' (otherwise) in `last_soup`.
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return the result of looking up an 'a' tag with class
    'comic-nav-base comic-nav-next' (when `next_` is truthy) or
    'comic-nav-base comic-nav-previous' (otherwise) in `last_soup`.
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at the class `url` and return the result of looking
    up an 'a' tag with class 'navi navi-first' in it.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at the class `url` and return the 'a' tag found in
    the 'div' tag with class "nav-first". Return None when there is no
    such div, instead of failing with an AttributeError, so that callers
    can handle a missing first link.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at the class `url` and return the result of looking
    up an 'a' tag with class 'comic-nav-base comic-nav-first' in it.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    a URL provided by the class.

    The class must define a `first_url` attribute; the returned object is
    a dict with that value under the 'href' key.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    The starting point is the class `first_url` attribute when defined,
    a URL asked to the user otherwise. Every visited URL is printed.

    Then, the URL found can easily be used via `simulate_first_link`.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            break
        prev_url = cls.get_url_from_link(prev_link)
        if prev_url == url:
            # A "previous" link pointing to the current page would make
            # this loop endless: the current page is considered the first.
            break
        url = prev_url
    return {'href': url}
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics.

        `last_comic` is ignored; an empty list is returned.
        """
        cls.log("comic is considered as empty - returning no comic")
        return []
358
359
360 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the image urls, the publication
        date (day/month/year) and a file prefix built from the title.
        """
        # The url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
386
387
388 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: concrete classes provide the url of their first comic.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the short link ('url2'), the image
        urls and the publication date (day/month/year).
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # The date is parsed with a French locale.
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
411
412
413
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
419
420
421
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
428
429
430
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
436
437
438
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
444
445
446
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
452
453
454
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
460
461
462
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
468
469
470
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
476
477
478
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the author, the description, the
        image urls and the publication date (day/month/year).
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        # Remove social media buttons.
        # NOTE(review): assumes the last 7 images of the entry are always
        # the buttons - verify against the current page layout.
        imgs = imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
509
510
511
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the 'a' tag of the relevant 'li' tag, None when that 'li'
        tag is missing.
        """
        # On this site, the class names are swapped: prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the short url, the title, the image urls and
        the publication date (day/month/year).
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
545
546
547
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is hardcoded as a link-like dict; it carries the 'title'
        key read by `get_comic_info`.
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the link, the date (year/month/day) from
        the three numeric components of the url and the images from the
        "entry" div of the page.
        """
        url_date_re = re.compile(r'.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
575
576
577
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the author, the image urls (joined
        to the site url) and the publication date (day/month/year).
        """
        # Images are taken from the "comic" div; the 'og:image' meta tags
        # are a possible alternative source.
        imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
610
611
612
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the tags, the image urls (bonus
        images included, without duplicates) and the publication date
        (day/month/year).
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        # This particular image is filtered out of the result.
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
646
647
648
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the image urls and the publication
        date (day/month/year).
        """
        date_str = soup.find('h2', class_='date-header').string
        # The date is parsed with a French locale.
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
672
673
674
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the image urls and the publication
        date (day/month/year).
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
699
700
701
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the image urls and the publication
        date (day/month/year).
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
725
726
727
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the short url, the comic number extracted from
        it, the two titles carried by the image ('alt' and 'title'
        attributes) and the image url.
        """
        # The url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Exactly one image is expected in the "comic" div.
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
753
754
755
class Garfield(GenericNavigableComic):
    """Retriever for the Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the arrow anchor pointing to the next (or previous) comic."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        The date is parsed from the '/comic/<year>/<month>/<day>' part of the
        comic URL; the images are the 'img-responsive' ones found inside the
        'comic-display' div.
        """
        comic_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, date_pattern.match(comic_url).groups())
        display_div = soup.find('div', class_='comic-display')
        imgs = display_div.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [img['src'] for img in imgs],
        }
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
783
784
785
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor of the next (or previous) navigation div, if any."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Every value is read from the page's meta tags (Open Graph and
        'article:*' properties).
        """
        def meta_content(prop):
            """Return the content of the first meta tag with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
821
822
823
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Title and description are taken from the last 'og:title' and
        'og:description' meta tags; images come from the 'comic' div and are
        expected to carry the title in both 'title' and 'alt'.
        """
        # Date is on the archive page
        def last_meta_content(prop):
            """Return the content of the last meta tag with that property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        description = last_meta_content('og:description')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['title'] == img['alt'] == title for img in comic_imgs)
        return {
            'title': title,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
846
847
848
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the '/firstlink.gif' image found on the
        home page, or None when that image is not present.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        # find() returns None when nothing matches: do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the '/nextlink.gif' (or '/prevlink.gif')
        image, or None when the image is missing or its parent has no 'href'.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except those whose source ends with
        one of the listed file names. 'title2' joins the non-empty 'alt'
        texts of the kept images.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
880
881
882
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs found in the 'comic' div of a comic page.

        Each image is expected to have identical 'alt' and 'title' texts.
        """
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'img': [img['src'] for img in comic_imgs],
        }
900
901
902
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Title, author and date are read from the post header elements; the
        images are those of the 'comic' div.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        post_author = author_link.string
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'author': post_author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
926
927
928
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Title, author and date are read from the post header elements; the
        images are those of the 'comicpane' div, each expected to have
        identical 'alt' and 'title' texts. 'title2' is the title text of the
        first image.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert all(img['alt'] == img['title'] for img in pane_imgs)
        hover_text = pane_imgs[0]['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in pane_imgs],
            'title': post_title,
            'title2': hover_text,
            'author': post_author,
        }
956
957
958 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Images are the 'comic' class ones; each is expected to carry the
        comic title in both 'alt' and 'title'. Images with an empty source
        are left out of the result.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        assert all(img['alt'] == img['title'] == comic_title for img in comic_imgs)
        sources = [img['src'] for img in comic_imgs if img["src"]]
        return {
            'title': comic_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
985
986
987
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the 'cc-comic' one. When the 'aftercomic' div holds
        an image with a non-empty source, it is added as a second image. The
        date is parsed from the first child of the 'cc-publishtime' div.
        """
        image1 = soup.find('img', id='cc-comic')
        imgs = [image1['src']]
        aftercomic = soup.find('div', id='aftercomic')
        # The bonus panel is optional: tolerate a missing div, a div without
        # image, or an image without (or with an empty) source.
        bonus_img = aftercomic.find('img') if aftercomic else None
        bonus_src = bonus_img.get('src') if bonus_img is not None else None
        if bonus_src:
            imgs.append(bonus_src)
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            # A single 'img' entry: the literal used to define the key twice,
            # the first value being silently overwritten by this one.
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i)) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1020
1021
1022
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the anchors of the 'all_thumbnails' div, in reversed order."""
        home_soup = get_soup_at_url(cls.url)
        thumbnail_links = home_soup.find('div', id='all_thumbnails').find_all('a')
        return reversed(thumbnail_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the name and image of a comic page from its Open Graph tags.

        Exactly one 'og:image' meta tag is expected.
        """
        comic_name = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        assert len(image_metas) == 1
        return {
            'name': comic_name,
            'img': [meta['content'] for meta in image_metas],
        }
1045
1046 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1047
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page from its meta tags.

        The description defaults to an empty string when the page has no
        'og:description' tag. The date is the first 10 characters of the
        'article:published_time' value, parsed as year-month-day.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1073
1074
1075
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic links of the form '<url>/?p=<num>' and captures the number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        comic_links = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        The number comes from the comic URL, the title from the archive link
        text, and the date from the '<year>-<month>-<day>-' part of the image
        source path.
        """
        date_pattern = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).groups()[0])
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == comic_img['title']
        hover_text = comic_img['title']
        img_src = comic_img['src']
        year, month, day = map(int, date_pattern.match(img_src).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [img_src],
            'year': year,
            'month': month,
            'day': day,
        }
1111
1112
1113
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever logic for the BouletCorp comics in different languages."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        The date is parsed from the '/<year>/<month>/<day>/' part of the comic
        URL. Images are those of the 'storycontent' div inside the 'notes'
        div; images without a source are left out, and 'texts' joins the
        non-empty image titles.
        """
        comic_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_pattern.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        story_imgs = story.find_all('img')
        texts = '  '.join(filter(None, (img.get('title') for img in story_imgs)))
        page_title = soup.find('title').string
        sources = [convert_iri_to_plain_ascii_uri(img['src'])
                   for img in story_imgs
                   if img.get('src') is not None]
        return {
            'img': sources,
            'title': page_title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1141
1142
1143
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics hosted on www.bouletcorp.com."""
    # Site-specific settings; the retrieval logic is inherited.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1149
1150
1151
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics hosted on english.bouletcorp.com."""
    # Site-specific settings; the retrieval logic is inherited.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1156
1157
1158
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        The title is the space-joined 'title' texts of the images of the
        'comic' div; each image is expected to have identical 'alt' and
        'title' texts.
        """
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        joined_title = ' '.join(img['title'] for img in comic_imgs)
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'title': joined_title,
            'author': post_author,
            'img': [img['src'] for img in comic_imgs],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1183
1184
1185
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        The title is the 'alt' text of the first image of the 'comic' div
        (expected to equal its 'title' text), or an empty string when the div
        holds no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        raw_date = soup.find('div', class_='entry-meta').contents[0].strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if comic_imgs:
            first_img = comic_imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in comic_imgs],
        }
1215
1216
1217
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Images come from the 'comic' div when there is one. When the page has
        an 'extrapanelbutton' div, the page it links to is fetched and its
        'extrapanelimage' images are appended.
        """
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img') if comic_div else []
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            imgs.extend(extra_page.find_all('img', class_='extrapanelimage'))
        else:
            extra_url = None
        return {
            'url_extra': extra_url,
            'title': title,
            'author': post_author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
1251
1252
1253
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        has no 'href' attribute.
        """
        # NOTE(review): 'previous-comic ' has a trailing space, presumably
        # mirroring the site's markup - confirm it still matches.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches: do not dereference it.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number is the second-to-last '/'-separated part of the 'og:url'
        value. The author text is expected to start with 'by ', which is
        stripped.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1292
1293
1294
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic (GenericComic's abstract method).

        The range of comic numbers is derived from the '/comic/<num>' links of
        the home page; retrieval resumes right after last_comic's number when
        one is given, otherwise it starts at the smallest number found.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).groups()[0])
                for anchor in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            comic_soup = get_soup_at_url(comic_url)
            found = comic_soup.find_all('img', src=re.compile('^/images/comics/'))
            imgs = list(reversed(found))
            description = comic_soup.find('meta', attrs={'name': 'description'})['content']
            hover_texts = (img.get('title') for img in imgs)
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(filter(None, hover_texts)),
                'img': [urljoin_wrapper(comic_url, img['src']) for img in imgs],
                'description': description,
            }
1324
1325
1326
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic links of the form '<url>/index.php?comic=<num>'.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # The first matching link is a random comic: drop it.
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        The number comes from the comic URL, the date from the archive link
        text and 'text' from the element following the archive link. The
        image is the first one whose source starts with '<url>/comics/'.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).groups()[0])
        raw_date = link.string
        text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [comic_img.get('src')],
            'title': comic_img.get('title'),
            'text': text,
            'num': num,
        }
1359
1360
1361 View Code Duplication
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic links of the form '<url>/<year>/<month>/<day>/...'.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        The title is the archive link text, the date comes from the comic
        URL, and the image is the first one of the 'comic' div, whose 'alt'
        text is expected to equal the title.
        """
        comic_url = cls.get_url_from_archive_element(link)
        comic_title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == comic_title
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [comic_img['src']],
        }
1389
1390
1391
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page and, for every
        month that may hold comics newer than the last retrieved one, yields
        an entry for each image whose date is strictly after it. Without a
        last comic, retrieval starts after 1985-11-01.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the captured strings as they are: they are reused verbatim
            # to build the image pattern and the image URL.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # First of the month + 31 days is past the end of the month, so a
            # month failing this test cannot hold anything newer than last_date.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1425
1426
1427 View Code Duplication
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Comic pages look like '<url>/<num>'; strip images live under '<url>/strips/'.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors found on the archive page."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image of a comic.

        The number is parsed from the comic URL, the title is the archive
        link text, and the image is the first one matching comic_img_re.
        """
        page_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(page_url).groups()[0])
        strip_img = soup.find('img', src=cls.comic_img_re)
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [strip_img['src']]
        }
1450
1451
1452
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image, or None if absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent element of the 'next' (or 'prev') button image, or None if absent."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs ('og:image') and title ('twitter:title') of a comic page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1481
1482 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
1483
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the "First" button image, or None when the
        image is not found (instead of failing on ``None.parent``).
        """
        img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the "Next"/"Back" button image. None is
        returned when the image is not found or when its parent carries no
        href attribute.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            # No button at all: report "no link" rather than raising AttributeError.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the post headers; image URLs come from the
        ``<link rel="image_src">`` elements.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1515
1516
1517
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Image sources found in the article are joined onto ``cls.url``.
        """
        title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in article_imgs],
        }
1541
1542
1543
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_comic_re = re.compile('comic=1$')
        return get_soup_at_url(cls.url).find('a', href=first_comic_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        link_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the trailing ``comic=<digits>`` value of the
        comic URL; the image source is resolved against that URL.
        """
        comic_url = cls.get_url_from_link(link)
        number_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(number_match.groups()[0])
        comic_img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, comic_img['src'])],
            'title': comic_img.get('title'),
        }
1573
1574
1575
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation div is not on the page.
        """
        div_id = "nx" if next_ else "pvs"
        nav_div = last_soup.find("div", id=div_id)
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image and one strip image are expected; the
        description concatenates their title attributes.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = title_imgs + strip_imgs
        description = ' '.join(img['title'] for img in all_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in all_imgs],
            'description': description,
        }
1609
1610
1611
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description are read from meta tags; images are the
        ``img`` elements marked with ``itemprop="image"``.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        comic_imgs = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
1635
1636
1637
class SomethingOfThatIlk(GenericEmptyComic):
    """Class to retrieve the Something Of That Ilk comics.

    The comic does not exist anymore.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1642
1643
1644
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is taken from the ``og:title`` meta tag and the images
        from the ``div`` whose id is ``comic``.
        """
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
        }
1662
1663
1664
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the bookmark links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has no ``div`` with id ``comic``, the image list, alt
        text and title are left empty; date and tags are always extracted.
        """
        date_str = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # Defaults for pages without a comic div.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1701
1702
1703
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the post header; images come from the
        ``div`` whose id is ``comic``.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1725
1726
1727
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every comic image is expected to have identical alt and title
        attributes; the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1748
1749
1750
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every comic image is expected to carry the post title as both its
        alt and title attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        day = string_to_date(post_date, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
        }
1776
1777
1778
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-joined title attributes of the comic images,
        which are expected to equal their alt attributes.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['title'] == img['alt'] for img in comic_imgs)
        joined_titles = ' '.join(img['title'] for img in comic_imgs)
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': joined_titles,
        }
1797
1798
1799
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The URL previously ended with a stray space, which is not part of the address.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from meta tags. The
        images are the ``og:image`` meta contents, minus the two URLs
        listed in ``skip_imgs``.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1831
1832
1833
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The alt text reported is that of the first comic image; every image
        is expected to have identical alt and title attributes.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1860
1861
1862
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected in the comic pane, each carrying the
        post title as both its title and alt attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        assert all(img['title'] == img['alt'] == title for img in pane_imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'author': author,
        }
1890
1891
1892
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the first ten characters of the ``datetime``
        attribute of the page's ``time`` element.
        """
        title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(tag.string for tag in tag_links)
        timestamp = soup.find('time')['datetime']
        day = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [img['src'] for img in content_imgs],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
1919
1920
1921
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        link_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the last path segment of the comic URL; the
        image source is stripped of whitespace and resolved against that URL.
        """
        comic_img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': comic_img.get('title'),
            'img': [urljoin_wrapper(comic_url, comic_img['src'].strip())],
            'num': int(comic_url.rsplit('/', 1)[-1]),
        }
1950
1951
1952
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive title cells of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the first link found in the archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell, the month and day from the
        cell's previous sibling and the year from the first numeric path
        segment of the comic URL.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site URL so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # A single image is used: the first one in the comic div.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1988
1989
1990
class DiscoBleach(GenericEmptyComic):
    """Class to retrieve Disco Bleach Comics.

    Retrieval does not work anymore.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1995
1996
1997
class TubeyToons(GenericEmptyComic):
    """Class to retrieve TubeyToons comics.

    Retrieval does not work anymore.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; left
    # unchanged because other code may rely on the exact string - confirm.
    _categories = ('TUNEYTOONS', )
2005
2006
2007
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected in the comic pane; all images must
        share the same text for their title and alt attributes, which is
        reported as the alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is the second child node of the author span.
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt = pane_imgs[0]['title']
        assert all(img['title'] == img['alt'] == alt for img in pane_imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
2035
2036
2037 View Code Duplication
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the post; the title is that
        image's title attribute, or an empty string when there is no image
        or no title attribute.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links pointing at a comic, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site URL so that its dots only match literal dots; the
        # trailing '.' still requires at least one character after '/comic/'.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2061
2062 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
2063
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        link_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only images with empty alt and title attributes inside the comic
        div are kept.
        """
        title = soup.find('h1').string
        date_text = soup.find('span', class_='date').string
        day = string_to_date(date_text.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2093
2094
2095
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has no comic div, the image list and the title are
        empty. Otherwise the title is the first image's title attribute,
        which every image must share as both title and alt.
        """
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(post_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        comic_imgs = []
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        title = ""
        if comic_imgs:
            title = comic_imgs[0]['title']
        assert all(img['title'] == img['alt'] == title for img in comic_imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
        }
2121
2122
2123
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the image named ``beginArrow``, or None when
        that image is not found (instead of failing on ``None.parent``).
        """
        img = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named ``rightArrow`` (next) or
        ``leftArrow`` (previous), or None when that image is not found.
        """
        img = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and image URLs are read from the page's meta tags.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
2149
2150
2151
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the post links of the ``all`` page's post list, in reversed order."""
        listing_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        posts = listing_soup.find('ul', class_='post-list')
        post_links = posts.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Title and image URLs are read from the page's Open Graph meta tags.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2173
2174
2175
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        The archive row must hold exactly three cells: the second provides
        the title link and the third the date. The remaining fields are
        read from the comic page itself.
        """
        cells = tr.find_all('td')
        _, title_cell, date_cell = cells
        title_link = title_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = title_link.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [img['src'] for img in content_imgs],
            'alt': ' '.join(img['alt'] for img in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second cell of the archive row."""
        cells = tr.find_all('td')
        _, title_cell, __ = cells
        return title_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2217
2218
2219
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date are read from the post content div; the alt
        text is the concatenation (without separator) of the images' alt
        attributes, which must equal their title attributes.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        title = post_div.find('h2', class_='post-title').string
        author = post_div.find('a', rel='author').string
        post_date = post_div.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
            'alt': ''.join(img['alt'] for img in comic_imgs),
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
        }
2246
2247
2248
class RockPaperScissors(GenericNavigableComic):
    """Class to retrieve Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, transcript, short url, images) of a comic page."""
        page_title = soup.find('title').string
        # Image urls are taken from the page's 'og:image' meta tags.
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript_div = soup.find('div', id='transcript-content')
        return dict(
            title=page_title,
            transcript=transcript_div.string,
            short_url=short_url,
            img=[meta['content'] for meta in image_metas],
        )
2269
2270
2271
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page from its meta tags and image."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        # The tag meta is optional: fall back on an empty string.
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the date part) are parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            # Keep only what precedes the last '?' of the image url.
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2302
2303
2304 View Code Duplication
class JuliasDrawings(GenericListableComic):
    """Class to retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the link of each post listed on the main page, in reversed page order."""
        home = get_soup_at_url(cls.url)
        posts = home.find_all('article', class_='li post')
        return [post.find('a') for post in reversed(posts)]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (title, images, date) of a drawing page."""
        published_time = soup.find('meta', property='og:article:published_time')['content']
        # Only the first 10 characters (the date part) are parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        pictures = soup.find('section', class_='post-content').find_all('img')
        return {
            'title': title,
            # Image sources are joined to the site url.
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2330
2331
2332
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the archive table which correspond to comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # A row must hold exactly four cells; only the second one is used here.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict from the archive row and the comic page.

        Number, title and date come from the archive row; the image and
        its alt text come from the comic page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        for pic in pictures:
            assert pic.get('alt') == pic.get('title')
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2373
2374
2375
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            content.find("span", class_="post-date").string, "%B %d, %Y")
        pictures = content.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2401
2402 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
2403
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) from the page's meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the date part) are parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return dict(
            title=title,
            img=[meta['content'] for meta in image_metas],
            month=published.month,
            year=published.year,
            day=published.day,
        )
2429
2430
2431
class ThorsThundershack(GenericNavigableComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Links whose href is '/comic' are skipped; None is returned when no
        other link is found.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page from its microdata and meta tags."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        for pic in pictures:
            assert pic['title'] == pic['alt']
        # The alt text of the first image is kept, if there is any image.
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        timestamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(timestamp, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2474
2475
2476
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, title, alt text, author and
        publication date (day, month, year) read from the page.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image yields an empty alt text instead of an
        # IndexError, as done by the sibling comic classes.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2503
2504
2505
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, author, date) of a comic page."""
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        # The date is parsed with a German locale.
        published = string_to_date(
            soup.find("span", class_="post-date").string,
            "%d. %B %Y",
            "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Both 'alt' and 'title' of an image are expected to be the post title.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == title
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2531
2532
2533
class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic page."""
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find("span", class_="entry-date").string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        # Exactly one image is expected; its alt text is kept.
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2561
2562
2563
class TheAwkwardYeti(GenericNavigableComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        published = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching 'alt' and 'title'.
        assert not pictures or pictures[0]['alt'] == pictures[0]['title']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2590
2591
2592
class PleasantThoughts(GenericNavigableComic):
    """Class to retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) from the post content of a page."""
        content = soup.find('div', class_='post-content')
        heading = content.find('h2', class_='post-title')
        title = heading.string
        pictures = content.find("div", class_="entry").find_all("img")
        return dict(
            title=title,
            img=[pic['src'] for pic in pictures],
        )
2610
2611
2612
class MisterAndMe(GenericNavigableComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        # At most one image is expected; a page without image gets an empty alt.
        assert len(pictures) <= 1
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2642
2643
2644
class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        # A page without image gets an empty alt text.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2672
2673
2674
class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        # The alt text of the first image is kept, if there is any image.
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2704
2705
2706
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Endless Origami Comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # A page without image gets an empty alt text.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2733
2734
2735
class PlanC(GenericNavigableComic):
    """Class to retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return dict(
            title=title,
            img=[pic['src'] for pic in pictures],
            month=published.month,
            year=published.year,
            day=published.day,
        )
2757
2758
2759
class BuniComic(GenericNavigableComic):
    """Class to retrieve Buni Comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title) of a comic page.

        The title of the comic is the 'title' attribute of its image.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': pictures[0]['title'],
        }
2777
2778
2779
class GenericCommitStrip(GenericNavigableComic):
    """Generic class to retrieve Commit Strips in different languages."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each concrete subclass sets its own first url.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, title2, description, images) of a strip page."""
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # 'title2' gathers the 'title' attributes of the images ('' when missing).
        image_titles = [pic.get('title', '') for pic in pictures]
        title2 = ' '.join(image_titles)
        return {
            'title': title,
            'title2': title2,
            'description': description,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
                for pic in pictures
            ],
        }
2798
2799
2800
class CommitStripFr(GenericCommitStrip):
    """Class to retrieve Commit Strips in French."""
    # Identification of the comic.
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    # Location of the comic: base url and url of the first strip.
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2807
2808
2809
class CommitStripEn(GenericCommitStrip):
    """Class to retrieve Commit Strips in English."""
    # Identification of the comic.
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    # Location of the comic: base url and url of the first strip.
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2815
2816
2817
class GenericBoumerie(GenericNavigableComic):
    """Generic class to retrieve Boumeries comics in different languages."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: each concrete subclass sets its own date format and locale.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (short url, images, title, author, date) of a page."""
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        # The date is parsed with the format and locale of the subclass.
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2843
2844
2845
class BoumerieEn(GenericBoumerie):
    """Class to retrieve Boumeries comics in English."""
    # Identification and location of the comic.
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Locale and format used to parse the post dates.
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2852
2853
2854
class BoumerieFr(GenericBoumerie):
    """Class to retrieve Boumeries comics in French."""
    # Identification and location of the comic.
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Locale and format used to parse the post dates.
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2862
2863
2864
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, description, short url ('url2'),
        image urls and publication date (day, month, year) of the page.
        Title and description default to an empty string when the page
        does not provide them.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> element itself.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2896
2897
2898
class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, alt, author, images, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        # Pages without a 'comic' div are handled as pages without image.
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        # The alt text is the 'title' of the first image, if there is any.
        alt = ""
        if pictures:
            alt = pictures[0]['title']
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2926
2927
2928
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short url, the comic number extracted from
        that short url, the image urls, the publication date (day, month,
        year), the alt text and the title of the page.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The base url is escaped so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image yields an empty alt text instead of an
        # IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2958
2959
2960
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short url, the comic number extracted from
        that short url, the image urls, the publication date (day, month,
        year), the title, the space-separated tags, the alt text and the
        author of the page.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The base url is escaped so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image yields an empty alt text instead of an
        # IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2997
2998
2999 View Code Duplication
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor found in the relevant list item, or None when
        the page has no such list item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # find() yields None when nothing matches: do not dereference it.
        if li is None:
            return None
        return li.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, title, author and the
        publication date split into day/month/year.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3030
3031
3032
class SystemComic(GenericNavigableComic):
    """Retrieve System Comic strips."""
    url = 'http://www.systemcomic.com'
    name = 'system'
    long_name = 'System Comic'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor held by the 'first' list item of the home page."""
        home = get_soup_at_url(cls.url)
        first_item = home.find('li', class_='first')
        return first_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, date and image urls for a comic page."""
        def og_content(prop):
            """Content of the <meta> tag carrying the given property."""
            return soup.find('meta', property=prop)['content']

        title = og_content('og:title')
        description = og_content('og:description')
        published = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        figure = soup.find('figure')
        pictures = figure.find_all('img')
        return {
            'title': title,
            'description': description,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
        }
3060
3061
3062
class LittleLifeLines(GenericNavigableComic):
    """Retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    first_url = 'http://www.littlelifelines.com/comics/well-done'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next or previous comic, None if absent."""
        # The class names are swapped: 'prev' is looked up for the next comic.
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, description, author, date and images."""
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        published = string_to_date(
            soup.find('time', class_='published')['datetime'],
            "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        content = soup.find('div', class_="body entry-content")
        # Only images that actually have a 'src' attribute are kept.
        pictures = [img for img in content.find_all('img')
                    if img.get('src') is not None]
        alt_text = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt_text,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in pictures],
        }
3101
3102
3103
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the site's main page."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image urls for a comic page."""
        title = soup.find('meta', property='og:title')['content']
        container = soup.find('div', class_='webcomic-image')
        pictures = container.find_all('img')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
        }
3126
3127
3128
class EverythingsStupid(GenericWordPressInkblot):
    """Everything's Stupid comics, retrieved via GenericWordPressInkblot."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = "http://everythingsstupid.net"
    name = "stupid"
    long_name = "Everything's Stupid"
3136
3137
3138
class TheIsmComics(GenericWordPressInkblot):
    """The Ism comics, retrieved via GenericWordPressInkblot."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = 'The Ism'
3144
3145
3146
class WoodenPlankStudios(GenericWordPressInkblot):
    """Wooden Plank Studios comics, retrieved via GenericWordPressInkblot."""
    url = "http://woodenplankstudios.com"
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
3151
3152
3153
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        first_button = get_soup_at_url(cls.url).find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if missing."""
        wanted_alt = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=wanted_alt)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the og:image urls of a comic page."""
        title_meta = soup.find('meta', property='og:title')
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title_meta['content'],
            'img': [meta['content'] for meta in image_metas],
        }
3181
3182
3183
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first matching navigation anchor whose target is not
        the bare site url, or None when there is no such anchor.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        for link in last_soup.find_all("a", id=wanted_id):
            # Compare against cls.url rather than a second copy of the url
            # so that the two can never get out of sync.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one image is expected in the 'comic-foot' div; its title
        attribute is used as the comic title.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3214
3215
3216
class Ubertool(GenericNavigableComic):
    """Retrieve Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    url = 'http://ubertoolcomic.com'
    name = 'ubertool'
    long_name = 'Ubertool'
    _categories = ('UBERTOOL', )
    get_navi_link = get_a_comicnavbase_comicnavnext
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and publication date for a comic page."""
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3241
3242
3243
class EarthExplodes(GenericNavigableComic):
    """Retrieve The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    first_url = 'http://www.earthexplodes.com/comics/000/'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' or 'prev'."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, page title and alt text for a comic page."""
        page_title = soup.find('title').string
        image_div = soup.find('div', id='image')
        pictures = image_div.find_all('img')
        # The alt text is the first image's title, '' when it has none.
        alt_text = pictures[0].get('title', '')
        # Image sources are joined with the site url.
        image_urls = [urljoin_wrapper(cls.url, picture['src']) for picture in pictures]
        return {
            'img': image_urls,
            'title': page_title,
            'alt': alt_text,
        }
3268
3269
3270
class PomComics(GenericNavigableComic):
    """Retrieve Pom Comics / Piece of Me."""
    url = 'http://www.pomcomic.com'
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' or 'btn-prev' anchor of the page."""
        button_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, keywords and image urls for a page."""
        heading = soup.find('h1').string
        description = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': heading,
            'desc': description,
            'tags': keywords,
            # Image sources are joined with the site url.
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
        }
3300
3301
3302
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the chevron span, or None when the page has
        no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() yields None when nothing matches: do not dereference it.
        if span is None:
            return None
        return span.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the twitter url and title, the first image's
        title and alt text, and the urls of all comic images.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # Date extraction is currently disabled:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3337
3338
3339
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics."""
    url = 'http://makeitstoopid.com/comic.php'
    name = 'stoopid'
    long_name = 'Make it stoopid'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page as a list.

        Elements without an 'href' attribute are replaced by None.
        """
        elements = soup.find_all(class_='cnav')
        first_five = elements[:5]
        remainder = elements[5:]
        # The remaining 'cnav' elements must repeat the first five.
        assert first_five == remainder
        # begin, prev, archive, next_, end = first_five
        return [element if element.get('href') is not None else None
                for element in first_five]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the main page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return navigation element 3 (next) or 1 (previous) of the page."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the link's title and the urls of the 'comicimg' images."""
        link_title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [picture['src'] for picture in pictures],
        }
3373
3374
3375
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the entry title and the urls of the images
        found in the entry content. The link argument is not used.
        """
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3397
3398
3399
class MarketoonistComics(GenericNavigableComic):
    """Retrieve Marketoonist comics."""
    url = 'https://marketoonist.com/cartoons'
    name = 'marketoonist'
    long_name = 'Marketoonist'
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, publication date and title for a comic page."""
        image_metas = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': og_title,
        }
3422
3423
3424
class ConsoliaComics(GenericNavigableComic):
    """Retrieve Consolia comics."""
    url = 'https://consolia-comic.com'
    name = 'consolia'
    long_name = 'consolia'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' or 'prev'."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and publication date for a comic page."""
        og_title = soup.find('meta', property='og:title')['content']
        published = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3455
3456
3457
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next) or 'older' (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, author and page title for a comic page."""
        page_title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        pictures = article.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [picture['src'] for picture in pictures],
            'author': author,
            'title': page_title,
        }
3482
3483
3484
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The class names are swapped: 'prev-item' is looked up for the
        # next comic and 'next-item' for the previous one.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text, description, author,
        publication date and image urls. The alt text is the 'alt'
        attribute of the first image, or an empty string when the page has
        no image or the first image has no 'alt' attribute.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The images live in one of two possible containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup tag, `'title' not in i` appears
        # to test the tag's children rather than its attributes, which
        # would make this check always pass - confirm whether
        # `i.has_attr('title')` was intended.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string, so that 'alt' has the same type with or
        # without images.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3522
3523
3524
class GloryOwlComix(GenericNavigableComic):
    """Retrieve Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next) or 'older' (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, author and page title for a comic page."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [image_link['href'] for image_link in image_links],
            'author': author,
            'title': page_title,
        }
3549
3550
3551
class AtRandomComics(GenericNavigableComic):
    """Retrieve At Random Comics."""
    name = 'atrandom'
    long_name = 'At Random Comics'
    url = 'http://www.atrandomcomics.com'
    first_url = 'http://www.atrandomcomics.com/at-random-comics-home/2015/5/5/can-of-worms'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor for the next or previous comic."""
        # The ids are swapped: 'prevLink' is looked up for the next comic.
        anchor_id = 'prevLink' if next_ else 'nextLink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, date, author and description of a page."""
        og_title = soup.find('meta', property='og:title')['content']
        og_description = soup.find('meta', property='og:description')['content']
        published = string_to_date(
            soup.find('time', itemprop='datePublished')["datetime"],
            "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'description': og_description,
        }
3583
3584
3585
class GenericTumblrV1(GenericComic):
    """Base class for comics retrieved from Tumblr through the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic.

        Posts for which get_comic_info returns None are skipped.
        """
        for comic in map(cls.get_comic_info, cls.get_posts(last_comic)):
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Return the post's url, printing a warning if it is off-site."""
        post_url = post['url']
        if not post_url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (post_url, cls.url))
        return post_url

    @classmethod
    def get_api_url(cls):
        """Return the url of the API read endpoint for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API url used to read the post with the given id."""
        query = '?id=%d' % (tumblr_id)
        return cls.get_api_url() + query

    @classmethod
    def get_comic_info(cls, post):
        """Return the info dict for a post, or None if it is not a photo."""
        if post['type'] != 'photo':
            return None
        post_id = int(post['id'])
        api_url = cls.get_api_url_for_id(post_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photos = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [photo.find('photo-url') for photo in photos]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': post_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order. Retrieval stops at the
        post whose id is last_comic's 'tumblr-id'; when that id is never
        met, a message is printed and an empty list is returned.
        """
        stop_id = last_comic['tumblr-id'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(stop_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        summary = get_soup_at_url(api_url).find('posts')
        start = int(summary['start'])
        total = int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                post_id = int(post['id'])
                if stop_id and stop_id == post_id:
                    return reversed(collected)
                collected.append(post)
        if stop_id is not None:
            print("Did not find %s : there might be a problem" % stop_id)
            return []
        return reversed(collected)
3686
3687
3688
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Saturday Morning Breakfast Cereal comics, retrieved via GenericTumblrV1."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    url = 'http://smbc-comics.tumblr.com'
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    _categories = ('SMBC', )
3696
3697
3698
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved via GenericTumblrV1."""
    url = 'http://irwincardozocomics.tumblr.com'
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
3703
3704
3705
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved via GenericTumblrV1."""
    url = 'http://accordingtodevin.tumblr.com'
    name = 'devin'
    long_name = 'According To Devin'
3710
3711
3712
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved via GenericTumblrV1."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    url = "http://itsthetie.tumblr.com"
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    _categories = ("TIE", )
3720
3721
3722
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved via GenericTumblrV1."""
    # Also on http://www.octopuns.net
    url = 'http://octopuns.tumblr.com'
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
3728
3729
3730
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved via GenericTumblrV1."""
    # Also on http://www.picturesinboxes.com
    url = 'https://picturesinboxescomic.tumblr.com'
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
3736
3737
3738
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, retrieved via GenericTumblrV1."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    url = 'https://tubeytoons.tumblr.com'
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' -
    # confirm against any other class using this category before changing.
    _categories = ('TUNEYTOONS', )
3746
3747
3748
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, retrieved through GenericTumblrV1."""
    # Also published at http://tapastic.com/series/UnearthedComics
    # Also published at http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3756
3757
3758
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved through GenericTumblrV1."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3763
3764
3765
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved through GenericTumblrV1."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3770
3771
3772
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved through GenericTumblrV1."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3777
3778
3779
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved through GenericTumblrV1."""
    # Also published at http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3785
3786
3787
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved through GenericTumblrV1."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3792
3793
3794
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved through GenericTumblrV1."""
    # Also published at http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3801
3802
3803
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, retrieved through GenericTumblrV1."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3808
3809
3810
class PondScumComics(GenericTumblrV1):
    """Pond Scum Comics, retrieved through GenericTumblrV1."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3815
3816
3817
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved through GenericTumblrV1."""
    # Also published at http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3823
3824
3825
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved through GenericTumblrV1."""
    # Also published at http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3832
3833
3834
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved through GenericTumblrV1."""
    # Also published at http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3840
3841
3842
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved through GenericTumblrV1."""
    # Also published at http://goneintorapture.tumblr.com
    # Also published at http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
3849
3850
3851
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved through GenericTumblrV1."""
    # Also published at http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3857
3858
3859
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know Comics, retrieved through GenericTumblrV1."""
    # Also published at http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3865
3866
3867
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved through GenericTumblrV1."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3872
3873
3874
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.cheerupemokid.com
    # Also published at http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
3881
3882
3883
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, retrieved through GenericTumblrV1."""
    # Also published at http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3889
3890
3891
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, retrieved through GenericTumblrV1."""
    # Also published at http://zenpencils.com
    # Also published at http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3899
3900
3901
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, retrieved through GenericTumblrV1."""
    # Also published at http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
3907
3908
3909
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved through GenericTumblrV1."""
    # Also published at http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3915
3916
3917
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3923
3924
3925
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3931
3932
3933
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3940
3941
3942
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.gocomics.com/the-awkward-yeti
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3951
3952
3953
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, retrieved through GenericTumblrV1."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3958
3959
3960
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.downtheupwardspiral.com
    # Also published at https://tapastic.com/series/Down-the-Upward-Spiral
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3967
3968
3969
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, retrieved through GenericTumblrV1."""
    # Also published at https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Was spelled `categories`; every sibling comic class declares its
    # categories under `_categories`, so the unprefixed name was presumably
    # never picked up by the category lookup.
    _categories = ('DAMILEE', )
3976
3977
3978
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Hot Comics For Cool People, retrieved through GenericTumblrV1."""
    # Also published at https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also published at http://hotcomics.biz (links to tumblr)
    # Also published at http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Was spelled `categories`; every sibling comic class declares its
    # categories under `_categories`, so the unprefixed name was presumably
    # never picked up by the category lookup.
    _categories = ('DAMILEE', )
3987
3988
3989
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.1111comics.me
    # Also published at https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3997
3998
3999
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved through GenericTumblrV1."""
    # Also published at http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
4005
4006
4007
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.gocomics.com/berkeley-mews
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
4015
4016
4017
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved through GenericTumblrV1."""
    # Also published at http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
4023
4024
4025
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved through GenericTumblrV1."""
    # Also published at http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
4031
4032
4033
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Chris Hallbeck comics, retrieved through GenericTumblrV1."""
    # Also published at https://tapastic.com/ChrisHallbeck
    # Also published at http://maximumble.com
    # Also published at http://minimumble.com
    # Also published at http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name previously read 'Chris Hallback'; the class name, URL and
    # docstring all spell the author's name 'Hallbeck'.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category token keeps the 'HALLBACK' spelling because
    # other classes may share it - confirm before renaming.
    _categories = ('HALLBACK', )
4043
4044
4045
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved through GenericTumblrV1."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
4050
4051
4052
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved through GenericTumblrV1."""
    # Also published at https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
4058
4059
4060
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved through GenericTumblrV1."""
    # Also published at http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
4066
4067
4068
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
4077
4078
4079
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved through GenericTumblrV1."""
    # Also published at http://theodd1sout.com
    # Also published at https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
4086
4087
4088
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved through GenericTumblrV1."""
    # Also published at http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
4094
4095
4096
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved through GenericTumblrV1."""
    # Also published at http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
4102
4103
4104
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved through GenericTumblrV1."""
    # Also published at http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
4110
4111
4112
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved through GenericTumblrV1."""
    # Also published at https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
4118
4119
4120
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4126
4127
4128
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, declared on top of GenericEmptyComic and GenericTumblrV1."""
    # NOTE(review): the GenericEmptyComic mixin presumably deactivates
    # retrieval for this comic - confirm against its definition.
    # Also published at https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
4134
4135
4136
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, retrieved through GenericTumblrV1."""
    # Also published at http://moonbeard.com
    # Also published at http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://squireseses.tumblr.com'
    _categories = ('MOONBEARD', )
4144
4145
4146
class AComik(GenericTumblrV1):
    """A Comik comics, retrieved through GenericTumblrV1."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4151
4152
4153
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved through GenericTumblrV1."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4158
4159
4160
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4166
4167
4168
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved through GenericTumblrV1."""
    # Also published at https://linsedition.com
    # Now published at http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('LINS', )
4176
4177
4178
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, retrieved through GenericTumblrV1."""
    # Previously published at https://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
4185
4186
4187
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved through GenericTumblrV1."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4192
4193
4194
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved through GenericTumblrV1."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4199
4200
4201
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved through GenericTumblrV1."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4206
4207
4208
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved through GenericTumblrV1."""
    # Also published at http://talesofabsurdity.com
    # Also published at http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4216
4217
4218
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved through GenericTumblrV1."""
    # Also published at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4224
4225
4226
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4232
4233
4234
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved through GenericTumblrV1."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4239
4240
4241
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved through GenericTumblrV1."""
    # Also published at https://tapastic.com/series/BFGFS
    # Also published at http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4248
4249
4250
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved through GenericTumblrV1."""
    # Also published at https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4256
4257
4258
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, retrieved through GenericTumblrV1."""
    # Also published at http://cassandracalin.com
    # Also published at https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4265
4266
4267
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, retrieved through GenericTumblrV1."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4272
4273
4274
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, retrieved through GenericTumblrV1."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4279
4280
4281
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, retrieved through GenericTumblrV1."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://marcoandco.tumblr.com'
4286
4287
4288
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, retrieved through GenericTumblrV1."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4293
4294
4295
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, retrieved through GenericTumblrV1."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4300
4301
4302
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4308
4309
4310
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4316
4317
4318
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics, retrieved through GenericTumblrV1."""
    # Also published at https://tapastic.com/series/Our-Super-Adventure
    # Also published at http://www.oursuperadventure.com
    # Also: http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4326
4327
4328
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, retrieved through GenericTumblrV1."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4333
4334
4335
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake comics, retrieved through GenericTumblrV1."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
4340
4341
4342
class Robospunk(GenericTumblrV1):
    """Robospunk comics, retrieved through GenericTumblrV1."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4347
4348
4349
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, retrieved through GenericTumblrV1."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4354
4355
4356
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.yesterdayspopcorn.com
    # Also published at https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
4363
4364
4365
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, retrieved through GenericTumblrV1."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4370
4371
4372
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics, retrieved through GenericTumblrV1."""
    # Also published at http://ubertoolcomic.com
    # Also published at https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4380
4381
4382
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4388
4389
4390
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, retrieved through GenericTumblrV1."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4395
4396
4397
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, retrieved through GenericTumblrV1."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4402
4403
4404
class Sephko(GenericTumblrV1):
    """Sephko comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4410
4411
4412
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn comics, retrieved through GenericTumblrV1."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4417
4418
4419
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Art By Moga comics, declared on top of GenericEmptyComic and GenericTumblrV1."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4424
4425
4426
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4432
4433
4434
class LibraryComic(GenericTumblrV1):
    """LibraryComic comics, retrieved through GenericTumblrV1."""
    # Also published at http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4440
4441
4442
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics, retrieved through GenericTumblrV1."""
    # Also published at http://tizzystitchbird.com
    # Also published at https://tapastic.com/series/TizzyStitchbird
    # Also published at http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4450
4451
4452
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Victims Of Circumsolar comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4458
4459
4460
class RockPaperCynicTumblr(GenericTumblrV1):
    """Rock Paper Cynic comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.rockpapercynic.com
    # Also published at https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4467
4468
4469
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.deadlypanel.com
    # Also published at https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4476
4477
4478
class CatanaComics(GenericTumblrV1):
    """Catana comics, retrieved through GenericTumblrV1."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4483
4484
4485
class AngryAtNothingTumblr(GenericTumblrV1):
    """Angry at Nothing comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.angryatnothing.net
    # Also published at http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4492
4493
4494
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comic, retrieved through GenericTumblrV1."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4499
4500
4501
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics, retrieved through GenericTumblrV1."""
    # Also published at http://offtheleashdogcartoons.com
    # Also published at http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4509
4510
4511
class ImogenQuestTumblr(GenericTumblrV1):
    """Imogen Quest comics, retrieved through GenericTumblrV1."""
    # Also published at http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4517
4518
4519
class Shitfest(GenericTumblrV1):
    """Shitfest comics, retrieved through GenericTumblrV1."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4524
4525
4526
class IceCreamSandwichComics(GenericTumblrV1):
    """Ice Cream Sandwich Comics, retrieved through GenericTumblrV1."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4531
4532
4533
class Dustinteractive(GenericTumblrV1):
    """Dustinteractive comics, retrieved through GenericTumblrV1."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4538
4539
4540
class StickyCinemaFloor(GenericTumblrV1):
    """Sticky Cinema Floor comics, retrieved through GenericTumblrV1."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4545
4546
4547
class IncidentalComicsTumblr(GenericTumblrV1):
    """Incidental Comics, retrieved through GenericTumblrV1."""
    # Also published at http://www.incidentalcomics.com
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
    url = 'http://incidentalcomics.tumblr.com'
4553
4554
4555
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """A Pleasant Waste Of Time comics, retrieved through GenericTumblrV1."""
    # Also published at https://tapas.io/series/A-Pleasant-
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    url = 'https://artjcf.tumblr.com'
    _categories = ('WASTE', )
4562
4563
4564
class HorovitzComicsTumblr(GenericTumblrV1):
    """Horovitz comics (Tumblr edition), retrieved through GenericTumblrV1."""
    # Also published at http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ', )
4571
4572
4573
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Deep Dark Fears comics, retrieved through GenericTumblrV1."""
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4578
4579
4580
class ExtraFabulousComicsTumblr(GenericTumblrV1):
    """Extra Fabulous Comics, retrieved through GenericTumblrV1."""
    # Also published at http://extrafabulouscomics.com
    name = 'efc-tumblr'
    long_name = 'Extra Fabulous Comics (from Tumblr)'
    url = 'https://extrafabulouscomics.tumblr.com'
    _categories = ('EFC', )
4587
4588
4589
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """James Of No Trades comics, retrieved through GenericTumblrV1."""
    # Also published at http://jamesofnotrades.com
    # Also published at http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also published at https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES', )
4598
4599
4600
class InfiniteGuff(GenericTumblrV1):
    """Infinite Guff comics, retrieved through GenericTumblrV1."""
    name = 'infiniteguff'
    long_name = 'Infinite Guff'
    url = 'http://infiniteguff.com'
4605
4606
4607
class HorovitzComics(GenericEmptyComic, GenericListableComic):
    """Shared base for the comic series hosted on horovitzcomics.com.

    Concrete subclasses must override ``link_re`` with a compiled pattern
    whose first group captures the comic number in an archive link href.
    """
    # Also on https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ',)
    # The three numeric groups of the image path are read as year, month, day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        `link` is the archive anchor (provides the number and the title),
        `soup` is the parsed comic page (provides the image and its date).
        """
        number = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        # The publication date is encoded in the image path.
        date_groups = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = (int(group) for group in date_groups)
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors matching ``link_re``, in reverse page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        anchors = archive_soup.find_all('a', href=cls.link_re)
        return reversed(anchors)
4639
4640
4641
class HorovitzNew(HorovitzComics):
    """Retrieve the Horovitz comics whose archive links live under /comics/new/."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
4646
4647
4648
class HorovitzClassic(HorovitzComics):
    """Retrieve the Horovitz comics whose archive links live under /comics/classic/."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4653
4654
4655
class GenericGoComic(GenericNavigableComic):
    """Base class holding the scraping logic shared by all gocomics.com comics."""
    _categories = ('GOCOMIC',)

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor pointing to the first comic, looked up on the page at cls.url."""
        first_button = 'fa btn btn-outline-default btn-circle fa-backward sm '
        landing_soup = get_soup_at_url(cls.url)
        return landing_soup.find('a', class_=first_button)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next comic when next_ is truthy, else to the previous one."""
        if next_:
            button = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            button = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the href of `link` against the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, author, tags) from a comic page."""
        def meta_content(prop):
            """Return the content of the <meta> tag carrying this property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        pictures = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in pictures],
            'author': author,
            'tags': tags,
        }
4692
4693
4694
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve Pearls Before Swine comics from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4699
4700
4701
class Peanuts(GenericGoComic):
    """Retrieve Peanuts comics from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4706
4707
4708
class MattWuerker(GenericGoComic):
    """Retrieve Matt Wuerker comics from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4713
4714
4715
class TomToles(GenericGoComic):
    """Retrieve Tom Toles comics from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4720
4721
4722
class BreakOfDay(GenericGoComic):
    """Retrieve Break Of Day comics from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4727
4728
4729
class Brevity(GenericGoComic):
    """Retrieve Brevity comics from gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
4734
4735
4736
class MichaelRamirez(GenericGoComic):
    """Retrieve Michael Ramirez comics from gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4741
4742
4743
class MikeLuckovich(GenericGoComic):
    """Retrieve Mike Luckovich comics from gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4748
4749
4750
class JimBenton(GenericGoComic):
    """Retrieve Jim Benton comics from gocomics.com."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4756
4757
4758
class TheArgyleSweater(GenericGoComic):
    """Retrieve The Argyle Sweater comics from gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4763
4764
4765
class SunnyStreet(GenericGoComic):
    """Retrieve Sunny Street comics from gocomics.com."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4771
4772
4773
class OffTheMark(GenericGoComic):
    """Retrieve Off The Mark comics from gocomics.com."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4779
4780
4781
class WuMo(GenericGoComic):
    """Retrieve WuMo comics from gocomics.com."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4787
4788
4789
class LunarBaboon(GenericGoComic):
    """Retrieve Lunar Baboon comics from gocomics.com."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4796
4797
4798
class SandersenGocomic(GenericGoComic):
    """Retrieve Sarah Andersen comics from gocomics.com."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4805
4806
4807
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retrieve Saturday Morning Breakfast Cereal comics from gocomics.com."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC',)
4815
4816
4817
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retrieve Calvin and Hobbes comics from gocomics.com."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4823
4824
4825
class RallGoComic(GenericGoComic):
    """Retrieve Ted Rall comics from gocomics.com."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL',)
4832
4833
4834
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retrieve The Awkward Yeti comics from gocomics.com."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI',)
4843
4844
4845
class BerkeleyMewsGoComics(GenericGoComic):
    """Retrieve Berkeley Mews comics from gocomics.com."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY',)
4853
4854
4855
class SheldonGoComics(GenericGoComic):
    """Retrieve Sheldon comics from gocomics.com."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4861
4862
4863
class FowlLanguageGoComics(GenericGoComic):
    """Retrieve Fowl Language comics from gocomics.com."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE',)
4872
4873
4874
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics from gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4879
4880
4881
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics from gocomics.com."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD',)
4888
4889
4890
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc comics from gocomics.com."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4896
4897
4898
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4903
4904
4905
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4910
4911
4912
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retrieve Mister & Me comics from gocomics.com."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4919
4920
4921
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (by Wiley Miller) comics from gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4926
4927
4928
class GenericTapasticComic(GenericListableComic):
    """Base class holding the scraping logic shared by all tapastic.com comics."""
    _categories = ('TAPASTIC',)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict for one episode.

        `archive_elt` is one entry of the episode list (its 'publishDate',
        'title' and 'id' keys are read), `soup` is the parsed episode page.
        Returns None when the page holds no 'art-image' picture.
        """
        # publishDate is divided by 1000 before being used as a timestamp.
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        pictures = soup.find_all('img', class_='art-image')
        if not pictures:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [picture['src'] for picture in pictures],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the 'id' of an episode list entry."""
        episode_id = str(archive_elt['id'])
        return 'http://tapastic.com/episode/' + episode_id

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the javascript of the series page."""
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        pref, suff = 'episodeList : ', ','
        raw_lines = urlopen_wrapper(cls.url).readlines()
        stripped_lines = (raw.decode('utf-8').strip() for raw in raw_lines)
        # Keep only the JSON payload between the prefix and the trailing comma.
        payloads = [
            line[len(pref):-len(suff)]
            for line in stripped_lines
            if line.startswith(pref) and line.endswith(suff)
        ]
        return json.loads(payloads[0])
4961
4962
4963
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics from Tapastic."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4969
4970
4971
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics from Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE',)
4980
4981
4982
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4987
4988
4989
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4994
4995
4996
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics from Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
5003
5004
5005
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve Tubey Toons comics from Tapastic."""
    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    _categories = ('TUNEYTOONS',)
5013
5014
5015
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic comics from Tapastic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
5021
5022
5023
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed Comics from Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
5031
5032
5033
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
5040
5041
5042
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from Tapastic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
5048
5049
5050
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from Tapastic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
5057
5058
5059
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from Tapastic."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
5066
5067
5068
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from Tapastic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
5075
5076
5077
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know comics from Tapastic."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
5083
5084
5085
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from Tapastic."""
    # Also on http://www.cheerupemokid.com
    # Also on https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
5092
5093
5094
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from Tapastic."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
5100
5101
5102
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from Tapastic."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
5108
5109
5110
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from Tapastic."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
5116
5117
5118
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from Tapastic."""
    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
5125
5126
5127
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from Tapastic."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
5133
5134
5135
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from Tapastic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
5144
5145
5146
class AsPerUsualTapa(GenericTapasticComic):
    """Retrieve As Per Usual comics from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # The attribute was spelled `categories`; the other comic classes in this
    # file declare their categories through `_categories`.
    _categories = ('DAMILEE', )
5153
5154
5155
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retrieve Hot Comics For Cool People from Tapastic."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # The attribute was spelled `categories`; the other comic classes in this
    # file declare their categories through `_categories`.
    _categories = ('DAMILEE', )
5164
5165
5166
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from Tapastic."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
5174
5175
5176
class TumbleDryTapa(GenericTapasticComic):
    """Retrieve Tumble Dry comics from Tapastic."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it read 'Tumblr Dry' although the comic is Tumble Dry.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5182
5183
5184
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from Tapastic."""
    # Also on http://www.deadlypanel.com
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5191
5192
5193
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK',)
5201
5202
5203
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK',)
5211
5212
5213
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK',)
5221
5222
5223
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from Tapastic."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5229
5230
5231
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from Tapastic."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5237
5238
5239
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from Tapastic."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5246
5247
5248
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from Tapastic."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5254
5255
5256
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from Tapastic."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5263
5264
5265
class TalesOfAbsurdityTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from Tapastic."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
5273
5274
5275
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from Tapastic."""
    # Also on http://bfgfs.com
    # Also on https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5282
5283
5284
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from Tapastic."""
    # Also on http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5290
5291
5292
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from Tapastic."""
    # Also on http://www.mrlovenstein.com
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5298
5299
5300
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra (Cassandra Calin) comics from Tapastic."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5307
5308
5309
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from Tapastic."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5315
5316
5317
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from Tapastic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5324
5325
5326
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Our Super Adventure comics from Tapastic."""
    # Also on http://www.oursuperadventure.com
    # http://sarahssketchbook.tumblr.com
    # http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5334
5335
5336
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from Tapastic."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5342
5343
5344
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve Down The Upward Spiral comics from Tapastic."""
    # Also on http://www.downtheupwardspiral.com
    # Also on http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5351
5352
5353
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from Tapastic."""
    # Also on http://ubertoolcomic.com
    # Also on https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
5361
5362
5363
class BarteNerdsTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve BarteNerds comics from Tapastic."""
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5369
5370
5371
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from Tapastic."""
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5377
5378
5379
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve Tizzy Stitch Bird comics from Tapastic."""
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5387
5388
5389
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve Rock Paper Cynic comics from Tapastic."""
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5396
5397
5398
class IsItCanonTapa(GenericTapasticComic):
    """Retrieve Is It Canon comics from Tapastic."""
    # Also on http://www.isitcanon.com
    name = 'canon-tapa'
    long_name = 'Is It Canon (from Tapastic)'
    url = 'http://tapastic.com/series/isitcanon'
5404
5405
5406
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve It's The Tie comics from Tapastic."""
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = "https://tapastic.com/series/itsthetie"
    _categories = ('TIE',)
5414
5415
5416
class JamesOfNoTradesTapa(GenericTapasticComic):
    """Retrieve James Of No Trades comics from Tapastic."""
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    long_name = 'James Of No Trades (from Tapastic)'
    url = 'https://tapas.io/series/James-of-No-Trades'
    _categories = ('JAMESOFNOTRADES',)
5425
5426
5427
class MomentumTapa(GenericTapasticComic):
    """Retrieve the Momentum comics from their Tapastic series."""

    # Also on http://www.momentumcomic.com
    name = "momentum-tapa"
    long_name = "Momentum (from Tapastic)"
    url = "https://tapastic.com/series/momentum"
5433
5434
5435
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """Retrieve the A Pleasant Waste Of Time comics from their Tapas series."""

    # Also on https://artjcf.tumblr.com
    name = "pleasant-waste-tapa"
    long_name = "A Pleasant Waste Of Time (from Tapastic)"
    url = "https://tapas.io/series/A-Pleasant-"
    _categories = ("WASTE",)
5442
5443
5444
def get_subclasses(klass):
    """Return the list of classes deriving from klass, directly or not.

    The direct subclasses come first, followed by the descendants of each
    direct subclass in turn. A class reachable through several parents is
    listed once per path.
    """
    children = klass.__subclasses__()
    descendants = list(children)
    for child in children:
        descendants += get_subclasses(child)
    return descendants
5450
5451
5452
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes (1st/2nd/3rd/4th) from a date string.

    Only a suffix that directly follows a digit is removed, so the result can
    be parsed with a plain %d directive while words that merely contain those
    letters ("August", "Monday", "Wednesday", "month") are left untouched.

    Args:
        string: date text, e.g. "Monday 2nd August 2015".

    Returns:
        The text without the ordinal suffixes, e.g. "Monday 2 August 2015".
    """
    # The lookbehind anchors the suffix to a digit; \b prevents eating the
    # start of a longer word glued to the number.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
5460
5461
5462
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime which switches the process
    locale to `local` for the duration of the parsing and then puts the
    previous locale back.

    Args:
        string: text to parse.
        date_format: strptime format, described in
            https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
        local: name of the locale to parse with.

    Returns:
        The parsed value as a datetime.date.

    Raises:
        ValueError: if `string` does not match `date_format`.
        locale.Error: if the requested locale cannot be set.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the caller's locale even when strptime raises, so a single
        # unparsable date does not leave the whole process in `local`.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
5473
5474
5475
# Registry of the GenericComic subclasses; classes whose `name` is None are
# kept in COMICS but left out of the lookup structures below.
COMICS = set(get_subclasses(GenericComic))
VALID_COMICS = [c for c in COMICS if c.name is not None]
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
# Two comics sharing a `name` would silently overwrite each other in
# COMIC_NAMES: fail at import time and say which names are involved.
assert len(VALID_COMICS) == len(COMIC_NAMES), \
    "Duplicated comic names: %s" % sorted(
        n for n in COMIC_NAMES
        if sum(c.name == n for c in VALID_COMICS) > 1)
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
# Same check for the Python class names.
assert len(VALID_COMICS) == len(CLASS_NAMES), \
    "Duplicated comic class names: %s" % sorted(
        n for n in CLASS_NAMES
        if sum(c.__name__ == n for c in VALID_COMICS) > 1)
5481