Completed
Push — master ( 1dbf17...58f52d )
by Dongxin
01:16
created

escape_link()   A

Complexity

Conditions 3

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
c 0
b 0
f 0
dl 0
loc 7
rs 9.4285
1
# coding: utf-8
2
"""
3
    mistune
4
    ~~~~~~~
5
6
    The fastest markdown parser in pure Python with renderer feature.
7
8
    :copyright: (c) 2014 - 2016 by Hsiaoming Yang.
9
"""
10
11
import re
12
import inspect
13
14
# Package metadata.
__version__ = '0.7.3'
__author__ = 'Hsiaoming Yang <[email protected]>'

# Public names exported by a star-import of this module.
__all__ = [
    'BlockGrammar', 'BlockLexer',
    'InlineGrammar', 'InlineLexer',
    'Renderer', 'Markdown',
    'markdown', 'escape',
]
22
23
24
# One or more whitespace characters; used to normalise reference keys.
_key_pattern = re.compile(r'\s+')
# A single non-word character.
_nonalpha_pattern = re.compile(r'\W')
# An ampersand that is NOT the start of an entity such as "&amp;" or "&#39;".
_escape_pattern = re.compile(r'&(?!#?\w+;)')
# Windows (\r\n) and old Mac (\r) line endings.
_newline_pattern = re.compile(r'\r\n|\r')
# The leading "> " marker on every line of a block quote.
_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
# The four-space indent on every line of an indented code block.
_block_code_leading_pattern = re.compile(r'^ {4}', re.M)
# HTML tags treated as inline (span level); any other tag name is considered
# block level by ``_block_tag`` below.
_inline_tags = [
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
    'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
    'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
    'img', 'font',
]
_pre_tags = ['pre', 'script', 'style']
# Lookahead placed after a tag name: rejects names followed by ":/" or by
# text leading up to an "@", then requires a word boundary.
# NOTE(review): presumably keeps <http://...> and <user@host> from being
# parsed as HTML tags -- confirm against the autolink rule.
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
# One attribute-name character, optionally followed by ``=value`` where the
# value is double-quoted, single-quoted or a run of digits.  The grammars
# repeat this fragment to consume a whole attribute list.
_valid_attr = r'''\s*[a-zA-Z\-](?:\=(?:"[^"]*"|'[^']*'|\d+))*'''
# A tag name that is not one of ``_inline_tags``.
_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
# URL schemes that ``escape_link`` refuses to emit.
_scheme_blacklist = ('javascript:', 'vbscript:')
41
42
43
def _pure_pattern(regex):
    """Return the source text of a compiled regex without a leading ``^``.

    The grammars embed one rule inside another (inside lookaheads, for
    instance), where the start-of-string anchor of the embedded rule must
    not be kept.

    :param regex: a compiled regular expression object.
    :return: the pattern string, minus one leading ``^`` if it had one.
    """
    pattern = regex.pattern
    if pattern.startswith('^'):
        return pattern[1:]
    return pattern
48
49
50
def _keyify(key):
    """Normalise a reference key so lookups ignore case and spacing.

    The key is lower-cased and every run of whitespace is collapsed to a
    single space.

    :param key: raw key text taken from the markdown source.
    """
    return _key_pattern.sub(' ', key.lower())
52
53
54
def escape(text, quote=False, smart_amp=True):
    """Replace the special characters "&", "<" and ">" with HTML-safe
    sequences.

    The original cgi.escape always escapes "&"; this version can leave an
    ampersand alone when it already starts an entity.

    :param text: the text to escape.
    :param quote: if set to True, " and ' will be escaped as well.
    :param smart_amp: if set to False, & will always be escaped; otherwise
                      only an & that does not start an entity is escaped.
    :return: the escaped text.
    """
    # Ampersands go first so the entities produced below are not re-escaped.
    if smart_amp:
        text = _escape_pattern.sub('&amp;', text)
    else:
        text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    if quote:
        text = text.replace('"', '&quot;')
        text = text.replace("'", '&#39;')
    return text
73
74
75
def escape_link(url):
    """Remove dangerous URL schemes like javascript: and escape afterwards.

    :param url: the raw link target taken from the markdown source.
    :return: an empty string when the URL uses a blacklisted scheme,
             otherwise the URL escaped for use inside a quoted attribute.
    """
    lower_url = url.lower().strip('\x00\x1a \n\r\t')
    # Compare against a copy with everything except letters, digits, "/" and
    # ":" removed, so a scheme split by whitespace or control characters
    # (e.g. "java\nscript:") is still recognised.
    normalized = re.sub(r'[^a-z0-9/:]+', '', lower_url)
    for scheme in _scheme_blacklist:
        if normalized.startswith(scheme):
            return ''
    return escape(url, quote=True, smart_amp=False)
82 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
83
84
def preprocessing(text, tab=4):
    """Normalise raw markdown text before it is handed to the block lexer.

    :param text: the markdown source.
    :param tab: number of columns a tab character expands to.
    :return: the text with unified newlines, tabs expanded, no-break spaces
             turned into plain spaces, U+2424 turned into a newline, and
             space-only lines emptied.
    """
    text = _newline_pattern.sub('\n', text)
    text = text.expandtabs(tab)
    text = text.replace('\u00a0', ' ')
    text = text.replace('\u2424', '\n')
    # Lines made up solely of spaces become truly empty lines.
    pattern = re.compile(r'^ +$', re.M)
    return pattern.sub('', text)
91
92
93
class BlockGrammar(object):
    """Grammars for block level tokens.

    Each attribute is a compiled regular expression anchored at the start of
    the remaining text.  Several rules are assembled from others through
    ``_pure_pattern``, so the order of the definitions below matters.
    """

    def_links = re.compile(
        r'^ *\[([^^\]]+)\]: *'  # [key]:
        r'<?([^\s>]+)>?'  # <link> or link
        r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
    )
    def_footnotes = re.compile(
        r'^\[\^([^\]]+)\]: *('
        r'[^\n]*(?:\n+|$)'  # [^key]:
        r'(?: {1,}[^\n]*(?:\n+|$))*'
        r')'
    )

    newline = re.compile(r'^\n+')
    block_code = re.compile(r'^( {4}[^\n]+\n*)+')
    fences = re.compile(
        r'^ *(`{3,}|~{3,}) *(\S+)? *\n'  # ```lang
        r'([\s\S]+?)\s*'
        r'\1 *(?:\n+|$)'  # ```
    )
    hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
    heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
    lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
    block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
    list_block = re.compile(
        r'^( *)([*+-]|\d+\.) [\s\S]+?'
        r'(?:'
        r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))'  # hrule
        r'|\n+(?=%s)'  # def links
        r'|\n+(?=%s)'  # def footnotes
        r'|\n{2,}'
        r'(?! )'
        r'(?!\1(?:[*+-]|\d+\.) )\n*'
        r'|'
        r'\s*$)' % (
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
        )
    )
    list_item = re.compile(
        r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
        r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
        flags=re.M
    )
    list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
    # A paragraph runs until a line that starts any other block construct.
    # The embedded fences / list_block patterns sit inside an extra capture
    # group here, so their ``\1`` back-references are renumbered.
    paragraph = re.compile(
        r'^((?:[^\n]+\n?(?!'
        r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
        r'))+)\n*' % (
            _pure_pattern(fences).replace(r'\1', r'\2'),
            _pure_pattern(list_block).replace(r'\1', r'\3'),
            _pure_pattern(hrule),
            _pure_pattern(heading),
            _pure_pattern(lheading),
            _pure_pattern(block_quote),
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
            '<' + _block_tag,
        )
    )
    # Alternatives: an HTML comment, an open/close tag pair, or a lone tag.
    block_html = re.compile(
        r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
            r'<!--[\s\S]*?-->',
            r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr),
            r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr),
        )
    )
    table = re.compile(
        r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
    )
    nptable = re.compile(
        r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
    )
    text = re.compile(r'^[^\n]+')
169
170
171
class BlockLexer(object):
    """Block level lexer for block grammars.

    Consumes markdown text rule by rule and appends token dicts to
    ``self.tokens``.  Link and footnote definitions found on the way are
    recorded in ``self.def_links`` and ``self.def_footnotes``.
    """
    grammar_class = BlockGrammar

    # Rule names are tried in this order; the first match wins.
    default_rules = [
        'newline', 'hrule', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'block_quote',
        'list_block', 'block_html', 'def_links',
        'def_footnotes', 'table', 'paragraph', 'text'
    ]

    # Rules applied to the content of a list item.
    list_rules = (
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
        'block_quote', 'list_block', 'block_html', 'text',
    )

    # Rules applied to the content of a footnote definition.
    footnote_rules = (
        'newline', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'hrule', 'block_quote',
        'list_block', 'block_html', 'table', 'paragraph', 'text'
    )

    def __init__(self, rules=None, **kwargs):
        """
        :param rules: a grammar object exposing one compiled regex per rule
                      name; defaults to an instance of ``grammar_class``.
        """
        self.tokens = []
        self.def_links = {}
        self.def_footnotes = {}

        if not rules:
            rules = self.grammar_class()

        self.rules = rules

    def __call__(self, text, rules=None):
        return self.parse(text, rules)

    def parse(self, text, rules=None):
        """Tokenize ``text`` and return the accumulated token list.

        :param text: markdown text (trailing newlines are ignored).
        :param rules: iterable of rule names to try, in priority order;
                      defaults to ``default_rules``.
        :raises RuntimeError: if no rule matches the remaining text.
        """
        text = text.rstrip('\n')

        if not rules:
            rules = self.default_rules

        def manipulate(text):
            # Try each rule in order; the first match is handed to the
            # corresponding ``parse_<rule>`` method.
            for key in rules:
                rule = getattr(self.rules, key)
                m = rule.match(text)
                if not m:
                    continue
                getattr(self, 'parse_%s' % key)(m)
                return m
            return False  # pragma: no cover

        while text:
            m = manipulate(text)
            if m is not False:
                # ``match`` anchors at index 0, so ``end()`` is the number of
                # characters consumed.
                text = text[m.end():]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)
        return self.tokens

    def parse_newline(self, m):
        # A single newline is not significant; two or more produce a token.
        length = len(m.group(0))
        if length > 1:
            self.tokens.append({'type': 'newline'})

    def parse_block_code(self, m):
        # clean leading whitespace
        code = _block_code_leading_pattern.sub('', m.group(0))
        self.tokens.append({
            'type': 'code',
            'lang': None,
            'text': code,
        })

    def parse_fences(self, m):
        self.tokens.append({
            'type': 'code',
            'lang': m.group(2),
            'text': m.group(3),
        })

    def parse_heading(self, m):
        self.tokens.append({
            'type': 'heading',
            'level': len(m.group(1)),
            'text': m.group(2),
        })

    def parse_lheading(self, m):
        """Parse setext heading."""
        self.tokens.append({
            'type': 'heading',
            'level': 1 if m.group(2) == '=' else 2,
            'text': m.group(1),
        })

    def parse_hrule(self, m):
        self.tokens.append({'type': 'hrule'})

    def parse_list_block(self, m):
        bull = m.group(2)
        self.tokens.append({
            'type': 'list_start',
            # Ordered bullets look like "1."; plain bullets contain no dot.
            'ordered': '.' in bull,
        })
        cap = m.group(0)
        self._process_list_item(cap, bull)
        self.tokens.append({'type': 'list_end'})

    def _process_list_item(self, cap, bull):
        """Split a matched list block into items and tokenize each of them.

        :param cap: the full text matched by the ``list_block`` rule.
        :param bull: the bullet of the first item (unused here).
        """
        items = self.rules.list_item.findall(cap)

        _next = False
        length = len(items)

        for i, found in enumerate(items):
            item = found[0]

            # remove the bullet
            space = len(item)
            item = self.rules.list_bullet.sub('', item)

            # outdent
            if '\n ' in item:
                # ``space`` becomes the width of the removed bullet prefix.
                space = space - len(item)
                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
                item = pattern.sub('', item)

            # determine whether item is loose or not
            loose = _next
            if not loose and re.search(r'\n\n(?!\s*$)', item):
                loose = True

            rest = len(item)
            if i != length - 1 and rest:
                # An item ending in a newline makes the following item loose.
                _next = item[rest - 1] == '\n'
                if not loose:
                    loose = _next

            if loose:
                t = 'loose_item_start'
            else:
                t = 'list_item_start'

            self.tokens.append({'type': t})
            # recurse
            self.parse(item, self.list_rules)
            self.tokens.append({'type': 'list_item_end'})

    def parse_block_quote(self, m):
        self.tokens.append({'type': 'block_quote_start'})
        # clean leading >
        cap = _block_quote_leading_pattern.sub('', m.group(0))
        self.parse(cap)
        self.tokens.append({'type': 'block_quote_end'})

    def parse_def_links(self, m):
        key = _keyify(m.group(1))
        self.def_links[key] = {
            'link': m.group(2),
            'title': m.group(3),
        }

    def parse_def_footnotes(self, m):
        key = _keyify(m.group(1))
        if key in self.def_footnotes:
            # footnote is already defined
            return

        # 0 marks the footnote as defined; the inline lexer stores a
        # non-zero index here once the footnote is referenced.
        self.def_footnotes[key] = 0

        self.tokens.append({
            'type': 'footnote_start',
            'key': key,
        })

        text = m.group(2)

        if '\n' in text:
            lines = text.split('\n')
            # Find the smallest non-zero indent among continuation lines...
            whitespace = None
            for line in lines[1:]:
                space = len(line) - len(line.lstrip())
                if space and (not whitespace or space < whitespace):
                    whitespace = space
            # ...and strip that many characters from each of them.  When no
            # line is indented ``whitespace`` stays None and the slice keeps
            # the whole line.
            newlines = [lines[0]]
            for line in lines[1:]:
                newlines.append(line[whitespace:])
            text = '\n'.join(newlines)

        self.parse(text, self.footnote_rules)

        self.tokens.append({
            'type': 'footnote_end',
            'key': key,
        })

    def parse_table(self, m):
        item = self._process_table(m)

        cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            # Drop the outer pipes, then split the row on the inner ones.
            v = re.sub(r'^ *\| *| *\| *$', '', v)
            cells[i] = re.split(r' *\| *', v)

        item['cells'] = cells
        self.tokens.append(item)

    def parse_nptable(self, m):
        item = self._process_table(m)

        cells = re.sub(r'\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            cells[i] = re.split(r' *\| *', v)

        item['cells'] = cells
        self.tokens.append(item)

    def _process_table(self, m):
        """Build the table token (header and alignment) shared by both
        table rules; the caller adds the ``cells`` entry.
        """
        header = re.sub(r'^ *| *\| *$', '', m.group(1))
        header = re.split(r' *\| *', header)
        align = re.sub(r' *|\| *$', '', m.group(2))
        align = re.split(r' *\| *', align)

        for i, v in enumerate(align):
            if re.search(r'^ *-+: *$', v):
                align[i] = 'right'
            elif re.search(r'^ *:-+: *$', v):
                align[i] = 'center'
            elif re.search(r'^ *:-+ *$', v):
                align[i] = 'left'
            else:
                align[i] = None

        item = {
            'type': 'table',
            'header': header,
            'align': align,
        }
        return item

    def parse_block_html(self, m):
        tag = m.group(1)
        if not tag:
            # Group 1 is only set by the open/close pair alternative, so
            # this branch handles comments and lone tags.
            text = m.group(0)
            self.tokens.append({
                'type': 'close_html',
                'text': text
            })
        else:
            attr = m.group(2)
            text = m.group(3)
            self.tokens.append({
                'type': 'open_html',
                'tag': tag,
                'extra': attr,
                'text': text
            })

    def parse_paragraph(self, m):
        text = m.group(1).rstrip('\n')
        self.tokens.append({'type': 'paragraph', 'text': text})

    def parse_text(self, m):
        text = m.group(0)
        self.tokens.append({'type': 'text', 'text': text})
439
440
441
class InlineGrammar(object):
    """Grammars for inline level tokens.

    Each attribute is a compiled regular expression anchored at the start of
    the remaining text.
    """

    escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])')  # \* \+ \! ....
    # Alternatives: an HTML comment, an open/close tag pair, or a lone tag.
    inline_html = re.compile(
        r'^(?:%s|%s|%s)' % (
            r'<!--[\s\S]*?-->',
            r'<(\w+%s)((?:%s)*?)\s*>([\s\S]*?)<\/\1>' % (_valid_end, _valid_attr),
            r'<\w+%s(?:%s)*?\s*\/?>' % (_valid_end, _valid_attr),
        )
    )
    autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
    link = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\('
        r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
        r'\)'
    )
    reflink = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\s*\[([^^\]]*)\]'
    )
    nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
    url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
    double_emphasis = re.compile(
        r'^_{2}([\s\S]+?)_{2}(?!_)'  # __word__
        r'|'
        r'^\*{2}([\s\S]+?)\*{2}(?!\*)'  # **word**
    )
    emphasis = re.compile(
        r'^\b_((?:__|[^_])+?)_\b'  # _word_
        r'|'
        r'^\*((?:\*\*|[^\*])+?)\*(?!\*)'  # *word*
    )
    code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)')  # `code`
    linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
    strikethrough = re.compile(r'^~~(?=\S)([\s\S]*?\S)~~')  # ~~word~~
    footnote = re.compile(r'^\[\^([^\]]+)\]')
    # Plain text runs up to the next character that may start another rule.
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')

    def hard_wrap(self):
        """Grammar for hard wrap linebreak. You don't need to add two
        spaces at the end of a line.

        Replaces ``linebreak`` and ``text`` on this instance only; the
        class-level patterns are left untouched.
        """
        self.linebreak = re.compile(r'^ *\n(?!\s*$)')
        self.text = re.compile(
            r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
        )
491
492
493 View Code Duplication
class InlineLexer(object):
    """Inline level lexer for inline grammars.

    Walks inline text rule by rule and concatenates whatever the renderer
    returns for each matched span.
    """
    grammar_class = InlineGrammar

    # Rule names are tried in this order; the first rule whose handler
    # returns a non-None value wins.
    default_rules = [
        'escape', 'inline_html', 'autolink', 'url',
        'footnote', 'link', 'reflink', 'nolink',
        'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]
    # Rules applied to the content of an inline HTML element.
    inline_html_rules = [
        'escape', 'autolink', 'url', 'link', 'reflink',
        'nolink', 'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]

    def __init__(self, renderer, rules=None, **kwargs):
        """
        :param renderer: renderer whose methods produce the output and whose
                         ``options`` are merged into ``kwargs``.
        :param rules: a grammar object; defaults to an instance of
                      ``grammar_class``.
        """
        self.renderer = renderer
        self.links = {}
        self.footnotes = {}
        self.footnote_index = 0

        if not rules:
            rules = self.grammar_class()

        kwargs.update(self.renderer.options)
        if kwargs.get('hard_wrap'):
            rules.hard_wrap()

        self.rules = rules

        self._in_link = False
        self._in_footnote = False
        self._parse_inline_html = kwargs.get('parse_inline_html')

    def __call__(self, text, rules=None):
        return self.output(text, rules)

    def setup(self, links, footnotes):
        """Install the link and footnote definitions used to resolve
        references, and restart footnote numbering.
        """
        self.footnote_index = 0
        self.links = links or {}
        self.footnotes = footnotes or {}

    def output(self, text, rules=None):
        """Render inline ``text`` and return the renderer output.

        :param text: inline markdown text (trailing newlines are ignored).
        :param rules: iterable of rule names to try, in priority order;
                      defaults to a copy of ``default_rules``.
        :raises RuntimeError: if no rule matches the remaining text.
        """
        text = text.rstrip('\n')
        if not rules:
            rules = list(self.default_rules)

        if self._in_footnote and 'footnote' in rules:
            # Build a new list rather than calling ``remove`` so that a
            # rules list owned by the caller is never modified.
            rules = [key for key in rules if key != 'footnote']

        output = self.renderer.placeholder()

        def manipulate(text):
            for key in rules:
                pattern = getattr(self.rules, key)
                m = pattern.match(text)
                if not m:
                    continue
                self.line_match = m
                out = getattr(self, 'output_%s' % key)(m)
                # A handler returns None to decline the match; the next
                # rule is then tried on the same text.
                if out is not None:
                    return m, out
            return False  # pragma: no cover

        while text:
            ret = manipulate(text)
            if ret is not False:
                m, out = ret
                output += out
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)

        return output

    def _output_in_link(self, text, rules=None):
        """Render ``text`` with the in-link flag set.

        The previous flag value is restored afterwards (also when rendering
        raises), so content following a nested link is still treated as
        being inside the outer link.
        """
        previous = self._in_link
        self._in_link = True
        try:
            return self.output(text, rules)
        finally:
            self._in_link = previous

    def output_escape(self, m):
        text = m.group(1)
        return self.renderer.escape(text)

    def output_autolink(self, m):
        link = m.group(1)
        # Group 2 is the separator matched inside the brackets: "@" or ":".
        is_email = m.group(2) == '@'
        return self.renderer.autolink(link, is_email)

    def output_url(self, m):
        link = m.group(1)
        if self._in_link:
            # Already inside a link: emit the URL as plain text.
            return self.renderer.text(link)
        return self.renderer.autolink(link, False)

    def output_inline_html(self, m):
        tag = m.group(1)
        if self._parse_inline_html and tag in _inline_tags:
            text = m.group(3)
            if tag == 'a':
                text = self._output_in_link(text, self.inline_html_rules)
            else:
                text = self.output(text, rules=self.inline_html_rules)
            extra = m.group(2) or ''
            html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        else:
            html = m.group(0)
        return self.renderer.inline_html(html)

    def output_footnote(self, m):
        key = _keyify(m.group(1))
        if key not in self.footnotes:
            return None
        if self.footnotes[key]:
            # Already referenced once; decline so a later rule handles it.
            return None
        self.footnote_index += 1
        self.footnotes[key] = self.footnote_index
        return self.renderer.footnote_ref(key, self.footnote_index)

    def output_link(self, m):
        return self._process_link(m, m.group(3), m.group(4))

    def output_reflink(self, m):
        # An empty second bracket means the link text itself is the key.
        key = _keyify(m.group(2) or m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def output_nolink(self, m):
        key = _keyify(m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def _process_link(self, m, link, title=None):
        """Render a link, or an image when the match starts with ``!``."""
        line = m.group(0)
        text = m.group(1)
        if line[0] == '!':
            return self.renderer.image(link, title, text)

        text = self._output_in_link(text)
        return self.renderer.link(link, title, text)

    def output_double_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.double_emphasis(text)

    def output_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.emphasis(text)

    def output_code(self, m):
        text = m.group(2)
        return self.renderer.codespan(text)

    def output_linebreak(self, m):
        return self.renderer.linebreak()

    def output_strikethrough(self, m):
        text = self.output(m.group(1))
        return self.renderer.strikethrough(text)

    def output_text(self, m):
        text = m.group(0)
        return self.renderer.text(text)
666
667
668
class Renderer(object):
    """The default HTML renderer for rendering Markdown.

    Keyword arguments given to the constructor are stored in
    ``self.options`` and consulted by the individual rendering methods.
    """

    def __init__(self, **kwargs):
        self.options = kwargs

    def placeholder(self):
        """Returns the default, empty output value for the renderer.

        All renderer methods use the '+=' operator to append to this value.
        Default is a string so rendering HTML can build up a result string with
        the rendered Markdown.

        Can be overridden by Renderer subclasses to be types like an empty
        list, allowing the renderer to create a tree-like structure to
        represent the document (which can then be reprocessed later into a
        separate format like docx or pdf).
        """
        return ''

    def block_code(self, code, lang=None):
        """Rendering block level code. ``pre > code``.

        :param code: text content of the code block.
        :param lang: language of the given code.
        """
        code = code.rstrip('\n')
        if not lang:
            code = escape(code, smart_amp=False)
            return '<pre><code>%s\n</code></pre>\n' % code
        code = escape(code, quote=True, smart_amp=False)
        # ``lang`` comes straight from the markdown source and ends up inside
        # a quoted attribute, so it has to be escaped as well.
        lang = escape(lang, quote=True)
        return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)

    def block_quote(self, text):
        """Rendering <blockquote> with the given text.

        :param text: text content of the blockquote.
        """
        return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')

    def block_html(self, html):
        """Rendering block level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('skip_style') and \
           html.lower().startswith('<style'):
            return ''
        if self.options.get('escape'):
            return escape(html)
        return html

    def header(self, text, level, raw=None):
        """Rendering header/heading tags like ``<h1>`` ``<h2>``.

        :param text: rendered text content for the header.
        :param level: a number for the header level, for example: 1.
        :param raw: raw text content of the header.
        """
        return '<h%d>%s</h%d>\n' % (level, text, level)

    def hrule(self):
        """Rendering method for ``<hr>`` tag."""
        if self.options.get('use_xhtml'):
            return '<hr />\n'
        return '<hr>\n'

    def list(self, body, ordered=True):
        """Rendering list tags like ``<ul>`` and ``<ol>``.

        :param body: body contents of the list.
        :param ordered: whether this list is ordered or not.
        """
        tag = 'ul'
        if ordered:
            tag = 'ol'
        return '<%s>\n%s</%s>\n' % (tag, body, tag)

    def list_item(self, text):
        """Rendering list item snippet. Like ``<li>``."""
        return '<li>%s</li>\n' % text

    def paragraph(self, text):
        """Rendering paragraph tags. Like ``<p>``."""
        return '<p>%s</p>\n' % text.strip(' ')

    def table(self, header, body):
        """Rendering table element. Wrap header and body in it.

        :param header: header part of the table.
        :param body: body part of the table.
        """
        return (
            '<table>\n<thead>%s</thead>\n'
            '<tbody>\n%s</tbody>\n</table>\n'
        ) % (header, body)

    def table_row(self, content):
        """Rendering a table row. Like ``<tr>``.

        :param content: content of current table row.
        """
        return '<tr>\n%s</tr>\n' % content

    def table_cell(self, content, **flags):
        """Rendering a table cell. Like ``<th>`` ``<td>``.

        :param content: content of current table cell.
        :param header: whether this is header or not (defaults to a plain
                       ``<td>`` cell when omitted).
        :param align: align of current table cell (no alignment when
                      omitted).
        """
        # ``get`` keeps a call without flags from raising KeyError.
        if flags.get('header'):
            tag = 'th'
        else:
            tag = 'td'
        align = flags.get('align')
        if not align:
            return '<%s>%s</%s>\n' % (tag, content, tag)
        return '<%s style="text-align:%s">%s</%s>\n' % (
            tag, align, content, tag
        )

    def double_emphasis(self, text):
        """Rendering **strong** text.

        :param text: text content for emphasis.
        """
        return '<strong>%s</strong>' % text

    def emphasis(self, text):
        """Rendering *emphasis* text.

        :param text: text content for emphasis.
        """
        return '<em>%s</em>' % text

    def codespan(self, text):
        """Rendering inline `code` text.

        :param text: text content for inline code.
        """
        text = escape(text.rstrip(), smart_amp=False)
        return '<code>%s</code>' % text

    def linebreak(self):
        """Rendering line break like ``<br>``."""
        if self.options.get('use_xhtml'):
            return '<br />\n'
        return '<br>\n'

    def strikethrough(self, text):
        """Rendering ~~strikethrough~~ text.

        :param text: text content for strikethrough.
        """
        return '<del>%s</del>' % text

    def text(self, text):
        """Rendering unformatted text.

        :param text: text content.
        """
        # With ``parse_block_html`` enabled the text is returned unescaped.
        if self.options.get('parse_block_html'):
            return text
        return escape(text)

    def escape(self, text):
        """Rendering escape sequence.

        :param text: text content.
        """
        return escape(text)

    def autolink(self, link, is_email=False):
        """Rendering a given link or email address.

        :param link: link content or email address.
        :param is_email: whether this is an email or not.
        """
        # ``escape_link`` escapes quotes (the value is placed inside a quoted
        # attribute) and drops blacklisted schemes, as ``link`` and ``image``
        # already do.
        text = link = escape_link(link)
        if is_email:
            link = 'mailto:%s' % link
        return '<a href="%s">%s</a>' % (link, text)

    def link(self, link, title, text):
        """Rendering a given link with content and title.

        :param link: href link for ``<a>`` tag.
        :param title: title content for `title` attribute.
        :param text: text content for description.
        """
        link = escape_link(link)
        if not title:
            return '<a href="%s">%s</a>' % (link, text)
        title = escape(title, quote=True)
        return '<a href="%s" title="%s">%s</a>' % (link, title, text)

    def image(self, src, title, text):
        """Rendering a image with title and text.

        :param src: source link of the image.
        :param title: title text of the image.
        :param text: alt text of the image.
        """
        src = escape_link(src)
        text = escape(text, quote=True)
        if title:
            title = escape(title, quote=True)
            html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
        else:
            html = '<img src="%s" alt="%s"' % (src, text)
        if self.options.get('use_xhtml'):
            return '%s />' % html
        return '%s>' % html

    def inline_html(self, html):
        """Rendering span level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('escape'):
            return escape(html)
        return html

    def newline(self):
        """Rendering newline element."""
        return ''

    def footnote_ref(self, key, index):
        """Rendering the ref anchor of a footnote.

        :param key: identity key for the footnote.
        :param index: the index count of current footnote.
        """
        # The key lands inside quoted attributes, so quotes are escaped too.
        safe_key = escape(key, quote=True)
        html = (
            '<sup class="footnote-ref" id="fnref-%s">'
            '<a href="#fn-%s" rel="footnote">%d</a></sup>'
        ) % (safe_key, safe_key, index)
        return html

    def footnote_item(self, key, text):
        """Rendering a footnote item.

        :param key: identity key for the footnote.
        :param text: text content of the footnote.
        """
        safe_key = escape(key, quote=True)
        back = (
            '<a href="#fnref-%s" rev="footnote">&#8617;</a>'
        ) % safe_key
        text = text.rstrip()
        closing = '</p>'
        if text.endswith(closing):
            # Insert the back-reference before the final </p> by slicing.
            # A regex substitution would treat backslashes in the key as
            # escapes in the replacement template.
            text = text[:-len(closing)] + back + closing
        else:
            text = '%s<p>%s</p>' % (text, back)
        html = '<li id="fn-%s">%s</li>\n' % (safe_key, text)
        return html

    def footnotes(self, text):
        """Wrapper for all footnotes.

        :param text: contents of all footnotes.
        """
        html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
        return html % (self.hrule(), text)
933
934
935
class Markdown(object):
    """The Markdown parser.

    Drives a block lexer to tokenize the text, then walks the token stack
    and dispatches each token to an ``output_<type>`` method, which in turn
    calls the renderer.

    :param renderer: An instance of ``Renderer``.
    :param inline: An inline lexer class or instance.
    :param block: A block lexer class or instance.
    """
    def __init__(self, renderer=None, inline=None, block=None, **kwargs):
        if not renderer:
            renderer = Renderer(**kwargs)
        else:
            # options already set on the given renderer win over kwargs
            kwargs.update(renderer.options)

        self.renderer = renderer

        # classes are instantiated here; instances are used as they are
        if inline and inspect.isclass(inline):
            inline = inline(renderer, **kwargs)
        if block and inspect.isclass(block):
            block = block(**kwargs)

        if inline:
            self.inline = inline
        else:
            self.inline = InlineLexer(renderer, **kwargs)

        self.block = block or BlockLexer(BlockGrammar())
        # rendered footnotes collected while walking the tokens
        self.footnotes = []
        # token stack; the next token to handle sits at the end of the list
        self.tokens = []

        # detect if it should parse text in block html
        self._parse_block_html = kwargs.get('parse_block_html')

    def __call__(self, text):
        """Shortcut for :meth:`parse`."""
        return self.parse(text)

    def render(self, text):
        """Render the Markdown text.

        :param text: markdown formatted text content.
        """
        return self.parse(text)

    def parse(self, text):
        """Render ``text`` and append the collected footnotes, if any.

        The link and footnote definitions kept on the block and inline
        lexers are reset afterwards.

        :param text: markdown formatted text content.
        """
        out = self.output(preprocessing(text))

        # keep a handle on the definitions before they are reset below
        keys = self.block.def_footnotes

        # reset block
        self.block.def_links = {}
        self.block.def_footnotes = {}

        # reset inline
        self.inline.links = {}
        self.inline.footnotes = {}

        if not self.footnotes:
            return out

        # Drop footnotes whose key maps to a falsy value in ``keys`` and
        # order the rest by that value. Sorting in reverse and popping from
        # the end emits them in ascending order and leaves the list empty.
        footnotes = filter(lambda o: keys.get(o['key']), self.footnotes)
        self.footnotes = sorted(
            footnotes, key=lambda o: keys.get(o['key']), reverse=True
        )

        body = self.renderer.placeholder()
        while self.footnotes:
            note = self.footnotes.pop()
            body += self.renderer.footnote_item(
                note['key'], note['text']
            )

        out += self.renderer.footnotes(body)
        return out

    def pop(self):
        """Remove the next token from the stack and make it current.

        :return: the token, or ``None`` when the stack is empty.
        """
        if not self.tokens:
            return None
        self.token = self.tokens.pop()
        return self.token

    def peek(self):
        """Return the next token without removing it, ``None`` if empty."""
        if self.tokens:
            return self.tokens[-1]
        return None  # pragma: no cover

    def output(self, text, rules=None):
        """Tokenize ``text`` with the block lexer and render every token.

        :param text: text content to be tokenized and rendered.
        :param rules: passed through to the block lexer.
        """
        self.tokens = self.block(text, rules)
        # reversed so that ``pop`` yields the tokens in document order
        self.tokens.reverse()

        self.inline.setup(self.block.def_links, self.block.def_footnotes)

        out = self.renderer.placeholder()
        while self.pop():
            out += self.tok()
        return out

    def tok(self):
        """Dispatch the current token to its ``output_<type>`` method."""
        t = self.token['type']

        # special cases: ``<name>_start`` tokens are handled by output_<name>
        if t.endswith('_start'):
            t = t[:-6]

        return getattr(self, 'output_%s' % t)()

    def tok_text(self):
        """Merge the current and directly following text tokens.

        Consecutive ``text`` tokens are joined with newlines and the result
        is run through the inline lexer.
        """
        text = self.token['text']
        # ``peek`` returns None once the stack is exhausted; stop there
        # instead of failing when a text token is the very last one.
        upcoming = self.peek()
        while upcoming is not None and upcoming['type'] == 'text':
            text += '\n' + self.pop()['text']
            upcoming = self.peek()
        return self.inline(text)

    def output_newline(self):
        return self.renderer.newline()

    def output_hrule(self):
        return self.renderer.hrule()

    def output_heading(self):
        # the raw text is handed over next to the rendered one
        return self.renderer.header(
            self.inline(self.token['text']),
            self.token['level'],
            self.token['text'],
        )

    def output_code(self):
        return self.renderer.block_code(
            self.token['text'], self.token['lang']
        )

    def output_table(self):
        """Render the header row and body rows of a table token."""
        aligns = self.token['align']
        aligns_length = len(aligns)
        cell = self.renderer.placeholder()

        # header part
        header = self.renderer.placeholder()
        for i, value in enumerate(self.token['header']):
            # columns beyond the alignment list get no alignment
            align = aligns[i] if i < aligns_length else None
            flags = {'header': True, 'align': align}
            cell += self.renderer.table_cell(self.inline(value), **flags)

        header += self.renderer.table_row(cell)

        # body part
        body = self.renderer.placeholder()
        for i, row in enumerate(self.token['cells']):
            cell = self.renderer.placeholder()
            for j, value in enumerate(row):
                align = aligns[j] if j < aligns_length else None
                flags = {'header': False, 'align': align}
                cell += self.renderer.table_cell(self.inline(value), **flags)
            body += self.renderer.table_row(cell)

        return self.renderer.table(header, body)

    def output_block_quote(self):
        # consume tokens up to the matching end marker
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'block_quote_end':
            body += self.tok()
        return self.renderer.block_quote(body)

    def output_list(self):
        ordered = self.token['ordered']
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_end':
            body += self.tok()
        return self.renderer.list(body, ordered)

    def output_list_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            # text goes through tok_text, so it is not wrapped in a
            # paragraph the way output_text would do
            if self.token['type'] == 'text':
                body += self.tok_text()
            else:
                body += self.tok()

        return self.renderer.list_item(body)

    def output_loose_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            body += self.tok()
        return self.renderer.list_item(body)

    def output_footnote(self):
        """Render a footnote body and store it for :meth:`parse`.

        Nothing is emitted in place; the rendered body is appended to
        ``self.footnotes``.
        """
        # flag on the inline lexer while the footnote body is rendered
        self.inline._in_footnote = True
        body = self.renderer.placeholder()
        key = self.token['key']
        while self.pop()['type'] != 'footnote_end':
            body += self.tok()
        self.footnotes.append({'key': key, 'text': body})
        self.inline._in_footnote = False
        return self.renderer.placeholder()

    def output_close_html(self):
        text = self.token['text']
        return self.renderer.block_html(text)

    def output_open_html(self):
        text = self.token['text']
        tag = self.token['tag']
        # content of pre-formatted tags is never run through the inline lexer
        if self._parse_block_html and tag not in _pre_tags:
            text = self.inline(text, rules=self.inline.inline_html_rules)
        extra = self.token.get('extra') or ''
        html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        return self.renderer.block_html(html)

    def output_paragraph(self):
        return self.renderer.paragraph(self.inline(self.token['text']))

    def output_text(self):
        return self.renderer.paragraph(self.tok_text())
1146
1147
1148
def markdown(text, escape=True, **kwargs):
    """Convert markdown formatted text into html.

    A ``Markdown`` parser is built from the given options and applied to
    ``text`` right away.

    :param text: markdown formatted text content.
    :param escape: if set to False, all html tags will not be escaped.
    :param use_xhtml: output with xhtml tags.
    :param hard_wrap: if set to True, it will use the GFM line breaks feature.
    :param parse_block_html: parse text only in block level html.
    :param parse_inline_html: parse text only in inline level html.
    """
    parser = Markdown(escape=escape, **kwargs)
    return parser(text)
1159