InlineGrammar   A
last analyzed

Complexity

Total Complexity 1

Size/Duplication

Total Lines 50
Duplicated Lines 0 %

Importance

Changes 3
Bugs 1 Features 2
Metric Value
c 3
b 1
f 2
dl 0
loc 50
rs 10
wmc 1

1 Method

Rating   Name   Duplication   Size   Complexity  
A hard_wrap() 0 7 1
1
# coding: utf-8
2
"""
3
    mistune
4
    ~~~~~~~
5
6
    The fastest markdown parser in pure Python with renderer feature.
7
8
    :copyright: (c) 2014 - 2017 by Hsiaoming Yang.
9
"""
10
11
import re
12
import inspect
13
14
# Package metadata.
__version__ = '0.8.3'
# NOTE(review): the address part of this string reads "[email protected]",
# which looks like a redaction placeholder -- confirm against upstream.
__author__ = 'Hsiaoming Yang <[email protected]>'

# Public names exported by a star-import of this module.
__all__ = [
    'BlockGrammar',
    'BlockLexer',
    'InlineGrammar',
    'InlineLexer',
    'Renderer',
    'Markdown',
    'markdown',
    'escape',
]
22
23
24
# Any run of whitespace; used to collapse reference keys.
_key_pattern = re.compile(r'\s+')
# A single non-word character.
_nonalpha_pattern = re.compile(r'\W')
# An "&" that is not already the start of an entity (``&name;`` / ``&#123;``).
_escape_pattern = re.compile(r'&(?!#?\w+;)')
# CRLF or a lone CR line ending.
_newline_pattern = re.compile(r'\r\n|\r')
# The leading "> " marker of every line of a block quote.
_block_quote_leading_pattern = re.compile(r'^ *> ?', re.M)
# The four-space indent of every line of an indented code block.
_block_code_leading_pattern = re.compile(r'^ {4}', re.M)

# Tags treated as inline-level; ``_block_tag`` below excludes them.
_inline_tags = [
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
    'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
    'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
    'img', 'font',
]
_pre_tags = ['pre', 'script', 'style']

# Regex fragments (plain strings) that the grammar classes interpolate.
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
_valid_attr = r'''\s*[a-zA-Z\-](?:\=(?:"[^"]*"|'[^']*'|[^\s'">]+))?'''
# A tag name that is not one of the inline tags listed above.
_block_tag = r'(?!(?:{0})\b)\w+{1}'.format('|'.join(_inline_tags), _valid_end)

# URL schemes that ``escape_link`` refuses to emit.
_scheme_blacklist = ('javascript:', 'vbscript:')
41
42
43
def _pure_pattern(regex):
    """Return the source of a compiled regex without a leading ``^``.

    :param regex: a compiled regular expression object.
    :returns: ``regex.pattern`` with one leading ``^`` removed, if present.
    """
    source = regex.pattern
    return source[1:] if source.startswith('^') else source
48
49
50
def _keyify(key):
    """Normalise a reference key for dictionary lookups.

    The key is lower-cased, HTML-escaped (quotes included) and every run
    of whitespace is collapsed into a single space.
    """
    escaped = escape(key.lower(), quote=True)
    return _key_pattern.sub(' ', escaped)
53
54
55
def escape(text, quote=False, smart_amp=True):
    """Convert the characters "&", "<" and ">" into HTML-safe sequences.

    Unlike a plain ampersand replacement, the default "smart" mode leaves
    an "&" alone when it already starts an entity.

    :param text: the string to escape.
    :param quote: if set to True, " and ' will be escaped as well.
    :param smart_amp: if set to False, & will always be escaped.
    """
    # The ampersand must be handled first so that the entities produced
    # below are not escaped a second time.
    if smart_amp:
        text = _escape_pattern.sub('&amp;', text)
    else:
        text = text.replace('&', '&amp;')

    replacements = [('<', '&lt;'), ('>', '&gt;')]
    if quote:
        replacements += [('"', '&quot;'), ("'", '&#39;')]
    for raw, entity in replacements:
        text = text.replace(raw, entity)
    return text
74
75
76
def escape_link(url):
    """Remove dangerous URL schemes like javascript: and escape afterwards.

    :param url: the raw link target.
    :returns: an empty string when the normalised URL starts with one of
        the schemes in ``_scheme_blacklist``; otherwise the original URL
        with "&", "<", ">" and both quote characters escaped.
    """
    lower_url = url.lower().strip('\x00\x1a \n\r\t')
    # Strip everything except letters, digits, "/" and ":" before the
    # comparison, so characters inserted into the scheme name do not
    # get it past the check.
    compact = re.sub(r'[^A-Za-z0-9\/:]+', '', lower_url)

    if any(compact.startswith(scheme) for scheme in _scheme_blacklist):
        return ''
    return escape(url, quote=True, smart_amp=False)
84
85
86
def preprocessing(text, tab=4):
    """Normalise raw input text before it is handed to the lexers.

    :param text: the raw markdown text.
    :param tab: the tab size passed to ``str.expandtabs``.
    :returns: the text with CRLF / CR line endings turned into LF, tabs
        expanded, and lines made only of spaces emptied.
    """
    normalised = _newline_pattern.sub('\n', text).expandtabs(tab)
    # the '\u2424' literal is replaced by a real line feed
    normalised = normalised.replace('\u2424', '\n')
    return re.sub(r'^ +$', '', normalised, flags=re.M)
92
93
94
class BlockGrammar(object):
    """Grammars for block level tokens.

    Every attribute is a compiled regular expression anchored at the start
    of the remaining text.  Several rules embed other rules (through
    ``_pure_pattern``), so the definition order below matters.
    """

    # [key]: <link> "title"
    def_links = re.compile(
        r'^ *\[([^^\]]+)\]: *'  # [key]:
        r'<?([^\s>]+)>?'  # <link> or link
        r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
    )
    # [^key]: first line, followed by any number of indented lines
    def_footnotes = re.compile(
        r'^\[\^([^\]]+)\]: *('
        r'[^\n]*(?:\n+|$)'  # [^key]:
        r'(?: {1,}[^\n]*(?:\n+|$))*'
        r')'
    )

    newline = re.compile(r'^\n+')
    # one or more lines indented by four spaces
    block_code = re.compile(r'^( {4}[^\n]+\n*)+')
    fences = re.compile(
        r'^ *(`{3,}|~{3,}) *(\S+)? *\n'  # ```lang
        r'([\s\S]+?)\s*'
        r'\1 *(?:\n+|$)'  # ```
    )
    hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
    # "# heading" style
    heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
    # heading underlined with "=" or "-"
    lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
    block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
    # NOTE: the "%" below applies to the whole concatenated literal.
    list_block = re.compile(
        r'^( *)(?=[*+-]|\d+\.)(([*+-])?(?:\d+\.)?) [\s\S]+?'
        r'(?:'
        r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))'  # hrule
        r'|\n+(?=%s)'  # def links
        r'|\n+(?=%s)'  # def footnotes
        r'|\n+(?=\1(?(3)\d+\.|[*+-]) )'   # heterogeneous bullet
        r'|\n{2,}'
        r'(?! )'
        r'(?!\1(?:[*+-]|\d+\.) )\n*'
        r'|'
        r'\s*$)' % (
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
        )
    )
    # a single item inside a matched list block
    list_item = re.compile(
        r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
        r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
        re.M
    )
    list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
    # Lines up to (but excluding) the start of any other block construct.
    # Back-references of the embedded rules are renumbered because they
    # now live inside the paragraph's own capturing group.
    paragraph = re.compile(
        r'^((?:[^\n]+\n?(?!'
        r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
        r'))+)\n*' % (
            _pure_pattern(fences).replace(r'\1', r'\2'),
            _pure_pattern(list_block).replace(r'\1', r'\3'),
            _pure_pattern(hrule),
            _pure_pattern(heading),
            _pure_pattern(lheading),
            _pure_pattern(block_quote),
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
            '<' + _block_tag,
        )
    )
    # an HTML comment, a paired block tag, or a single block tag
    block_html = re.compile(
        r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
            r'<!--[\s\S]*?-->',
            r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr),
            r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr),
        )
    )
    # table whose rows start with a pipe
    table = re.compile(
        r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
    )
    # table whose rows have no leading pipe
    nptable = re.compile(
        r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
    )
    text = re.compile(r'^[^\n]+')
171
172
173
class BlockLexer(object):
    """Block level lexer for block grammars.

    The lexer consumes text from the front: the first rule (in the active
    rule list) that matches is handed to the ``parse_<rule>`` method, which
    appends token dictionaries to ``self.tokens``.
    """
    grammar_class = BlockGrammar

    # Rule names tried, in order, for top-level text.
    default_rules = [
        'newline', 'hrule', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'block_quote',
        'list_block', 'block_html', 'def_links',
        'def_footnotes', 'table', 'paragraph', 'text'
    ]

    # Rule names tried for the content of a list item.
    list_rules = (
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
        'block_quote', 'list_block', 'block_html', 'text',
    )

    # Rule names tried for the content of a footnote definition.
    footnote_rules = (
        'newline', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'hrule', 'block_quote',
        'list_block', 'block_html', 'table', 'paragraph', 'text'
    )

    def __init__(self, rules=None, **kwargs):
        """Create a lexer.

        :param rules: a grammar object whose attributes are the compiled
            rules; an instance of ``grammar_class`` is created when omitted.
        """
        self.tokens = []
        self.def_links = {}
        self.def_footnotes = {}

        if not rules:
            rules = self.grammar_class()

        self.rules = rules

    def __call__(self, text, rules=None):
        """Shortcut for :meth:`parse`."""
        return self.parse(text, rules)

    def parse(self, text, rules=None):
        """Tokenize ``text`` and return ``self.tokens``.

        :param text: the text to tokenize; trailing newlines are dropped.
        :param rules: names of the rules to try, in order; defaults to
            ``default_rules``.
        :raises RuntimeError: when no rule matches the remaining text.
        """
        text = text.rstrip('\n')

        if not rules:
            rules = self.default_rules

        def manipulate(text):
            # Dispatch to the handler of the first rule that matches.
            for key in rules:
                rule = getattr(self.rules, key)
                m = rule.match(text)
                if not m:
                    continue
                getattr(self, 'parse_%s' % key)(m)
                return m
            return False  # pragma: no cover

        while text:
            m = manipulate(text)
            if m is not False:
                # consume what the rule matched
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)
        return self.tokens

    def parse_newline(self, m):
        """Emit a ``newline`` token for a run of two or more newlines."""
        length = len(m.group(0))
        if length > 1:
            self.tokens.append({'type': 'newline'})

    def parse_block_code(self, m):
        """Emit a ``code`` token (without language) for indented code."""
        # clean leading whitespace
        code = _block_code_leading_pattern.sub('', m.group(0))
        self.tokens.append({
            'type': 'code',
            'lang': None,
            'text': code,
        })

    def parse_fences(self, m):
        """Emit a ``code`` token for a fenced code block."""
        self.tokens.append({
            'type': 'code',
            'lang': m.group(2),
            'text': m.group(3),
        })

    def parse_heading(self, m):
        """Emit a ``heading`` token; the level is the number of ``#``."""
        self.tokens.append({
            'type': 'heading',
            'level': len(m.group(1)),
            'text': m.group(2),
        })

    def parse_lheading(self, m):
        """Parse setext heading."""
        self.tokens.append({
            'type': 'heading',
            'level': 1 if m.group(2) == '=' else 2,
            'text': m.group(1),
        })

    def parse_hrule(self, m):
        """Emit an ``hrule`` token."""
        self.tokens.append({'type': 'hrule'})

    def parse_list_block(self, m):
        """Emit ``list_start`` / item tokens / ``list_end`` for a list."""
        bull = m.group(2)
        self.tokens.append({
            'type': 'list_start',
            # an ordered bullet looks like "1."
            'ordered': '.' in bull,
        })
        cap = m.group(0)
        self._process_list_item(cap, bull)
        self.tokens.append({'type': 'list_end'})

    def _process_list_item(self, cap, bull):
        """Split a list block into items and tokenize each of them.

        :param cap: the full text matched by the ``list_block`` rule.
        :param bull: the bullet of the list (not used here).
        """
        items = self.rules.list_item.findall(cap)
        last = len(items) - 1

        # whether the previous item ended with a blank line
        _next = False

        for i, groups in enumerate(items):
            item = groups[0]

            # remove the bullet
            space = len(item)
            item = self.rules.list_bullet.sub('', item)

            # outdent
            if '\n ' in item:
                # width of the removed bullet == maximum indent to strip
                space = space - len(item)
                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
                item = pattern.sub('', item)

            # determine whether item is loose or not
            loose = _next
            if not loose and re.search(r'\n\n(?!\s*$)', item):
                loose = True

            if i != last and item:
                _next = item.endswith('\n')
                if not loose:
                    loose = _next

            t = 'loose_item_start' if loose else 'list_item_start'

            self.tokens.append({'type': t})
            # recurse
            self.parse(item, self.list_rules)
            self.tokens.append({'type': 'list_item_end'})

    def parse_block_quote(self, m):
        """Emit block quote tokens around the tokenized quote content."""
        self.tokens.append({'type': 'block_quote_start'})
        # clean leading >
        cap = _block_quote_leading_pattern.sub('', m.group(0))
        self.parse(cap)
        self.tokens.append({'type': 'block_quote_end'})

    def parse_def_links(self, m):
        """Record a link definition in ``self.def_links`` (no token)."""
        key = _keyify(m.group(1))
        self.def_links[key] = {
            'link': m.group(2),
            'title': m.group(3),
        }

    def parse_def_footnotes(self, m):
        """Record a footnote definition and tokenize its content."""
        key = _keyify(m.group(1))
        if key in self.def_footnotes:
            # footnote is already defined
            return

        self.def_footnotes[key] = 0

        self.tokens.append({
            'type': 'footnote_start',
            'key': key,
        })

        text = m.group(2)

        if '\n' in text:
            lines = text.split('\n')
            # smallest non-zero indent among the continuation lines
            whitespace = None
            for line in lines[1:]:
                space = len(line) - len(line.lstrip())
                if space and (not whitespace or space < whitespace):
                    whitespace = space
            # ``line[None:]`` leaves the line untouched when no indent
            # was found
            newlines = [lines[0]]
            newlines.extend(line[whitespace:] for line in lines[1:])
            text = '\n'.join(newlines)

        self.parse(text, self.footnote_rules)

        self.tokens.append({
            'type': 'footnote_end',
            'key': key,
        })

    def parse_table(self, m):
        """Emit a ``table`` token for a table with leading pipes."""
        item = self._process_table(m)

        cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            # drop the outer pipes, then split on unescaped pipes
            v = re.sub(r'^ *\| *| *\| *$', '', v)
            cells[i] = re.split(r' *(?<!\\)\| *', v)

        item['cells'] = self._process_cells(cells)
        self.tokens.append(item)

    def parse_nptable(self, m):
        """Emit a ``table`` token for a table without leading pipes."""
        item = self._process_table(m)

        cells = re.sub(r'\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            cells[i] = re.split(r' *(?<!\\)\| *', v)

        item['cells'] = self._process_cells(cells)
        self.tokens.append(item)

    def _process_table(self, m):
        """Build the ``table`` token (header and alignment, no cells)."""
        header = re.sub(r'^ *| *\| *$', '', m.group(1))
        header = re.split(r' *\| *', header)
        align = re.sub(r' *|\| *$', '', m.group(2))
        align = re.split(r' *\| *', align)

        for i, v in enumerate(align):
            if re.search(r'^ *-+: *$', v):
                align[i] = 'right'
            elif re.search(r'^ *:-+: *$', v):
                align[i] = 'center'
            elif re.search(r'^ *:-+ *$', v):
                align[i] = 'left'
            else:
                align[i] = None

        item = {
            'type': 'table',
            'header': header,
            'align': align,
        }
        return item

    def _process_cells(self, cells):
        """Turn every escaped pipe (backslash + "|") in the cells into "|".

        :param cells: a list of rows, each a list of cell strings; it is
            modified in place and also returned.
        """
        for row in cells:
            for c, cell in enumerate(row):
                # de-escape any pipe inside the cell here; a plain string
                # replacement avoids the invalid "\|" escape that a
                # non-raw regex literal would need
                row[c] = cell.replace('\\|', '|')

        return cells

    def parse_block_html(self, m):
        """Emit ``open_html`` for a paired tag, ``close_html`` otherwise."""
        tag = m.group(1)
        if not tag:
            text = m.group(0)
            self.tokens.append({
                'type': 'close_html',
                'text': text
            })
        else:
            attr = m.group(2)
            text = m.group(3)
            self.tokens.append({
                'type': 'open_html',
                'tag': tag,
                'extra': attr,
                'text': text
            })

    def parse_paragraph(self, m):
        """Emit a ``paragraph`` token."""
        text = m.group(1).rstrip('\n')
        self.tokens.append({'type': 'paragraph', 'text': text})

    def parse_text(self, m):
        """Emit a ``text`` token for a single line."""
        text = m.group(0)
        self.tokens.append({'type': 'text', 'text': text})
449
450
451
class InlineGrammar(object):
    """Grammars for inline level tokens.

    Every attribute is a compiled regular expression anchored at the start
    of the remaining text.
    """

    # backslash escapes: \* \+ \! ....
    escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])')
    # an HTML comment, a paired tag, or a single tag
    inline_html = re.compile(
        r'^(?:%s|%s|%s)' % (
            r'<!--[\s\S]*?-->',
            r'<(\w+%s)((?:%s)*?)\s*>([\s\S]*?)<\/\1>' % (
                _valid_end, _valid_attr),
            r'<\w+%s(?:%s)*?\s*\/?>' % (_valid_end, _valid_attr),
        )
    )
    # <address@host> or <scheme:rest>
    autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
    # [text](link "title"), optionally prefixed with "!"
    link = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\('
        r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
        r'\)'
    )
    # [text][key]
    reflink = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\s*\[([^^\]]*)\]'
    )
    # [key]
    nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
    # bare http:// or https:// URL
    url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
    double_emphasis = re.compile(
        r'^_{2}([\s\S]+?)_{2}(?!_)'  # __word__
        r'|'
        r'^\*{2}([\s\S]+?)\*{2}(?!\*)'  # **word**
    )
    emphasis = re.compile(
        r'^\b_((?:__|[^_])+?)_\b'  # _word_
        r'|'
        r'^\*((?:\*\*|[^\*])+?)\*(?!\*)'  # *word*
    )
    code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)')  # `code`
    # two or more spaces before a newline
    linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
    strikethrough = re.compile(r'^~~(?=\S)([\s\S]*?\S)~~')  # ~~word~~
    footnote = re.compile(r'^\[\^([^\]]+)\]')
    # plain text up to the next character that may start another rule
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')

    def hard_wrap(self):
        """Switch this grammar instance to hard-wrap line breaks.

        Afterwards a newline breaks the line even without two trailing
        spaces.  Only the instance is changed, not the class.
        """
        self.linebreak = re.compile(r'^ *\n(?!\s*$)')
        self.text = re.compile(
            r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
        )
502
503
504
class InlineLexer(object):
    """Inline level lexer for inline grammars.

    The lexer consumes text from the front: the first rule (in the active
    rule list) whose ``output_<rule>`` handler returns something other
    than ``None`` contributes to the output, which is built from the
    renderer's placeholder with ``+=``.
    """
    grammar_class = InlineGrammar

    # Rule names tried, in order, for ordinary inline text.
    default_rules = [
        'escape', 'inline_html', 'autolink', 'url',
        'footnote', 'link', 'reflink', 'nolink',
        'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]
    # Rule names tried for the content of an inline HTML tag.
    inline_html_rules = [
        'escape', 'inline_html', 'autolink', 'url', 'link', 'reflink',
        'nolink', 'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]

    def __init__(self, renderer, rules=None, **kwargs):
        """Create a lexer.

        :param renderer: the renderer; its ``options`` override ``kwargs``.
        :param rules: a grammar object; an instance of ``grammar_class``
            is created when omitted.
        """
        self.renderer = renderer
        self.links = {}
        self.footnotes = {}
        self.footnote_index = 0

        if not rules:
            rules = self.grammar_class()

        kwargs.update(self.renderer.options)
        if kwargs.get('hard_wrap'):
            rules.hard_wrap()

        self.rules = rules

        self._in_link = False
        self._in_footnote = False
        self._parse_inline_html = kwargs.get('parse_inline_html')

    def __call__(self, text, rules=None):
        """Shortcut for :meth:`output`."""
        return self.output(text, rules)

    def setup(self, links, footnotes):
        """Install the link and footnote definitions and reset the
        footnote counter."""
        self.footnote_index = 0
        self.links = links or {}
        self.footnotes = footnotes or {}

    def output(self, text, rules=None):
        """Render inline ``text`` through the renderer.

        :param text: the inline text; trailing newlines are dropped.
        :param rules: names of the rules to try, in order; defaults to
            ``default_rules``.  The given sequence is never modified.
        :raises RuntimeError: when no rule produces output for the
            remaining text.
        """
        text = text.rstrip('\n')
        if not rules:
            rules = self.default_rules

        if self._in_footnote:
            # Footnote references are disabled inside a footnote.  Build
            # a filtered copy instead of removing from ``rules``, which
            # may be a list owned by the caller or by the class.
            rules = [key for key in rules if key != 'footnote']

        output = self.renderer.placeholder()

        def manipulate(text):
            for key in rules:
                pattern = getattr(self.rules, key)
                m = pattern.match(text)
                if not m:
                    continue
                self.line_match = m
                out = getattr(self, 'output_%s' % key)(m)
                # a handler returning None declines the match; try the
                # next rule
                if out is not None:
                    return m, out
            return False  # pragma: no cover

        while text:
            ret = manipulate(text)
            if ret is not False:
                m, out = ret
                output += out
                # consume what the rule matched
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)

        return output

    def _output_in_link(self, text, rules=None):
        """Run :meth:`output` with ``_in_link`` set, then restore it.

        The previous value is restored (rather than forcing ``False``) so
        that finishing a nested link does not make the enclosing link
        forget that it is still open.
        """
        previous = self._in_link
        self._in_link = True
        try:
            return self.output(text, rules)
        finally:
            self._in_link = previous

    def output_escape(self, m):
        text = m.group(1)
        return self.renderer.escape(text)

    def output_autolink(self, m):
        link = m.group(1)
        # group 2 is the separator that was matched: "@" or ":"
        is_email = m.group(2) == '@'
        return self.renderer.autolink(link, is_email)

    def output_url(self, m):
        link = m.group(1)
        if self._in_link:
            # already inside a link: emit plain text
            return self.renderer.text(link)
        return self.renderer.autolink(link, False)

    def output_inline_html(self, m):
        tag = m.group(1)
        if self._parse_inline_html and tag in _inline_tags:
            text = m.group(3)
            if tag == 'a':
                text = self._output_in_link(
                    text, rules=self.inline_html_rules
                )
            else:
                text = self.output(text, rules=self.inline_html_rules)
            extra = m.group(2) or ''
            html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        else:
            html = m.group(0)
        return self.renderer.inline_html(html)

    def output_footnote(self, m):
        key = _keyify(m.group(1))
        if key not in self.footnotes:
            return None
        if self.footnotes[key]:
            # this footnote already has an index, i.e. was referenced
            return None
        self.footnote_index += 1
        self.footnotes[key] = self.footnote_index
        return self.renderer.footnote_ref(key, self.footnote_index)

    def output_link(self, m):
        return self._process_link(m, m.group(3), m.group(4))

    def output_reflink(self, m):
        key = _keyify(m.group(2) or m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def output_nolink(self, m):
        key = _keyify(m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def _process_link(self, m, link, title=None):
        """Render a matched link, or an image when it starts with "!"."""
        line = m.group(0)
        text = m.group(1)
        if line[0] == '!':
            return self.renderer.image(link, title, text)

        text = self._output_in_link(text)
        return self.renderer.link(link, title, text)

    def output_double_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.double_emphasis(text)

    def output_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.emphasis(text)

    def output_code(self, m):
        text = m.group(2)
        return self.renderer.codespan(text)

    def output_linebreak(self, m):
        return self.renderer.linebreak()

    def output_strikethrough(self, m):
        text = self.output(m.group(1))
        return self.renderer.strikethrough(text)

    def output_text(self, m):
        text = m.group(0)
        return self.renderer.text(text)
677
678
679
class Renderer(object):
    """The default HTML renderer for rendering Markdown.

    Keyword arguments given to the constructor are stored in
    ``self.options`` and consulted by the individual rendering methods.
    """

    def __init__(self, **kwargs):
        self.options = kwargs

    def placeholder(self):
        """Returns the default, empty output value for the renderer.

        All renderer methods use the '+=' operator to append to this value.
        Default is a string so rendering HTML can build up a result string with
        the rendered Markdown.

        Can be overridden by Renderer subclasses to be types like an empty
        list, allowing the renderer to create a tree-like structure to
        represent the document (which can then be reprocessed later into a
        separate format like docx or pdf).
        """
        return ''

    def block_code(self, code, lang=None):
        """Rendering block level code. ``pre > code``.

        :param code: text content of the code block.
        :param lang: language of the given code.
        """
        code = code.rstrip('\n')
        if not lang:
            code = escape(code, smart_amp=False)
            return '<pre><code>%s\n</code></pre>\n' % code
        code = escape(code, quote=True, smart_amp=False)
        # The language comes straight from the document and ends up inside
        # an attribute value, so it must be escaped (quotes included).
        lang = escape(lang, quote=True)
        return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)

    def block_quote(self, text):
        """Rendering <blockquote> with the given text.

        :param text: text content of the blockquote.
        """
        return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')

    def block_html(self, html):
        """Rendering block level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('skip_style') and \
           html.lower().startswith('<style'):
            return ''
        if self.options.get('escape'):
            return escape(html)
        return html

    def header(self, text, level, raw=None):
        """Rendering header/heading tags like ``<h1>`` ``<h2>``.

        :param text: rendered text content for the header.
        :param level: a number for the header level, for example: 1.
        :param raw: raw text content of the header.
        """
        return '<h%d>%s</h%d>\n' % (level, text, level)

    def hrule(self):
        """Rendering method for ``<hr>`` tag."""
        if self.options.get('use_xhtml'):
            return '<hr />\n'
        return '<hr>\n'

    def list(self, body, ordered=True):
        """Rendering list tags like ``<ul>`` and ``<ol>``.

        :param body: body contents of the list.
        :param ordered: whether this list is ordered or not.
        """
        tag = 'ul'
        if ordered:
            tag = 'ol'
        return '<%s>\n%s</%s>\n' % (tag, body, tag)

    def list_item(self, text):
        """Rendering list item snippet. Like ``<li>``."""
        return '<li>%s</li>\n' % text

    def paragraph(self, text):
        """Rendering paragraph tags. Like ``<p>``."""
        return '<p>%s</p>\n' % text.strip(' ')

    def table(self, header, body):
        """Rendering table element. Wrap header and body in it.

        :param header: header part of the table.
        :param body: body part of the table.
        """
        return (
            '<table>\n<thead>%s</thead>\n'
            '<tbody>\n%s</tbody>\n</table>\n'
        ) % (header, body)

    def table_row(self, content):
        """Rendering a table row. Like ``<tr>``.

        :param content: content of current table row.
        """
        return '<tr>\n%s</tr>\n' % content

    def table_cell(self, content, **flags):
        """Rendering a table cell. Like ``<th>`` ``<td>``.

        :param content: content of current table cell.
        :param header: whether this is header or not.
        :param align: align of current table cell.
        """
        if flags['header']:
            tag = 'th'
        else:
            tag = 'td'
        align = flags['align']
        if not align:
            return '<%s>%s</%s>\n' % (tag, content, tag)
        return '<%s style="text-align:%s">%s</%s>\n' % (
            tag, align, content, tag
        )

    def double_emphasis(self, text):
        """Rendering **strong** text.

        :param text: text content for emphasis.
        """
        return '<strong>%s</strong>' % text

    def emphasis(self, text):
        """Rendering *emphasis* text.

        :param text: text content for emphasis.
        """
        return '<em>%s</em>' % text

    def codespan(self, text):
        """Rendering inline `code` text.

        :param text: text content for inline code.
        """
        text = escape(text.rstrip(), smart_amp=False)
        return '<code>%s</code>' % text

    def linebreak(self):
        """Rendering line break like ``<br>``."""
        if self.options.get('use_xhtml'):
            return '<br />\n'
        return '<br>\n'

    def strikethrough(self, text):
        """Rendering ~~strikethrough~~ text.

        :param text: text content for strikethrough.
        """
        return '<del>%s</del>' % text

    def text(self, text):
        """Rendering unformatted text.

        :param text: text content.
        """
        if self.options.get('parse_block_html'):
            return text
        return escape(text)

    def escape(self, text):
        """Rendering escape sequence.

        :param text: text content.
        """
        return escape(text)

    def autolink(self, link, is_email=False):
        """Rendering a given link or email address.

        :param link: link content or email address.
        :param is_email: whether this is an email or not.
        """
        text = link = escape_link(link)
        if is_email:
            link = 'mailto:%s' % link
        return '<a href="%s">%s</a>' % (link, text)

    def link(self, link, title, text):
        """Rendering a given link with content and title.

        :param link: href link for ``<a>`` tag.
        :param title: title content for `title` attribute.
        :param text: text content for description.
        """
        link = escape_link(link)
        if not title:
            return '<a href="%s">%s</a>' % (link, text)
        title = escape(title, quote=True)
        return '<a href="%s" title="%s">%s</a>' % (link, title, text)

    def image(self, src, title, text):
        """Rendering a image with title and text.

        :param src: source link of the image.
        :param title: title text of the image.
        :param text: alt text of the image.
        """
        src = escape_link(src)
        text = escape(text, quote=True)
        if title:
            title = escape(title, quote=True)
            html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
        else:
            html = '<img src="%s" alt="%s"' % (src, text)
        if self.options.get('use_xhtml'):
            return '%s />' % html
        return '%s>' % html

    def inline_html(self, html):
        """Rendering span level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('escape'):
            return escape(html)
        return html

    def newline(self):
        """Rendering newline element."""
        return ''

    def footnote_ref(self, key, index):
        """Rendering the ref anchor of a footnote.

        :param key: identity key for the footnote.
        :param index: the index count of current footnote.
        """
        html = (
            '<sup class="footnote-ref" id="fnref-%s">'
            '<a href="#fn-%s">%d</a></sup>'
        ) % (escape(key), escape(key), index)
        return html

    def footnote_item(self, key, text):
        """Rendering a footnote item.

        :param key: identity key for the footnote.
        :param text: text content of the footnote.
        """
        back = (
            '<a href="#fnref-%s" class="footnote">&#8617;</a>'
        ) % escape(key)
        text = text.rstrip()
        closing = '</p>'
        if text.endswith(closing):
            # Insert the back link before the final </p> by slicing.
            # Going through ``re.sub`` would treat ``back`` as a
            # replacement template, so a backslash in the key would be
            # read as an escape or group reference.
            text = text[:-len(closing)] + back + closing
        else:
            text = '%s<p>%s</p>' % (text, back)
        html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
        return html

    def footnotes(self, text):
        """Wrapper for all footnotes.

        :param text: contents of all footnotes.
        """
        html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
        return html % (self.hrule(), text)
944
945
946
class Markdown(object):
    """The Markdown parser.

    Tokenizes the text with the block lexer, then walks the tokens and
    hands each one to the renderer, using the inline lexer for span level
    content.

    :param renderer: An instance of ``Renderer``.
    :param inline: An inline lexer class or instance.
    :param block: A block lexer class or instance.
    """
    def __init__(self, renderer=None, inline=None, block=None, **kwargs):
        if not renderer:
            renderer = Renderer(**kwargs)
        else:
            # options of an explicit renderer override the given kwargs
            kwargs.update(renderer.options)

        self.renderer = renderer

        # classes are instantiated here; instances are used as they are
        if inline and inspect.isclass(inline):
            inline = inline(renderer, **kwargs)
        if block and inspect.isclass(block):
            block = block(**kwargs)

        if inline:
            self.inline = inline
        else:
            self.inline = InlineLexer(renderer, **kwargs)

        self.block = block or BlockLexer(BlockGrammar())
        self.footnotes = []
        self.tokens = []

        # detect if it should parse text in block html
        self._parse_block_html = kwargs.get('parse_block_html')

    def __call__(self, text):
        return self.parse(text)

    def render(self, text):
        """Render the Markdown text.

        :param text: markdown formatted text content.
        """
        return self.parse(text)

    def parse(self, text):
        """Parse the text and return the rendered output.

        Collected footnotes, if any, are rendered and appended at the end
        of the output. The link and footnote definitions stored on both
        lexers are cleared once the body has been rendered.

        :param text: markdown formatted text content.
        """
        out = self.output(preprocessing(text))

        # grab the footnote definitions before they are reset below
        keys = self.block.def_footnotes

        # reset block
        self.block.def_links = {}
        self.block.def_footnotes = {}

        # reset inline
        self.inline.links = {}
        self.inline.footnotes = {}

        if not self.footnotes:
            return out

        # drop footnotes whose key has no truthy entry in ``keys``
        # (presumably definitions that were never referenced -- confirm
        # against the inline lexer)
        footnotes = filter(lambda o: keys.get(o['key']), self.footnotes)
        # sorted in descending order because items are popped off the end,
        # which renders them in ascending order
        self.footnotes = sorted(
            footnotes, key=lambda o: keys.get(o['key']), reverse=True
        )

        body = self.renderer.placeholder()
        while self.footnotes:
            note = self.footnotes.pop()
            body += self.renderer.footnote_item(
                note['key'], note['text']
            )

        out += self.renderer.footnotes(body)
        return out

    def pop(self):
        """Pop the next token into ``self.token`` and return it.

        Returns None (leaving ``self.token`` untouched) when no tokens
        are left.
        """
        if not self.tokens:
            return None
        self.token = self.tokens.pop()
        return self.token

    def peek(self):
        """Return the next token without consuming it, or None if empty."""
        if self.tokens:
            return self.tokens[-1]
        return None  # pragma: no cover

    def output(self, text, rules=None):
        """Tokenize the text with the block lexer and render every token.

        :param text: text content to tokenize and render.
        :param rules: passed through to the block lexer.
        """
        self.tokens = self.block(text, rules)
        # reversed so that ``pop`` (which takes from the end of the list)
        # yields the tokens in their original order
        self.tokens.reverse()

        self.inline.setup(self.block.def_links, self.block.def_footnotes)

        out = self.renderer.placeholder()
        while self.pop():
            out += self.tok()
        return out

    def tok(self):
        """Dispatch the current token to its ``output_<type>`` method."""
        t = self.token['type']

        # special cases
        if t.endswith('_start'):
            # ``xxx_start`` tokens are handled by ``output_xxx``
            t = t[:-6]

        return getattr(self, 'output_%s' % t)()

    def tok_text(self):
        """Merge the current and directly following text tokens.

        The texts are joined with newlines and the result is passed
        through the inline lexer.
        """
        text = self.token['text']
        while True:
            following = self.peek()
            # ``peek`` returns None once the token stack is exhausted;
            # stop there instead of failing on ``None['type']``
            if following is None or following['type'] != 'text':
                break
            text += '\n' + self.pop()['text']
        return self.inline(text)

    def output_newline(self):
        return self.renderer.newline()

    def output_hrule(self):
        return self.renderer.hrule()

    def output_heading(self):
        # the renderer receives the inline-rendered text, the level and
        # the raw text
        return self.renderer.header(
            self.inline(self.token['text']),
            self.token['level'],
            self.token['text'],
        )

    def output_code(self):
        return self.renderer.block_code(
            self.token['text'], self.token['lang']
        )

    def output_table(self):
        aligns = self.token['align']
        aligns_length = len(aligns)
        cell = self.renderer.placeholder()

        # header part
        header = self.renderer.placeholder()
        for i, value in enumerate(self.token['header']):
            # columns beyond the align list get no alignment
            align = aligns[i] if i < aligns_length else None
            flags = {'header': True, 'align': align}
            cell += self.renderer.table_cell(self.inline(value), **flags)

        header += self.renderer.table_row(cell)

        # body part
        body = self.renderer.placeholder()
        for i, row in enumerate(self.token['cells']):
            cell = self.renderer.placeholder()
            for j, value in enumerate(row):
                align = aligns[j] if j < aligns_length else None
                flags = {'header': False, 'align': align}
                cell += self.renderer.table_cell(self.inline(value), **flags)
            body += self.renderer.table_row(cell)

        return self.renderer.table(header, body)

    def output_block_quote(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'block_quote_end':
            body += self.tok()
        return self.renderer.block_quote(body)

    def output_list(self):
        ordered = self.token['ordered']
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_end':
            body += self.tok()
        return self.renderer.list(body, ordered)

    def output_list_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            if self.token['type'] == 'text':
                # text tokens are merged and rendered inline here, without
                # the paragraph wrapper that ``output_text`` adds
                body += self.tok_text()
            else:
                body += self.tok()

        return self.renderer.list_item(body)

    def output_loose_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            body += self.tok()
        return self.renderer.list_item(body)

    def output_footnote(self):
        """Collect a footnote body; it is rendered later by ``parse``."""
        self.inline._in_footnote = True
        try:
            body = self.renderer.placeholder()
            key = self.token['key']
            while self.pop()['type'] != 'footnote_end':
                body += self.tok()
        finally:
            # never leave the inline lexer flagged if rendering fails
            self.inline._in_footnote = False
        self.footnotes.append({'key': key, 'text': body})
        # nothing is emitted in place of the footnote definition
        return self.renderer.placeholder()

    def output_close_html(self):
        text = self.token['text']
        return self.renderer.block_html(text)

    def output_open_html(self):
        text = self.token['text']
        tag = self.token['tag']
        if self._parse_block_html and tag not in _pre_tags:
            text = self.inline(text, rules=self.inline.inline_html_rules)
        extra = self.token.get('extra') or ''
        html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        return self.renderer.block_html(html)

    def output_paragraph(self):
        return self.renderer.paragraph(self.inline(self.token['text']))

    def output_text(self):
        return self.renderer.paragraph(self.tok_text())
1157
1158
1159
def markdown(text, escape=True, **kwargs):
    """Convert markdown formatted text into html.

    A new ``Markdown`` parser is created from the given options and then
    applied to ``text``.

    :param text: markdown formatted text content.
    :param escape: if set to False, all html tags will not be escaped.
    :param use_xhtml: output with xhtml tags.
    :param hard_wrap: if set to True, it will use the GFM line breaks feature.
    :param parse_block_html: parse text only in block level html.
    :param parse_inline_html: parse text only in inline level html.
    """
    parser = Markdown(escape=escape, **kwargs)
    return parser(text)
1170