Passed
Push — master ( ec62d3...86008f )
by Dongxin
42s

InlineParser.parseCloseBracket()   F

Complexity

Conditions 21

Size

Total Lines 109

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 21
c 2
b 0
f 0
dl 0
loc 109
rs 2

How to fix

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include: Extract Method.
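
For instance, the inline-link branch of parseCloseBracket() (the code that handles a "(destination "title")" spec after a closing bracket) is a natural candidate for Extract Method. A rough sketch only; the helper name parseInlineLinkSpec is chosen here for illustration and is not part of the codebase:

    def parseInlineLinkSpec(self):
        """Sketch only: parse '(dest "title")' at the current position.

        Mirrors the logic currently inlined in parseCloseBracket() and
        returns (dest, title, matched).
        """
        savepos = self.pos
        title = None
        matched = False
        self.pos += 1
        self.spnl()
        dest = self.parseLinkDestination()
        if dest is not None and self.spnl():
            # make sure there's a space before the title
            if re.search(reWhitespaceChar, self.subject[self.pos - 1]):
                title = self.parseLinkTitle()
            if self.spnl() and self.peek() == ')':
                self.pos += 1
                matched = True
        else:
            self.pos = savepos
        return dest, title, matched

parseCloseBracket() would then call this helper when it sees '(', which removes one nesting level and several conditions from the long method.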

Complexity

Complex classes like InlineParser.parseCloseBracket() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
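
As a rough illustration of Extract Class here (the BracketStack name is purely hypothetical), the bracket-stack bookkeeping shared by addBracket(), removeBracket() and parseCloseBracket() could be grouped into a small cohesive class, with InlineParser holding an instance of it instead of manipulating the raw dicts directly:

class BracketStack(object):
    """Sketch only: the stack of pending [ and ![ openers."""

    def __init__(self):
        self.top = None

    def push(self, node, index, image, previous_delimiter):
        if self.top is not None:
            # the previous opener now has a bracket after it
            self.top['bracket_after'] = True
        self.top = {
            'node': node,
            'previous': self.top,
            'previousDelimiter': previous_delimiter,
            'index': index,
            'image': image,
            'active': True,
        }

    def pop(self):
        self.top = self.top.get('previous')

The source of the analyzed file follows.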

from __future__ import absolute_import, unicode_literals

import re
import sys
from CommonMark import common
from CommonMark.common import normalize_uri, unescape_string
from CommonMark.node import Node

if sys.version_info >= (3, 0):
    if sys.version_info >= (3, 4):
        import html.parser
        HTMLunescape = html.parser.HTMLParser().unescape
    else:
        from .entitytrans import _unescape
        HTMLunescape = _unescape
else:
    from CommonMark import entitytrans
    HTMLunescape = entitytrans._unescape

# Some regexps used in inline parser:

ESCAPED_CHAR = '\\\\' + common.ESCAPABLE

rePunctuation = re.compile(
    r'[!"#$%&\'()*+,\-./:;<=>?@\[\]^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB'
    r'\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3'
    r'\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F'
    r'\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E'
    r'\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12'
    r'\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB'
    r'\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736'
    r'\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-'
    r'\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F'
    r'\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E'
    r'\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5'
    r'\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC'
    r'\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011'
    r'\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673'
    r'\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E'
    r'\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0'
    r'\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63'
    r'\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B'
    r'\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-'
    r'\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58'
    r'\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D'
    r'\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD'
    r'\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7'
    r'\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F'
    r'\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]'
)

reLinkTitle = re.compile(
    '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' +
    '|' +
    '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' +
    '|' +
    '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))')
reLinkDestinationBraces = re.compile(
    '^(?:[<](?:[^ <>\\t\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' +
    '\\\\)*[>])')

reEscapable = re.compile('^' + common.ESCAPABLE)
reEntityHere = re.compile('^' + common.ENTITY, re.IGNORECASE)
reTicks = re.compile(r'`+')
reTicksHere = re.compile(r'^`+')
reEllipses = re.compile(r'\.\.\.')
reDash = re.compile(r'--+')
reEmailAutolink = re.compile(
    r"^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9]"
    r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
    r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>")
reAutolink = re.compile(
    r'^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>',
    re.IGNORECASE)
reSpnl = re.compile(r'^ *(?:\n *)?')
reWhitespaceChar = re.compile(r'^^[ \t\n\x0b\x0c\x0d]')
reWhitespace = re.compile(r'[ \t\n\x0b\x0c\x0d]+')
reUnicodeWhitespaceChar = re.compile(r'^\s')
reFinalSpace = re.compile(r' *$')
reInitialSpace = re.compile(r'^ *')
reSpaceAtEndOfLine = re.compile(r'^ *(?:\n|$)')
reLinkLabel = re.compile('^\\[(?:[^\\\\\\[\\]]|' + ESCAPED_CHAR +
                         '|\\\\){0,1000}\\]')
# Matches a string of non-special characters.
reMain = re.compile(r'^[^\n`\[\]\\!<&*_\'"]+', re.MULTILINE)


def normalizeReference(s):
    """Normalize reference label.

    Collapse internal whitespace to single space, remove
    leading/trailing whitespace, case fold.
    """
    return re.sub(r'\s+', ' ', s.strip()).upper()


def text(s):
    node = Node('text', None)
    node.literal = s
    return node


def smart_dashes(chars):
    en_count = 0
    em_count = 0
    if len(chars) % 3 == 0:
        # If divisible by 3, use all em dashes
        em_count = len(chars) // 3
    elif len(chars) % 2 == 0:
        # If divisible by 2, use all en dashes
        en_count = len(chars) // 2
    elif len(chars) % 3 == 2:
        # If 2 extra dashes, use en dash for last 2;
        # em dashes for rest
        en_count = 1
        em_count = (len(chars) - 2) // 3
    else:
        # Use en dashes for last 4 hyphens; em dashes for rest
        en_count = 2
        em_count = (len(chars) - 4) // 3
    return ('\u2014' * em_count) + ('\u2013' * en_count)


class InlineParser(object):
    """INLINE PARSER

    These are methods of an InlineParser class, defined below.
    An InlineParser keeps track of a subject (a string to be
    parsed) and a position in that subject.
    """

    def __init__(self, options={}):
        self.subject = ''
        self.brackets = None
        self.pos = 0
        self.refmap = {}
        self.options = options

    def match(self, regexString):
        """
        If regexString matches at current position in the subject, advance
        position in subject and return the match; otherwise return None.
        """
        match = re.search(regexString, self.subject[self.pos:])
        if match is None:
            return None
        else:
            self.pos += match.end()
            return match.group()

    def peek(self):
        """ Returns the character at the current subject position, or None if
        there are no more characters."""
        if self.pos < len(self.subject):
            return self.subject[self.pos]
        else:
            return None

    def spnl(self):
        """ Parse zero or more space characters, including at
        most one newline."""
        self.match(reSpnl)
        return True

    # All of the parsers below try to match something at the current position
    # in the subject.  If they succeed in matching anything, they
    # push an inline matched, advancing the subject.

    def parseBackticks(self, block):
        """ Attempt to parse backticks, adding either a backtick code span or a
        literal sequence of backticks to the 'inlines' list."""
        ticks = self.match(reTicksHere)
        if ticks is None:
            return False
        after_open_ticks = self.pos
        matched = self.match(reTicks)
        while matched is not None:
            if (matched == ticks):
                node = Node('code', None)
                c = self.subject[after_open_ticks:self.pos - len(ticks)]
                c = c.strip()
                c = re.sub(reWhitespace, ' ', c)
                node.literal = c
                block.append_child(node)
                return True
            matched = self.match(reTicks)
        # If we got here, we didn't match a closing backtick sequence.
        self.pos = after_open_ticks
        block.append_child(text(ticks))
        return True

    def parseBackslash(self, block):
        """
        Parse a backslash-escaped special character, adding either the
        escaped character, a hard line break (if the backslash is followed
        by a newline), or a literal backslash to the block's children.
        Assumes current character is a backslash.
        """
        subj = self.subject
        self.pos += 1

        try:
            subjchar = subj[self.pos]
        except IndexError:
            subjchar = None

        if self.peek() == '\n':
            self.pos += 1
            node = Node('linebreak', None)
            block.append_child(node)
        elif subjchar and re.search(reEscapable, subjchar):
            block.append_child(text(subjchar))
            self.pos += 1
        else:
            block.append_child(text('\\'))

        return True

    def parseAutolink(self, block):
        """Attempt to parse an autolink (URL or email in pointy brackets)."""
        m = self.match(reEmailAutolink)

        if m:
            # email
            dest = m[1:-1]
            node = Node('link', None)
            node.destination = normalize_uri('mailto:' + dest)
            node.title = ''
            node.append_child(text(dest))
            block.append_child(node)
            return True
        else:
            m = self.match(reAutolink)
            if m:
                # link
                dest = m[1:-1]
                node = Node('link', None)
                node.destination = normalize_uri(dest)
                node.title = ''
                node.append_child(text(dest))
                block.append_child(node)
                return True

        return False

    def parseHtmlTag(self, block):
        """Attempt to parse a raw HTML tag."""
        m = self.match(common.reHtmlTag)
        if m is None:
            return False
        else:
            node = Node('html_inline', None)
            node.literal = m
            block.append_child(node)
            return True

    def scanDelims(self, c):
        """
        Scan a sequence of characters == c, and return information about
        the number of delimiters and whether they are positioned such that
        they can open and/or close emphasis or strong emphasis.  A utility
        function for strong/emph parsing.
        """
        numdelims = 0
        startpos = self.pos

        if c == "'" or c == '"':
            numdelims += 1
            self.pos += 1
        else:
            while (self.peek() == c):
                numdelims += 1
                self.pos += 1

        if numdelims == 0:
            return None

        c_before = '\n' if startpos == 0 else self.subject[startpos - 1]

        c_after = self.peek()
        if c_after is None:
            c_after = '\n'

        # Python 2 doesn't recognize '\xa0' as whitespace
        after_is_whitespace = re.search(reUnicodeWhitespaceChar, c_after) or \
            c_after == '\xa0'
        after_is_punctuation = re.search(rePunctuation, c_after)
        before_is_whitespace = re.search(
            reUnicodeWhitespaceChar, c_before) or \
            c_before == '\xa0'
        before_is_punctuation = re.search(rePunctuation, c_before)

        left_flanking = not after_is_whitespace and \
            (not after_is_punctuation or
             before_is_whitespace or
             before_is_punctuation)
        right_flanking = not before_is_whitespace and \
            (not before_is_punctuation or
             after_is_whitespace or
             after_is_punctuation)
        if c == '_':
            can_open = left_flanking and \
                (not right_flanking or before_is_punctuation)
            can_close = right_flanking and \
                (not left_flanking or after_is_punctuation)
        elif c == "'" or c == '"':
            can_open = left_flanking and not right_flanking
            can_close = right_flanking
        else:
            can_open = left_flanking
            can_close = right_flanking

        self.pos = startpos
        return {
            'numdelims': numdelims,
            'can_open': can_open,
            'can_close': can_close,
        }

    def handleDelim(self, cc, block):
        """Handle a delimiter marker for emphasis or a quote."""
        res = self.scanDelims(cc)
        if not res:
            return False
        numdelims = res.get('numdelims')
        startpos = self.pos

        self.pos += numdelims
        if cc == "'":
            contents = '\u2019'
        elif cc == '"':
            contents = '\u201C'
        else:
            contents = self.subject[startpos:self.pos]
        node = text(contents)
        block.append_child(node)

        # Add entry to stack for this opener
        self.delimiters = {
            'cc': cc,
            'numdelims': numdelims,
            'origdelims': numdelims,
            'node': node,
            'previous': self.delimiters,
            'next': None,
            'can_open': res.get('can_open'),
            'can_close': res.get('can_close'),
        }
        if self.delimiters['previous'] is not None:
            self.delimiters['previous']['next'] = self.delimiters
        return True

    def removeDelimiter(self, delim):
        if delim.get('previous') is not None:
            delim['previous']['next'] = delim.get('next')
        if delim.get('next') is None:
            # Top of stack
            self.delimiters = delim.get('previous')
        else:
            delim['next']['previous'] = delim.get('previous')

    @staticmethod
    def removeDelimitersBetween(bottom, top):
        if bottom.get('next') != top:
            bottom['next'] = top
            top['previous'] = bottom

    def processEmphasis(self, stack_bottom):
369
        openers_bottom = {
370
            '_': stack_bottom,
371
            '*': stack_bottom,
372
            "'": stack_bottom,
373
            '"': stack_bottom,
374
        }
375
        odd_match = False
376
        use_delims = 0
377
378
        # Find first closer above stack_bottom
379
        closer = self.delimiters
380
        while closer is not None and closer.get('previous') != stack_bottom:
381
            closer = closer.get('previous')
382
383
        # Move forward, looking for closers, and handling each
384
        while closer is not None:
385
            if not closer.get('can_close'):
386
                closer = closer.get('next')
387
            else:
388
                # found emphasis closer. now look back for first
389
                # matching opener:
390
                opener = closer.get('previous')
391
                opener_found = False
392
                closercc = closer.get('cc')
393
                while (opener is not None and opener != stack_bottom and
394
                       opener != openers_bottom[closercc]):
395
                    odd_match = (closer.get('can_open') or
396
                                 opener.get('can_close')) and \
397
                                 (opener.get('origdelims') +
398
                                  closer.get('origdelims')) % 3 == 0
399
                    if opener.get('cc') == closercc and \
400
                       opener.get('can_open') and \
401
                       not odd_match:
402
                        opener_found = True
403
                        break
404
                    opener = opener.get('previous')
405
                old_closer = closer
406
407
                if closercc == '*' or closercc == '_':
408
                    if not opener_found:
409
                        closer = closer.get('next')
410
                    else:
411
                        # Calculate actual number of delimiters used from
412
                        # closer
413
                        use_delims = 2 if (
414
                            closer['numdelims'] >= 2 and
415
                            opener['numdelims'] >= 2) else 1
416
417
                        opener_inl = opener.get('node')
418
                        closer_inl = closer.get('node')
419
420
                        # Remove used delimiters from stack elts and inlines
421
                        opener['numdelims'] -= use_delims
422
                        closer['numdelims'] -= use_delims
423
                        opener_inl.literal = opener_inl.literal[
424
                            :len(opener_inl.literal) - use_delims]
425
                        closer_inl.literal = closer_inl.literal[
426
                            :len(closer_inl.literal) - use_delims]
427
428
                        # Build contents for new Emph element
429
                        if use_delims == 1:
430
                            emph = Node('emph', None)
431
                        else:
432
                            emph = Node('strong', None)
433
434
                        tmp = opener_inl.nxt
435
                        while tmp and tmp != closer_inl:
436
                            nxt = tmp.nxt
437
                            tmp.unlink()
438
                            emph.append_child(tmp)
439
                            tmp = nxt
440
441
                        opener_inl.insert_after(emph)
442
443
                        # Remove elts between opener and closer in delimiters
444
                        # stack
445
                        self.removeDelimitersBetween(opener, closer)
446
447
                        # If opener has 0 delims, remove it and the inline
448
                        if opener['numdelims'] == 0:
449
                            opener_inl.unlink()
450
                            self.removeDelimiter(opener)
451
452
                        if closer['numdelims'] == 0:
453
                            closer_inl.unlink()
454
                            tempstack = closer['next']
455
                            self.removeDelimiter(closer)
456
                            closer = tempstack
457
458
                elif closercc == "'":
459
                    closer['node'].literal = '\u2019'
460
                    if opener_found:
461
                        opener['node'].literal = '\u2018'
462
                    closer = closer['next']
463
464
                elif closercc == '"':
465
                    closer['node'].literal = '\u201D'
466
                    if opener_found:
467
                        opener['node'].literal = '\u201C'
468
                    closer = closer['next']
469
470
                if not opener_found and not odd_match:
471
                    # Set lower bound for future searches for openers:
472
                    # We don't do this with odd_match because a **
473
                    # that doesn't match an earlier * might turn into
474
                    # an opener, and the * might be matched by something
475
                    # else.
476
                    openers_bottom[closercc] = old_closer['previous']
477
                    if not old_closer['can_open']:
478
                        # We can remove a closer that can't be an opener,
479
                        # once we've seen there's no matching opener:
480
                        self.removeDelimiter(old_closer)
481
482
        # Remove all delimiters
483
        while self.delimiters is not None and self.delimiters != stack_bottom:
484
            self.removeDelimiter(self.delimiters)
485
    def parseLinkTitle(self):
        """
        Attempt to parse link title (sans quotes), returning the string
        or None if no match.
        """
        title = self.match(reLinkTitle)
        if title is None:
            return None
        else:
            # chop off quotes from title and unescape:
            return unescape_string(title[1:-1])

    def parseLinkDestination(self):
        """
        Attempt to parse link destination, returning the string or
        None if no match.
        """
        res = self.match(reLinkDestinationBraces)
        if res is None:
            # TODO handrolled parser; res should be None or the string
            savepos = self.pos
            openparens = 0
            c = self.peek()
            while c is not None:
                if c == '\\':
                    self.pos += 1
                    if self.peek() is not None:
                        self.pos += 1
                elif c == '(':
                    self.pos += 1
                    openparens += 1
                elif c == ')':
                    if openparens < 1:
                        break
                    else:
                        self.pos += 1
                        openparens -= 1
                elif re.search(reWhitespaceChar, c):
                    break
                else:
                    self.pos += 1
                c = self.peek()
            res = self.subject[savepos:self.pos]
            return normalize_uri(unescape_string(res))
        else:
            # chop off surrounding <..>:
            return normalize_uri(unescape_string(res[1:-1]))

    def parseLinkLabel(self):
        """
        Attempt to parse a link label, returning number of
        characters parsed.
        """
        # Note: our regex will allow something of form [..\];
        # we disallow it here rather than using lookahead in the regex:
        m = self.match(reLinkLabel)
        if m is None or len(m) > 1001 or re.search(r'([^\\]\\\]$|\[\n\]$)', m):
            return 0
        else:
            return len(m)

    def parseOpenBracket(self, block):
        """
        Add open bracket to delimiter stack and add a text node to
        block's children.
        """
        startpos = self.pos
        self.pos += 1

        node = text('[')
        block.append_child(node)

        # Add entry to stack for this opener
        self.addBracket(node, startpos, False)
        return True

    def parseBang(self, block):
        """
        If the next character is [, add a ! delimiter to the delimiter
        stack and add a text node to block's children. Otherwise just
        add a text node.
        """
        startpos = self.pos
        self.pos += 1
        if self.peek() == '[':
            self.pos += 1

            node = text('![')
            block.append_child(node)

            # Add entry to stack for this opener
            self.addBracket(node, startpos + 1, True)
        else:
            block.append_child(text('!'))

        return True

    def parseCloseBracket(self, block):
        """
        Try to match close bracket against an opening in the delimiter
        stack. Add either a link or image, or a plain [ character,
        to block's children. If there is a matching delimiter,
        remove it from the delimiter stack.
        """
        title = None
        matched = False
        self.pos += 1
        startpos = self.pos

        # get last [ or ![
        opener = self.brackets

        if opener is None:
            # no matched opener, just return a literal
            block.append_child(text(']'))
            return True

        if not opener.get('active'):
            # no matched opener, just return a literal
            block.append_child(text(']'))
            # take opener off brackets stack
            self.removeBracket()
            return True

        # If we got here, opener is a potential opener
        is_image = opener.get('image')

        # Check to see if we have a link/image

        savepos = self.pos

        # Inline link?
        if self.peek() == '(':
            self.pos += 1
            self.spnl()
            dest = self.parseLinkDestination()
            if dest is not None and self.spnl():
                # make sure there's a space before the title
                if re.search(reWhitespaceChar, self.subject[self.pos-1]):
                    title = self.parseLinkTitle()
                if self.spnl() and self.peek() == ')':
                    self.pos += 1
                    matched = True
            else:
                self.pos = savepos

        if not matched:
            # Next, see if there's a link label
            # (no reference label found yet)
            reflabel = None
            beforelabel = self.pos
            n = self.parseLinkLabel()
            if n > 2:
                reflabel = self.subject[beforelabel:beforelabel + n]
            elif not opener.get('bracket_after'):
                # Empty or missing second label means to use the first
                # label as the reference.  The reference must not
                # contain a bracket. If we know there's a bracket, we
                # don't even bother checking it.
                reflabel = self.subject[opener.get('index'):startpos]
            if n == 0:
                # If shortcut reference link, rewind before spaces we skipped.
                self.pos = savepos

            if reflabel:
                # lookup rawlabel in refmap
                link = self.refmap.get(normalizeReference(reflabel))
                if link:
                    dest = link['destination']
                    title = link['title']
                    matched = True

        if matched:
            node = Node('image' if is_image else 'link', None)

            node.destination = dest
            node.title = title or ''
            tmp = opener.get('node').nxt
            while tmp:
                nxt = tmp.nxt
                tmp.unlink()
                node.append_child(tmp)
                tmp = nxt
            block.append_child(node)
            self.processEmphasis(opener.get('previousDelimiter'))
            self.removeBracket()
            opener.get('node').unlink()

            # We remove this bracket and processEmphasis will remove
            # later delimiters.
            # Now, for a link, we also deactivate earlier link openers.
            # (no links in links)
            if not is_image:
                opener = self.brackets
                while opener is not None:
                    if not opener.get('image'):
                        # deactivate this opener
                        opener['active'] = False
                    opener = opener.get('previous')

            return True
        else:
            # no match
            # remove this opener from stack
            self.removeBracket()
            self.pos = startpos
            block.append_child(text(']'))
            return True

    def addBracket(self, node, index, image):
        if self.brackets is not None:
            # mark the previous opener as having a bracket after it,
            # using the same key that parseCloseBracket() reads
            self.brackets['bracket_after'] = True

        self.brackets = {
            'node': node,
            'previous': self.brackets,
            'previousDelimiter': self.delimiters,
            'index': index,
            'image': image,
            'active': True,
        }

    def removeBracket(self):
        self.brackets = self.brackets.get('previous')

    def parseEntity(self, block):
        """Attempt to parse an entity."""
        m = self.match(reEntityHere)
        if m:
            block.append_child(text(HTMLunescape(m)))
            return True
        else:
            return False

    def parseString(self, block):
        """
        Parse a run of ordinary characters, or a single character with
        a special meaning in markdown, as a plain string.
        """
        m = self.match(reMain)
        if m:
            if self.options.get('smart'):
                s = re.sub(reEllipses, '\u2026', m)
                s = re.sub(reDash, lambda x: smart_dashes(x.group()), s)
                block.append_child(text(s))
            else:
                block.append_child(text(m))
            return True
        else:
            return False

    def parseNewline(self, block):
        """
        Parse a newline.  If it was preceded by two spaces, return a hard
        line break; otherwise a soft line break.
        """
        # assume we're at a \n
        self.pos += 1
        lastc = block.last_child
        if lastc and lastc.t == 'text' and lastc.literal[-1] == ' ':
            linebreak = len(lastc.literal) >= 2 and lastc.literal[-2] == ' '
            lastc.literal = re.sub(reFinalSpace, '', lastc.literal)
            if linebreak:
                node = Node('linebreak', None)
            else:
                node = Node('softbreak', None)
            block.append_child(node)
        else:
            block.append_child(Node('softbreak', None))

        # gobble leading spaces in next line
        self.match(reInitialSpace)
        return True

    def parseReference(self, s, refmap):
        """Attempt to parse a link reference, modifying refmap."""
        self.subject = s
        self.pos = 0
        startpos = self.pos

        # label:
        match_chars = self.parseLinkLabel()
        if match_chars == 0 or match_chars == 2:
            return 0
        else:
            rawlabel = self.subject[:match_chars]

        # colon:
        if (self.peek() == ':'):
            self.pos += 1
        else:
            self.pos = startpos
            return 0

        # link url
        self.spnl()

        dest = self.parseLinkDestination()
        if (dest is None or len(dest) == 0):
            self.pos = startpos
            return 0

        beforetitle = self.pos
        self.spnl()
        title = self.parseLinkTitle()
        if title is None:
            title = ''
            # rewind before spaces
            self.pos = beforetitle

        # make sure we're at line end:
        at_line_end = True
        if self.match(reSpaceAtEndOfLine) is None:
            if title == '':
                at_line_end = False
            else:
                # the potential title we found is not at the line end,
                # but it could still be a legal link reference if we
                # discard the title
                title = ''
                # rewind before spaces
                self.pos = beforetitle
                # and instead check if the link URL is at the line end
                at_line_end = self.match(reSpaceAtEndOfLine) is not None

        if not at_line_end:
            self.pos = startpos
            return 0

        normlabel = normalizeReference(rawlabel)
        if refmap.get(normlabel) == '':
            # label must contain non-whitespace characters
            self.pos = startpos
            return 0

        if refmap.get(normlabel) is None:
            refmap[normlabel] = {
                'destination': dest,
                'title': title
            }
        return (self.pos - startpos)

    def parseInline(self, block):
        """
        Parse the next inline element in subject, advancing subject
        position.

        On success, add the result to block's children and return True.
        On failure, return False.
        """
        res = False
        c = self.peek()
        if c is None:
            return False
        if c == '\n':
            res = self.parseNewline(block)
        elif c == '\\':
            res = self.parseBackslash(block)
        elif c == '`':
            res = self.parseBackticks(block)
        elif c == '*' or c == '_':
            res = self.handleDelim(c, block)
        elif c == "'" or c == '"':
            res = self.options.get('smart') and self.handleDelim(c, block)
        elif c == '[':
            res = self.parseOpenBracket(block)
        elif c == '!':
            res = self.parseBang(block)
        elif c == ']':
            res = self.parseCloseBracket(block)
        elif c == '<':
            res = self.parseAutolink(block) or self.parseHtmlTag(block)
        elif c == '&':
            res = self.parseEntity(block)
        else:
            res = self.parseString(block)

        if not res:
            self.pos += 1
            block.append_child(text(c))

        return True

    def parseInlines(self, block):
        """
        Parse string content in block into inline children,
        using refmap to resolve references.
        """
        self.subject = block.string_content.strip()
        self.pos = 0
        self.delimiters = None
        self.brackets = None
        while (self.parseInline(block)):
            pass
        # allow raw string to be garbage collected
        block.string_content = None
        self.processEmphasis(None)

    parse = parseInlines