Passed
Push — master ( 0c68df...e3ee72 )
by Dongxin
01:27
created

InlineParser.parseCloseBracket()   F

Complexity

Conditions 21

Size

Total Lines 109

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 21
c 1
b 0
f 0
dl 0
loc 109
rs 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include: Extract Method.

Complexity

Complex classes like InlineParser.parseCloseBracket() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from __future__ import absolute_import, unicode_literals
2
3
import re
4
import sys
5
from CommonMark import common
6
from CommonMark.common import normalize_uri, unescape_string
7
from CommonMark.node import Node
8
9
# Select an HTML entity decoder for the running interpreter.
# ``HTMLParser.unescape`` no longer exists on Python 3.9+, so the stdlib
# function ``html.unescape`` (available since Python 3.4) is used instead.
if sys.version_info >= (3, 4):
    import html
    HTMLunescape = html.unescape
elif sys.version_info >= (3, 0):
    from .entitytrans import _unescape
    HTMLunescape = _unescape
else:
    from CommonMark import entitytrans
    HTMLunescape = entitytrans._unescape
19
20
# Some regexps used in inline parser:

# A backslash followed by any escapable character.
ESCAPED_CHAR = '\\\\' + common.ESCAPABLE

# Every fragment is a raw literal so that sequences such as ``\$`` and
# ``\(`` reach the regex engine untouched instead of being parsed as
# (invalid) string escapes.
rePunctuation = re.compile(
    r'^[\u2000-\u206F\u2E00-\u2E7F\\' + "'" + r'!"#\$%&\(\)'
    r'\*\+,\-\.\/:;<=>\?@\[\]\^_`\{\|\}~]')

# Link title delimited by "...", '...' or (...).
reLinkTitle = re.compile(
    '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' +
    '|' +
    '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' +
    '|' +
    '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))')
# Link destination wrapped in <...>.
reLinkDestinationBraces = re.compile(
    '^(?:[<](?:[^ <>\\t\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' +
    '\\\\)*[>])')

reEscapable = re.compile('^' + common.ESCAPABLE)
reEntityHere = re.compile('^' + common.ENTITY, re.IGNORECASE)
# Unanchored on purpose: used to search ahead for a closing backtick run.
reTicks = re.compile(r'`+')
reTicksHere = re.compile(r'^`+')
reEllipses = re.compile(r'\.\.\.')
reDash = re.compile(r'--+')
reEmailAutolink = re.compile(
    r"^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9]"
    r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
    r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>")
reAutolink = re.compile(
    r'^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>',
    re.IGNORECASE)
reSpnl = re.compile(r'^ *(?:\n *)?')
reWhitespaceChar = re.compile(r'^[ \t\n\x0b\x0c\x0d]')
reWhitespace = re.compile(r'[ \t\n\x0b\x0c\x0d]+')
reUnicodeWhitespaceChar = re.compile(r'^\s')
reFinalSpace = re.compile(r' *$')
reInitialSpace = re.compile(r'^ *')
reSpaceAtEndOfLine = re.compile(r'^ *(?:\n|$)')
reLinkLabel = re.compile('^\\[(?:[^\\\\\\[\\]]|' + ESCAPED_CHAR +
                         '|\\\\){0,1000}\\]')
# Matches a string of non-special characters.
reMain = re.compile(r'^[^\n`\[\]\\!<&*_\'"]+', re.MULTILINE)
62
63
64
def normalizeReference(s):
    """Return the canonical lookup key for a reference label.

    Leading and trailing whitespace is removed, each internal run of
    whitespace becomes a single space, and the result is upper-cased.
    """
    trimmed = s.strip()
    collapsed = re.sub(r'\s+', ' ', trimmed)
    return collapsed.upper()
71
72
73
def text(s):
    """Create a 'text' node whose literal is ``s``."""
    leaf = Node('text', None)
    leaf.literal = s
    return leaf
77
78
79
def smart_dashes(chars):
    """Turn a run of hyphens into a string of em and en dashes.

    ``chars`` is the matched run of hyphens; only its length is used.
    Returns the em dashes (if any) followed by the en dashes (if any).
    """
    length = len(chars)
    # Floor division keeps the counts integral on Python 3, where ``/``
    # yields a float and ``str * float`` raises TypeError.
    if length % 3 == 0:
        # Divisible by 3: all em dashes
        em_count, en_count = length // 3, 0
    elif length % 2 == 0:
        # Divisible by 2: all en dashes
        em_count, en_count = 0, length // 2
    elif length % 3 == 2:
        # Two left over: one en dash for the last two, em dashes before
        em_count, en_count = (length - 2) // 3, 1
    else:
        # Two en dashes for the last four hyphens, em dashes before
        em_count, en_count = (length - 4) // 3, 2
    return ('\u2014' * em_count) + ('\u2013' * en_count)
98
99
100
class InlineParser(object):
101
    """INLINE PARSER
102
103
    These are methods of an InlineParser class, defined below.
104
    An InlineParser keeps track of a subject (a string to be
105
    parsed) and a position in that subject.
106
    """
107
108
    def __init__(self, options={}):
109
        self.subject = ''
110
        self.brackets = None
111
        self.pos = 0
112
        self.refmap = {}
113
        self.options = options
114
115
    def match(self, regexString):
116
        """
117
        If regexString matches at current position in the subject, advance
118
        position in subject and return the match; otherwise return None.
119
        """
120
        match = re.search(regexString, self.subject[self.pos:])
121
        if match is None:
122
            return None
123
        else:
124
            self.pos += match.end()
125
            return match.group()
126
127
    def peek(self):
128
        """ Returns the character at the current subject position, or None if
129
        there are no more characters."""
130
        if self.pos < len(self.subject):
131
            return self.subject[self.pos]
132
        else:
133
            return None
134
135
    def spnl(self):
        """Skip optional spaces, at most one newline, and the spaces
        following it.

        Always returns True so that callers can chain it in boolean
        expressions.
        """
        self.match(reSpnl)  # result deliberately ignored
        return True
140
141
    # All of the parsers below try to match something at the current position
142
    # in the subject.  If they succeed in matching anything, they
143
    # push an inline matched, advancing the subject.
144
145
    def parseBackticks(self, block):
        """Parse a backtick run at the current position.

        Appends a 'code' node to block when a closing run of the same
        length exists, otherwise appends the opening run as literal text.
        Returns False only when there is no backtick run here.
        """
        opening = self.match(reTicksHere)
        if opening is None:
            return False
        content_start = self.pos
        while True:
            candidate = self.match(reTicks)
            if candidate is None:
                break
            if candidate == opening:
                raw = self.subject[content_start:self.pos - len(opening)]
                code = Node('code', None)
                # Trim, then collapse interior whitespace runs to one space
                code.literal = re.sub(reWhitespace, ' ', raw.strip())
                block.append_child(code)
                return True
        # No closing run of equal length: emit the opening run verbatim
        self.pos = content_start
        block.append_child(text(opening))
        return True
167
168
    def parseBackslash(self, block):
        """
        Handle a backslash at the current position (the caller guarantees
        the current character is a backslash).

        Appends a hard line break when a newline follows, the escaped
        character when an escapable character follows, and a literal
        backslash otherwise.  Always returns True.
        """
        self.pos += 1
        try:
            escaped = self.subject[self.pos]
        except IndexError:
            escaped = None

        if self.peek() == '\n':
            self.pos += 1
            block.append_child(Node('linebreak', None))
            return True

        if escaped and re.match(reEscapable, escaped):
            block.append_child(text(escaped))
            self.pos += 1
            return True

        block.append_child(text('\\'))
        return True
194
195
    def parseAutolink(self, block):
        """Try to parse an autolink: an email address or a URL enclosed in
        pointy brackets.  Returns True and appends a 'link' node on
        success, False otherwise."""
        # The email form is tried first; its destination gets 'mailto:'
        candidates = ((reEmailAutolink, 'mailto:'), (reAutolink, ''))
        for pattern, scheme in candidates:
            hit = self.match(pattern)
            if hit:
                target = hit[1:-1]  # strip the surrounding < >
                link = Node('link', None)
                link.destination = normalize_uri(scheme + target)
                link.title = ''
                link.append_child(text(target))
                block.append_child(link)
                return True
        return False
221
222
    def parseHtmlTag(self, block):
        """Try to parse a raw HTML tag; on success append an 'html_inline'
        node holding the matched text and return True."""
        tag = self.match(common.reHtmlTag)
        if tag is None:
            return False
        raw = Node('html_inline', None)
        raw.literal = tag
        block.append_child(raw)
        return True
232
233
    def scanDelims(self, c):
        """
        Measure the run of delimiter characters ``c`` at the current
        position and classify it.

        Returns None when there is no such run, otherwise a dict with
        'numdelims', 'can_open' and 'can_close'.  The position is restored
        before returning the dict.
        """
        begin = self.pos

        if c == "'" or c == '"':
            # Quote characters are always taken one at a time
            run_length = 1
            self.pos += 1
        else:
            run_length = 0
            while self.peek() == c:
                run_length += 1
                self.pos += 1
            if run_length == 0:
                return None

        # The start and end of the subject count as a newline
        prev_char = self.subject[begin - 1] if begin != 0 else '\n'
        next_char = self.peek()
        if next_char is None:
            next_char = '\n'

        # Python 2 doesn't recognize '\xa0' as whitespace
        next_is_space = re.match(reUnicodeWhitespaceChar, next_char) or \
            next_char == '\xa0'
        next_is_punct = re.match(rePunctuation, next_char)
        prev_is_space = re.match(reUnicodeWhitespaceChar, prev_char) or \
            prev_char == '\xa0'
        prev_is_punct = re.match(rePunctuation, prev_char)

        left_flanking = not next_is_space and \
            not (next_is_punct and
                 not prev_is_space and
                 not prev_is_punct)
        right_flanking = not prev_is_space and \
            not (prev_is_punct and
                 not next_is_space and
                 not next_is_punct)

        if c == '_':
            opens = left_flanking and \
                (not right_flanking or prev_is_punct)
            closes = right_flanking and \
                (not left_flanking or next_is_punct)
        elif c == "'" or c == '"':
            opens = left_flanking and not right_flanking
            closes = right_flanking
        else:
            opens = left_flanking
            closes = right_flanking

        # Scanning is non-consuming: put the position back
        self.pos = begin
        return {
            'numdelims': run_length,
            'can_open': opens,
            'can_close': closes,
        }
294
295
    def handleDelim(self, cc, block):
        """Consume a run of the delimiter ``cc`` (emphasis marker or
        quote), append it to block as a text node and push a matching
        entry onto the delimiter stack.  Returns False when scanDelims
        finds no run."""
        info = self.scanDelims(cc)
        if not info:
            return False

        count = info.get('numdelims')
        begin = self.pos
        self.pos += count

        if cc == "'":
            literal = '\u2019'
        elif cc == '"':
            literal = '\u201C'
        else:
            literal = self.subject[begin:self.pos]
        node = text(literal)
        block.append_child(node)

        # Push an entry for this run and link it to the previous top
        below = self.delimiters
        entry = {
            'cc': cc,
            'numdelims': count,
            'origdelims': count,
            'node': node,
            'previous': below,
            'next': None,
            'can_open': info.get('can_open'),
            'can_close': info.get('can_close'),
        }
        self.delimiters = entry
        if below is not None:
            below['next'] = entry
        return True
327
328
    def removeDelimiter(self, delim):
329
        if delim.get('previous') is not None:
330
            delim['previous']['next'] = delim.get('next')
331
        if delim.get('next') is None:
332
            # Top of stack
333
            self.delimiters = delim.get('previous')
334
        else:
335
            delim['next']['previous'] = delim.get('previous')
336
337
    @staticmethod
338
    def removeDelimitersBetween(bottom, top):
339
        if bottom.get('next') != top:
340
            bottom['next'] = top
341
            top['previous'] = bottom
342
343
    def processEmphasis(self, stack_bottom):
        """
        Resolve the delimiter stack entries above ``stack_bottom``.

        Matching '*' / '_' runs become 'emph' or 'strong' nodes wrapping
        the inlines between them; quote delimiters have their literals
        replaced with curly quotes.  All entries above ``stack_bottom``
        are removed from the stack afterwards (pass None to process the
        whole stack).
        """
        # Per-character lower bound for opener searches
        openers_bottom = dict.fromkeys(('_', '*', "'", '"'), stack_bottom)
        odd_match = False
        use_delims = 0

        # Descend to the lowest entry sitting directly above stack_bottom
        closer = self.delimiters
        while closer is not None and closer.get('previous') != stack_bottom:
            closer = closer.get('previous')

        # Walk upwards, handling every entry that is able to close
        while closer is not None:
            if not closer.get('can_close'):
                closer = closer.get('next')
                continue

            # Look back for the nearest matching opener
            closercc = closer.get('cc')
            opener_found = False
            opener = closer.get('previous')
            while (opener is not None and opener != stack_bottom and
                   opener != openers_bottom[closercc]):
                odd_match = (closer.get('can_open') or
                             opener.get('can_close')) and \
                    (opener.get('origdelims') +
                     closer.get('origdelims')) % 3 == 0
                if (opener.get('cc') == closercc and
                        opener.get('can_open') and
                        not odd_match):
                    opener_found = True
                    break
                opener = opener.get('previous')
            old_closer = closer

            if closercc == '*' or closercc == '_':
                if opener_found:
                    # How many delimiter characters this match consumes
                    closer_count = closer['numdelims']
                    opener_count = opener['numdelims']
                    if closer_count < 3 or opener_count < 3:
                        use_delims = min(closer_count, opener_count)
                    else:
                        use_delims = 2 if closer_count % 2 == 0 else 1

                    opener_inl = opener.get('node')
                    closer_inl = closer.get('node')

                    # Take the used delimiters off both entries and off
                    # the text nodes that display them
                    opener['numdelims'] -= use_delims
                    closer['numdelims'] -= use_delims
                    opener_keep = len(opener_inl.literal) - use_delims
                    opener_inl.literal = opener_inl.literal[:opener_keep]
                    closer_keep = len(closer_inl.literal) - use_delims
                    closer_inl.literal = closer_inl.literal[:closer_keep]

                    # One delimiter gives emphasis, otherwise strong
                    emph = Node(
                        'emph' if use_delims == 1 else 'strong', None)

                    # Move everything between the two nodes into it
                    tmp = opener_inl.nxt
                    while tmp and tmp != closer_inl:
                        nxt = tmp.nxt
                        tmp.unlink()
                        emph.append_child(tmp)
                        tmp = nxt

                    opener_inl.insert_after(emph)

                    # Stack entries between opener and closer are spent
                    self.removeDelimitersBetween(opener, closer)

                    # Fully consumed entries leave the stack and the tree
                    if opener['numdelims'] == 0:
                        opener_inl.unlink()
                        self.removeDelimiter(opener)

                    if closer['numdelims'] == 0:
                        closer_inl.unlink()
                        following = closer['next']
                        self.removeDelimiter(closer)
                        closer = following
                else:
                    closer = closer.get('next')

            elif closercc == "'":
                closer['node'].literal = '\u2019'
                if opener_found:
                    opener['node'].literal = '\u2018'
                closer = closer['next']

            elif closercc == '"':
                closer['node'].literal = '\u201D'
                if opener_found:
                    opener['node'].literal = '\u201C'
                closer = closer['next']

            if not opener_found and not odd_match:
                # Set lower bound for future searches for openers.
                # This is skipped for an odd_match because a ** that
                # doesn't match an earlier * might turn into an opener,
                # and the * might be matched by something else.
                openers_bottom[closercc] = old_closer['previous']
                if not old_closer['can_open']:
                    # A closer that cannot open is useless once we know
                    # it has no matching opener
                    self.removeDelimiter(old_closer)

        # Drop every entry still above stack_bottom
        while self.delimiters is not None and self.delimiters != stack_bottom:
            self.removeDelimiter(self.delimiters)
467
468
    def parseLinkTitle(self):
        """
        Try to parse a link title at the current position.

        Returns the title without its delimiters and with escapes
        resolved, or None when no title matches.
        """
        quoted = self.match(reLinkTitle)
        if quoted is None:
            return None
        # Drop the first and last character (the delimiters), then unescape
        return unescape_string(quoted[1:-1])
479
480
    def parseLinkDestination(self):
        """
        Parse a link destination at the current position and return it
        unescaped and URI-normalized.

        A ``<...>`` destination is tried first.  Otherwise characters are
        consumed up to the first whitespace character, an unbalanced ')'
        or the end of the subject; this fallback always returns a string,
        possibly an empty one.
        """
        braced = self.match(reLinkDestinationBraces)
        if braced is not None:
            # chop off surrounding <..>:
            return normalize_uri(unescape_string(braced[1:-1]))

        # TODO handrolled parser; res should be None or the string
        start = self.pos
        depth = 0  # currently open, unescaped parentheses
        while True:
            ch = self.peek()
            if ch is None:
                break
            if ch == '\\':
                # Skip the backslash and, if present, the escaped char
                self.pos += 1
                if self.peek() is not None:
                    self.pos += 1
            elif ch == '(':
                self.pos += 1
                depth += 1
            elif ch == ')':
                if depth < 1:
                    break
                self.pos += 1
                depth -= 1
            elif re.match(reWhitespaceChar, ch):
                break
            else:
                self.pos += 1
        return normalize_uri(unescape_string(self.subject[start:self.pos]))
515
516
    def parseLinkLabel(self):
        """
        Try to parse a bracketed link label at the current position.

        Returns the number of characters matched (brackets included), or
        0 when nothing matches, the match is longer than 1001 characters,
        or the brackets contain only whitespace.
        """
        label = self.match(reLinkLabel)
        if label is None:
            return 0
        if len(label) > 1001 or re.match(r'\[\s+\]', label):
            return 0
        return len(label)
526
527
    def parseOpenBracket(self, block):
        """
        Consume a '[': append it to block as a text node and record it on
        the bracket stack as a potential link opener.
        """
        opened_at = self.pos
        self.pos = opened_at + 1

        bracket = text('[')
        block.append_child(bracket)

        self.addBracket(bracket, opened_at, False)
        return True
541
542
    def parseBang(self, block):
        """
        Consume a '!'.  If a '[' follows, consume it too, append '![' as
        a text node and record it on the bracket stack as a potential
        image opener; otherwise append a plain '!' text node.
        """
        bang_at = self.pos
        self.pos += 1
        if self.peek() != '[':
            block.append_child(text('!'))
            return True

        self.pos += 1
        opener = text('![')
        block.append_child(opener)
        # The recorded index points at the '[' rather than the '!'
        self.addBracket(opener, bang_at + 1, True)
        return True
562
563
    def parseCloseBracket(self, block):
        """
        Try to match a ']' against the opener on top of the bracket
        stack.

        Appends either a 'link' / 'image' node (taking over the inlines
        that follow the opener) or a literal ']' text node to block's
        children.  The opener is removed from the bracket stack unless
        the stack was empty.  Always returns True.
        """
        self.pos += 1
        startpos = self.pos

        # get last [ or ![
        opener = self.brackets

        if opener is None:
            # no opener at all, just emit a literal
            block.append_child(text(']'))
            return True

        if not opener.get('active'):
            # opener was deactivated: emit a literal and drop the opener
            block.append_child(text(']'))
            self.removeBracket()
            return True

        is_image = opener.get('image')

        # An inline "(dest title)" wins; otherwise try a reference
        target = self._parseInlineLinkTail()
        if target is None:
            target = self._lookupReferenceLink(opener, startpos)

        if target is None:
            # no match: drop the opener and emit a literal ']'
            self.removeBracket()
            self.pos = startpos
            block.append_child(text(']'))
            return True

        dest, title = target
        node = Node('image' if is_image else 'link', None)
        node.destination = dest
        node.title = title or ''

        # Everything after the opener's text node becomes link content
        tmp = opener.get('node').nxt
        while tmp:
            nxt = tmp.nxt
            tmp.unlink()
            node.append_child(tmp)
            tmp = nxt
        block.append_child(node)
        self.processEmphasis(opener.get('previousDelimiter'))
        self.removeBracket()
        opener.get('node').unlink()

        # We remove this bracket and processEmphasis will remove
        # later delimiters.
        # Now, for a link, we also deactivate earlier link openers.
        # (no links in links)
        if not is_image:
            earlier = self.brackets
            while earlier is not None:
                if not earlier.get('image'):
                    earlier['active'] = False
                earlier = earlier.get('previous')

        return True

    def _parseInlineLinkTail(self):
        """Parse ``(destination "title")`` at the current position.

        Returns a ``(dest, title)`` tuple (title may be None) with the
        position after the closing ')'.  On any failure returns None and
        restores the position, so a failed attempt cannot leak consumed
        characters into the reference-label lookup that follows.
        """
        savepos = self.pos
        if self.peek() != '(':
            return None

        self.pos += 1
        self.spnl()
        dest = self.parseLinkDestination()
        title = None
        if dest is not None:
            self.spnl()
            # make sure there's a space before the title
            if re.match(reWhitespaceChar, self.subject[self.pos - 1]):
                title = self.parseLinkTitle()
            self.spnl()
            if self.peek() == ')':
                self.pos += 1
                return dest, title

        self.pos = savepos
        return None

    def _lookupReferenceLink(self, opener, startpos):
        """Resolve a full, collapsed or shortcut reference via refmap.

        ``startpos`` is the position just after the ']' being handled.
        Returns ``(destination, title)`` from the matching refmap entry,
        or None when there is no usable label or no such reference.
        """
        beforelabel = self.pos
        n = self.parseLinkLabel()
        reflabel = None
        if n > 2:
            reflabel = self.subject[beforelabel:beforelabel + n]
        elif not opener.get('bracketAfter'):
            # Empty or missing second label means to use the first
            # label as the reference.  The reference must not
            # contain a bracket. If we know there's a bracket, we
            # don't even bother checking it.  ('bracketAfter' is the
            # key addBracket writes.)
            reflabel = self.subject[opener.get('index'):startpos]
        if n == 0:
            # If shortcut reference link, rewind before spaces we skipped.
            self.pos = startpos

        if reflabel:
            link = self.refmap.get(normalizeReference(reflabel))
            if link:
                return link['destination'], link['title']
        return None
672
673
    def addBracket(self, node, index, image):
674
        if self.brackets is not None:
675
            self.brackets['bracketAfter'] = True
676
677
        self.brackets = {
678
            'node': node,
679
            'previous': self.brackets,
680
            'previousDelimiter': self.delimiters,
681
            'index': index,
682
            'image': image,
683
            'active': True,
684
        }
685
686
    def removeBracket(self):
687
        self.brackets = self.brackets.get('previous')
688
689
    def parseEntity(self, block):
        """Try to parse an entity at the current position; on success
        append its decoded form as a text node and return True."""
        entity = self.match(reEntityHere)
        if not entity:
            return False
        block.append_child(text(HTMLunescape(entity)))
        return True
697
698
    def parseString(self, block):
        """
        Parse a run of characters matched by reMain and append it to
        block as a text node.  With the 'smart' option, '...' becomes an
        ellipsis and runs of hyphens become dashes.  Returns False when
        nothing matches.
        """
        run = self.match(reMain)
        if not run:
            return False
        if self.options.get('smart'):
            run = re.sub(reEllipses, '\u2026', run)
            run = re.sub(reDash, lambda m: smart_dashes(m.group()), run)
        block.append_child(text(run))
        return True
714
715
    def parseNewline(self, block):
        """
        Consume a newline (the caller guarantees we are at one) and
        append a break node: a hard 'linebreak' when the preceding text
        node ends in two or more spaces, a 'softbreak' otherwise.
        Trailing spaces of that text node and leading spaces of the next
        line are dropped.
        """
        self.pos += 1
        kind = 'softbreak'
        prev = block.last_child
        if prev and prev.t == 'text' and prev.literal[-1] == ' ':
            if len(prev.literal) >= 2 and prev.literal[-2] == ' ':
                kind = 'linebreak'
            prev.literal = re.sub(reFinalSpace, '', prev.literal)
        block.append_child(Node(kind, None))

        # gobble leading spaces in next line
        self.match(reInitialSpace)
        return True
737
738
    def parseReference(self, s, refmap):
        """Try to parse a link reference definition at the start of ``s``.

        On success the definition is stored in ``refmap`` under its
        normalized label (an existing entry for that label is kept) and
        the number of characters consumed is returned.  Returns 0 when
        ``s`` does not start with a reference definition.
        """
        self.subject = s
        self.pos = 0
        startpos = self.pos

        # label:
        match_chars = self.parseLinkLabel()
        if match_chars == 0 or match_chars == 2:
            return 0
        rawlabel = self.subject[:match_chars]

        # colon:
        if self.peek() != ':':
            self.pos = startpos
            return 0
        self.pos += 1

        # link url
        self.spnl()

        dest = self.parseLinkDestination()
        if dest is None or len(dest) == 0:
            self.pos = startpos
            return 0

        beforetitle = self.pos
        self.spnl()
        title = self.parseLinkTitle()
        if title is None:
            title = ''
            # rewind before spaces
            self.pos = beforetitle

        # make sure we're at line end:
        at_line_end = True
        if self.match(reSpaceAtEndOfLine) is None:
            if title == '':
                at_line_end = False
            else:
                # the potential title we found is not at the line end,
                # but it could still be a legal link reference if we
                # discard the title (an assignment: the rejected title
                # must not end up in refmap)
                title = ''
                # rewind before spaces
                self.pos = beforetitle
                # and instead check if the link URL is at the line end
                at_line_end = self.match(reSpaceAtEndOfLine) is not None

        if not at_line_end:
            self.pos = startpos
            return 0

        normlabel = normalizeReference(rawlabel)
        # NOTE(review): this compares the stored entry, not the label,
        # with ''; it looks like a check for an empty label was intended.
        # Left as is — confirm before changing.
        if refmap.get(normlabel) == '':
            # label must contain non-whitespace characters
            self.pos = startpos
            return 0

        if refmap.get(normlabel) is None:
            refmap[normlabel] = {
                'destination': dest,
                'title': title
            }
        return (self.pos - startpos)
805
806
    def parseInline(self, block):
        """
        Parse one inline element at the current position and append it
        to block's children.

        Returns False only when the subject is exhausted.  If no
        specialised parser accepts the current character, it is appended
        as literal text, so every other call returns True.
        """
        c = self.peek()
        if c is None:
            return False

        if c == '\n':
            handled = self.parseNewline(block)
        elif c == '\\':
            handled = self.parseBackslash(block)
        elif c == '`':
            handled = self.parseBackticks(block)
        elif c in ('*', '_'):
            handled = self.handleDelim(c, block)
        elif c in ("'", '"'):
            # Quotes are delimiters only with the 'smart' option
            handled = self.options.get('smart') and \
                self.handleDelim(c, block)
        elif c == '[':
            handled = self.parseOpenBracket(block)
        elif c == '!':
            handled = self.parseBang(block)
        elif c == ']':
            handled = self.parseCloseBracket(block)
        elif c == '<':
            handled = self.parseAutolink(block) or self.parseHtmlTag(block)
        elif c == '&':
            handled = self.parseEntity(block)
        else:
            handled = self.parseString(block)

        if not handled:
            # Nothing claimed the character: emit it as literal text
            self.pos += 1
            block.append_child(text(c))

        return True
846
847
    def parseInlines(self, block):
        """
        Parse block's stripped string content into inline children of
        block, then resolve the whole delimiter stack.
        """
        self.subject = block.string_content.strip()
        self.pos = 0
        self.delimiters = None
        self.brackets = None

        more = True
        while more:
            more = self.parseInline(block)

        # allow raw string to be garbage collected
        block.string_content = None
        self.processEmphasis(None)
861
862
    parse = parseInlines
863