Passed
Push — master ( 0c68df...e3ee72 )
by Dongxin
01:27
created

Parser.incorporate_line()   F

Complexity

Conditions 28

Size

Total Lines 143

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 28
c 1
b 0
f 0
dl 0
loc 143
rs 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a self-contained part of the method body into a new, well-named method.

Complexity

Complex methods like Parser.incorporate_line() often do a lot of different things. To break such a method down, we need to identify a cohesive component within its class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from __future__ import absolute_import, unicode_literals
2
3
import re
4
from importlib import import_module
5
from CommonMark import common
6
from CommonMark.common import unescape_string
7
from CommonMark.inlines import InlineParser
8
from CommonMark.node import Node
9
from CommonMark.utils import to_camel_case
10
11
12
# Number of columns of indentation at which a line counts as "indented"
# (see Parser.find_next_nonspace and the indented code block handling).
CODE_INDENT = 4

# Start conditions for HTML blocks.  The list index is the HTML block type
# (1-7); index 0 is a placeholder so indices and block types line up.
reHtmlBlockOpen = [
    re.compile(r'.'),  # dummy for 0
    # 1: <script>, <pre> or <style> opening tag
    re.compile(r'^<(?:script|pre|style)(?:\s|>|$)', re.IGNORECASE),
    # 2: comment opener
    re.compile(r'^<!--'),
    # 3: processing instruction opener
    re.compile(r'^<[?]'),
    # 4: declaration opener
    re.compile(r'^<![A-Z]'),
    # 5: CDATA section opener
    re.compile(r'^<!\[CDATA\['),
    # 6: opening or closing tag of a known block-level element.
    # All heading levels h1-h6 are accepted (previously only h1 was), and
    # the redundant second "title" alternative has been dropped.
    re.compile(
        r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|'
        r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|'
        r'fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|'
        r'head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|'
        r'meta|nav|noframes|ol|optgroup|option|p|param|section|source|'
        r'title|summary|table|tbody|td|tfoot|th|thead|tr|track|ul)'
        r'(?:\s|[/]?[>]|$)',
        re.IGNORECASE),
    # 7: any other complete open or closing tag alone on its line
    re.compile(
        '^(?:' + common.OPENTAG + '|' + common.CLOSETAG + ')\\s*$',
        re.IGNORECASE),
]
33
# End conditions for HTML blocks of types 1-5, indexed by block type.
# Index 0 is a placeholder so indices and block types line up.
reHtmlBlockClose = [
    re.compile(pattern, flags)
    for pattern, flags in (
        (r'.', 0),  # dummy for 0
        (r'<\/(?:script|pre|style)>', re.IGNORECASE),  # 1: closing tag
        (r'-->', 0),                                   # 2: end of comment
        (r'\?>', 0),                                   # 3: end of PI
        (r'>', 0),                                     # 4: end of declaration
        (r'\]\]>', 0),                                 # 5: end of CDATA
    )
]
41
# --- Line-level patterns used by the block parser -------------------------

# Three or more '*', '_' or '-' characters, optionally separated by
# spaces/tabs, filling the whole (remaining) line.
reThematicBreak = re.compile(
    r'^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$'
)

# Cheap pre-check: first character that could possibly start a block.
reMaybeSpecial = re.compile(r'^[#`~*+_=<>0-9-]')

# Any character that is not whitespace.
reNonSpace = re.compile(r'[^ \t\f\v\r\n]')

# List markers: a bullet character, or 1-9 digits followed by '.' or ')'.
reBulletListMarker = re.compile(r'^[*+-]')
reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])')

# One to six '#' followed by spaces/tabs or the end of the line.
reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')

# Opening fence: 3+ backticks (no later backtick on the line) or 3+ tildes
# (no later tilde on the line).
reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}(?!.*~)')

# Closing fence: 3+ backticks or tildes followed only by spaces.
reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')

# Setext underline: a run of '=' or a run of '-', then optional blanks.
reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')

# Any of the three line-ending conventions.
reLineEnding = re.compile(r'\r\n|\n|\r')
52
53
54
def is_blank(s):
    """Return True when `s` contains no non-whitespace character.

    An empty string is therefore blank as well.
    """
    return reNonSpace.search(s) is None
57
58
59
def is_space_or_tab(s):
    """Return True when `s` is exactly one space or one tab character."""
    return s in (' ', '\t')
61
62
63
def peek(ln, pos):
    """Return the character of `ln` at index `pos`.

    Returns None when `pos` is at or beyond the end of `ln`.
    """
    return ln[pos] if pos < len(ln) else None
68
69
70
def ends_with_blank_line(block):
    """Tell whether `block` ends with a blank line.

    Lists and list items are followed down through their last child, so a
    blank line at the end of a nested sublist counts too.
    """
    node = block
    while node:
        if node.last_line_blank:
            return True
        if node.t not in ('list', 'item'):
            # Only lists and items are descended into.
            return False
        node = node.last_child
    return False
82
83
84
def parse_list_marker(parser, container):
    """Try to parse a list marker at the parser's next non-space position.

    On success the parser is advanced past the marker and the spaces that
    follow it, and a dict describing the marker is returned (keys: type,
    tight, bullet_char, start, delimiter, padding, marker_offset).
    Returns None, without moving the parser, when there is no list marker.
    """
    line = parser.current_line
    rest = line[parser.next_nonspace:]
    bullet = reBulletListMarker.match(rest)
    ordered = reOrderedListMarker.match(rest)

    data = {
        'type': None,
        'tight': True,  # lists are tight by default
        'bullet_char': None,
        'start': None,
        'delimiter': None,
        'padding': None,
        'marker_offset': parser.indent,
    }

    if bullet:
        m = bullet
        data['type'] = 'bullet'
        data['bullet_char'] = m.group()[0]
    elif ordered and (container.t != 'paragraph' or
                      ordered.group(1) == '1'):
        # an ordered list may only interrupt a paragraph if it starts at 1
        m = ordered
        data['type'] = 'ordered'
        data['start'] = int(m.group(1))
        data['delimiter'] = m.group(2)
    else:
        return None

    marker_len = len(m.group())
    after_marker = parser.next_nonspace + marker_len

    # the marker must be followed by a space, a tab or the end of the line
    nextc = peek(line, after_marker)
    if nextc is not None and nextc != '\t' and nextc != ' ':
        return None

    # an item interrupting a paragraph must not start with a blank line
    if container.t == 'paragraph' and \
       not reNonSpace.search(line[after_marker:]):
        return None

    # It is a list marker: consume it, then measure the spaces after it.
    parser.advance_next_nonspace()  # to start of marker
    parser.advance_offset(marker_len, True)  # to end of marker
    spaces_start_col = parser.column
    spaces_start_offset = parser.offset
    while True:
        parser.advance_offset(1, True)
        nextc = peek(line, parser.offset)
        if parser.column - spaces_start_col >= 5 or \
           not is_space_or_tab(nextc):
            break

    spaces_after_marker = parser.column - spaces_start_col
    blank_item = peek(line, parser.offset) is None

    if 1 <= spaces_after_marker < 5 and not blank_item:
        data['padding'] = marker_len + spaces_after_marker
    else:
        # Too many spaces, none at all, or an empty item: the content
        # starts one column after the marker, so rewind and consume at
        # most a single space/tab.
        data['padding'] = marker_len + 1
        parser.column = spaces_start_col
        parser.offset = spaces_start_offset
        if is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)

    return data
149
150
151
def lists_match(list_data, item_data):
    """Tell whether a list item belongs to an existing list.

    Both arguments are list-data dicts; they match when their 'type',
    'delimiter' and 'bullet_char' entries are all equal (missing keys
    compare as None).  Used when agglomerating list items into lists.
    """
    return all(
        list_data.get(key) == item_data.get(key)
        for key in ('type', 'delimiter', 'bullet_char')
    )
160
161
162
class Block(object):
    """Base class for the per-block-type behaviour classes.

    Subclasses override `accepts_lines` and the three static hooks; the
    defaults here do nothing and return None.
    """

    # Whether raw text lines may be appended to this kind of block.
    accepts_lines = None

    @staticmethod
    def continue_(parser=None, container=None):
        """Hook: check whether the current line continues `container`."""
        return None

    @staticmethod
    def finalize(parser=None, block=None):
        """Hook: post-process `block` when it is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Hook: tell whether a child block of type `t` is allowed."""
        return None
176
177
178
class Document(Block):
    """Behaviour of the root 'document' block."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """The document matches every line, so always return 0."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when the document is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any block except a bare list item may be a direct child."""
        return t != 'item'
192
193
194
class List(Block):
    """Behaviour of 'list' blocks."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """A list always matches (returns 0); its items decide."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Mark the list as loose when blank lines separate its content.

        Sets block.list_data['tight'] to False if a non-final item ends
        with a blank line, or if a child of an item ends with a blank line
        and is followed by a sibling or by another item.
        """
        def siblings(node):
            # Walk `node` and the nodes after it through the `nxt` links.
            while node:
                yield node
                node = node.nxt

        for item in siblings(block.first_child):
            # a non-final list item ending with a blank line
            if ends_with_blank_line(item) and item.nxt:
                block.list_data['tight'] = False
                break
            # look inside the item for blank lines between its children
            for subitem in siblings(item.first_child):
                if ends_with_blank_line(subitem) and \
                   (item.nxt or subitem.nxt):
                    block.list_data['tight'] = False
                    break

    @staticmethod
    def can_contain(t):
        """A list may only contain list items."""
        return t == 'item'
223
224
225
class BlockQuote(Block):
    """Behaviour of 'block_quote' blocks."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Continue the quote when the line starts with a '>' marker.

        Consumes the marker and one optional following space/tab and
        returns 0; returns 1 when the line has no (unindented) marker.
        """
        ln = parser.current_line
        if parser.indented or peek(ln, parser.next_nonspace) != '>':
            return 1

        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        if is_space_or_tab(peek(ln, parser.offset)):
            parser.advance_offset(1, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a block quote is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any block except a bare list item may be a direct child."""
        return t != 'item'
247
248
249
class Item(Block):
    """Behaviour of list 'item' blocks."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Check whether the current line still belongs to the item.

        A blank line continues a non-empty item; any other line must be
        indented at least marker_offset + padding columns.  Returns 0 on
        a match (after consuming the indentation) and 1 otherwise.
        """
        if parser.blank:
            if container.first_child is None:
                # Blank line after empty list item
                return 1
            parser.advance_next_nonspace()
            return 0

        required_indent = (container.list_data['marker_offset'] +
                           container.list_data['padding'])
        if parser.indent < required_indent:
            return 1
        parser.advance_offset(required_indent, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a list item is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any block except a directly nested list item may be a child."""
        return t != 'item'
276
277
278
class Heading(Block):
    """Behaviour of 'heading' blocks."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always fail to match (1): a heading never spans a second line."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a heading is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """A heading has no block-level children."""
        return False
293
294
295
class ThematicBreak(Block):
    """Behaviour of 'thematic_break' blocks."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always fail to match (1): a thematic break is a single line."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a thematic break is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """A thematic break has no block-level children."""
        return False
310
311
312
class CodeBlock(Block):
    """Behaviour of 'code_block' blocks, both fenced and indented."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """Check whether the current line continues the code block.

        Returns 0 when the line belongs to the block, 1 when it does not
        and 2 when the line is the closing fence (the block is finalized
        and the rest of the line needs no further processing).
        """
        ln = parser.current_line
        indent = parser.indent

        if not container.is_fenced:
            # indented code block
            if indent >= CODE_INDENT:
                parser.advance_offset(CODE_INDENT, True)
            elif parser.blank:
                parser.advance_next_nonspace()
            else:
                return 1
            return 0

        # fenced code block: is this line the closing fence?
        closing = None
        start = parser.next_nonspace
        if indent <= 3 and len(ln) >= start + 1 and \
           ln[start] == container.fence_char:
            closing = reClosingCodeFence.match(ln[start:])
        if closing and len(closing.group()) >= container.fence_length:
            # closing fence - we're at end of line, so we can return
            parser.finalize(container, parser.line_number)
            return 2

        # content line: skip up to fence_offset leading spaces/tabs
        to_skip = container.fence_offset
        while to_skip > 0 and is_space_or_tab(peek(ln, parser.offset)):
            parser.advance_offset(1, True)
            to_skip -= 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Turn the accumulated string_content into info/literal.

        For a fenced block the first line becomes the (unescaped) info
        string and the remainder the literal; for an indented block,
        trailing blank lines are collapsed into a single newline.
        """
        content = block.string_content
        if block.is_fenced:
            # first line becomes info string
            split_at = content.index('\n')
            block.info = unescape_string(content[:split_at].strip())
            block.literal = content[split_at + 1:]
        else:
            # indented
            block.literal = re.sub(r'(\n *)+$', '\n', content)

        block.string_content = None

    @staticmethod
    def can_contain(t):
        """A code block has no block-level children."""
        return False
363
364
365
class HtmlBlock(Block):
    """Behaviour of 'html_block' blocks."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """HTML blocks of type 6 or 7 end at a blank line (1); else 0."""
        ends_on_blank = container.html_block_type in (6, 7)
        return 1 if parser.blank and ends_on_blank else 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Store the content, minus trailing blank lines, as the literal."""
        block.literal = re.sub(r'(\n *)+$', '', block.string_content)
        # drop the raw buffer so it can be garbage collected
        block.string_content = None

    @staticmethod
    def can_contain(t):
        """An HTML block has no block-level children."""
        return False
385
386
387
class Paragraph(Block):
    """Behaviour of 'paragraph' blocks."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """A blank line ends the paragraph (1); any other line continues."""
        if parser.blank:
            return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Strip leading link reference definitions from the paragraph.

        Each definition is parsed by the inline parser, which receives
        parser.refmap, and removed from string_content.  If at least one
        was found and nothing but whitespace remains, the paragraph is
        unlinked from the tree.
        """
        found_reference = False

        while peek(block.string_content, 0) == '[':
            consumed = parser.inline_parser.parseReference(
                block.string_content, parser.refmap)
            if not consumed:
                break
            block.string_content = block.string_content[consumed:]
            found_reference = True

        if found_reference and is_blank(block.string_content):
            block.unlink()

    @staticmethod
    def can_contain(t):
        """A paragraph has no block-level children."""
        return False
412
413
414
class BlockStarts(object):
    """Functions that try to open a new block at the current position.

    Each takes the parser and the current container and returns:
    0 = no match
    1 = matched container, keep going
    2 = matched leaf, no more block starts
    """

    # Names of the block start functions, in the order they are tried.
    METHODS = [
        'block_quote',
        'atx_heading',
        'fenced_code_block',
        'html_block',
        'setext_heading',
        'thematic_break',
        'list_item',
        'indented_code_block',
    ]

    @staticmethod
    def block_quote(parser, container=None):
        """Open a block quote on an unindented '>' marker."""
        if parser.indented or \
           peek(parser.current_line, parser.next_nonspace) != '>':
            return 0

        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        # one optional space or tab after the marker
        if is_space_or_tab(peek(parser.current_line, parser.offset)):
            parser.advance_offset(1, True)
        parser.close_unmatched_blocks()
        parser.add_child('block_quote', parser.next_nonspace)
        return 1

    @staticmethod
    def atx_heading(parser, container=None):
        """Open a heading on an unindented run of one to six '#'."""
        if parser.indented:
            return 0
        m = reATXHeadingMarker.match(
            parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        marker = m.group()
        parser.advance_next_nonspace()
        parser.advance_offset(len(marker), False)
        parser.close_unmatched_blocks()
        heading = parser.add_child('heading', parser.next_nonspace)
        # number of #s
        heading.level = len(marker.strip())
        # drop a content made only of #s, then any trailing run of #s
        text = re.sub(r'^ *#+ *$', '', parser.current_line[parser.offset:])
        heading.string_content = re.sub(r' +#+ *$', '', text)
        # the whole line has been consumed
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def fenced_code_block(parser, container=None):
        """Open a fenced code block on an unindented opening fence."""
        if parser.indented:
            return 0
        m = reCodeFence.match(parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        fence = m.group()
        parser.close_unmatched_blocks()
        code = parser.add_child('code_block', parser.next_nonspace)
        code.is_fenced = True
        code.fence_length = len(fence)
        code.fence_char = fence[0]
        code.fence_offset = parser.indent
        parser.advance_next_nonspace()
        parser.advance_offset(len(fence), False)
        return 2

    @staticmethod
    def html_block(parser, container=None):
        """Open an HTML block when a start condition (types 1-7) matches."""
        if parser.indented or \
           peek(parser.current_line, parser.next_nonspace) != '<':
            return 0

        s = parser.current_line[parser.next_nonspace:]
        for block_type in range(1, 8):
            if not reHtmlBlockOpen[block_type].search(s):
                continue
            if block_type >= 7 and container.t == 'paragraph':
                # type 7 is not allowed to interrupt a paragraph
                continue
            parser.close_unmatched_blocks()
            # We don't adjust parser.offset;
            # spaces are part of the HTML block:
            html = parser.add_child('html_block', parser.offset)
            html.html_block_type = block_type
            return 2
        return 0

    @staticmethod
    def setext_heading(parser, container=None):
        """Turn the current paragraph into a heading on an underline."""
        if parser.indented or container.t != 'paragraph':
            return 0
        m = reSetextHeadingLine.match(
            parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        parser.close_unmatched_blocks()
        heading = Node('heading', container.sourcepos)
        # '=' underlines give level 1, '-' underlines level 2
        heading.level = 1 if m.group()[0] == '=' else 2
        heading.string_content = container.string_content
        # the heading takes the paragraph's place in the tree
        container.insert_after(heading)
        container.unlink()
        parser.tip = heading
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def thematic_break(parser, container=None):
        """Open a thematic break on an unindented break line."""
        if parser.indented:
            return 0
        if not reThematicBreak.search(
                parser.current_line[parser.next_nonspace:]):
            return 0

        parser.close_unmatched_blocks()
        parser.add_child('thematic_break', parser.next_nonspace)
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def list_item(parser, container=None):
        """Open a list item, and a new list first when needed."""
        if parser.indented and container.t != 'list':
            return 0
        data = parse_list_marker(parser, container)
        if not data:
            return 0

        parser.close_unmatched_blocks()

        # start a new list unless the item fits the one at the tip
        if parser.tip.t != 'list' or \
           not lists_match(container.list_data, data):
            new_list = parser.add_child('list', parser.next_nonspace)
            new_list.list_data = data

        # add the list item
        item = parser.add_child('item', parser.next_nonspace)
        item.list_data = data
        return 1

    @staticmethod
    def indented_code_block(parser, container=None):
        """Open an indented code block on an indented, non-blank line.

        Does not apply while the tip is a paragraph.
        """
        if not parser.indented or \
           parser.tip.t == 'paragraph' or \
           parser.blank:
            return 0

        parser.advance_offset(CODE_INDENT, True)
        parser.close_unmatched_blocks()
        parser.add_child('code_block', parser.offset)
        return 2
571
572
573
class Parser(object):
    """Block-structure parser: builds a tree of Node objects from text.

    Input is consumed line by line (see incorporate_line); inline content
    of paragraphs and headings is parsed afterwards by an InlineParser.
    """

    def __init__(self, options=None):
        """Create a parser.

        `options` is passed to the InlineParser and kept on self.options.
        It defaults to a fresh empty dict for every parser, so instances
        never share one mutable default object.
        """
        if options is None:
            options = {}
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.block_starts = BlockStarts()
        self.tip = self.doc
        self.oldtip = self.doc
        self.current_line = ''
        self.line_number = 0
        self.offset = 0
        self.column = 0
        self.next_nonspace = 0
        self.next_nonspace_column = 0
        self.indent = 0
        self.indented = False
        self.blank = False
        self.partially_consumed_tab = False
        self.all_closed = True
        self.last_matched_container = self.doc
        self.refmap = {}
        self.last_line_length = 0
        self.inline_parser = InlineParser(options)
        self.options = options

    @staticmethod
    def _block_class(node):
        """Return the behaviour class for `node`.

        The class is looked up in the CommonMark.blocks module under the
        camel-cased form of the node type `node.t`.
        """
        return getattr(import_module('CommonMark.blocks'),
                       to_camel_case(node.t))

    def add_line(self):
        """Add the rest of the current line to the block at the tip.

        We assume the tip can accept lines -- that check should be done
        before calling this.
        """
        if self.partially_consumed_tab:
            # Skip over tab
            self.offset += 1
            # Replace the unconsumed part of the tab by spaces
            chars_to_tab = 4 - (self.column % 4)
            self.tip.string_content += (' ' * chars_to_tab)
        self.tip.string_content += (self.current_line[self.offset:] + '\n')

    def add_child(self, tag, offset):
        """Add a block of type `tag` as a child of the tip and return it.

        If the tip can't accept such a child, it is finalized and its
        parent is tried, and so on until a block accepts it.  The new
        block becomes the tip; its start position is the current line
        number and column `offset + 1`.
        """
        while not self._block_class(self.tip).can_contain(tag):
            self.finalize(self.tip, self.line_number - 1)

        column_number = offset + 1
        new_block = Node(tag, [[self.line_number, column_number], [0, 0]])
        new_block.string_content = ''
        self.tip.append_child(new_block)
        self.tip = new_block
        return new_block

    def close_unmatched_blocks(self):
        """Finalize and close any unmatched blocks."""
        if not self.all_closed:
            while self.oldtip != self.last_matched_container:
                parent = self.oldtip.parent
                self.finalize(self.oldtip, self.line_number - 1)
                self.oldtip = parent
            self.all_closed = True

    def find_next_nonspace(self):
        """Locate the next character that is not a space or tab.

        Starting from the current offset/column, updates next_nonspace,
        next_nonspace_column, indent, indented and blank; the current
        position itself is left untouched.  Tabs advance the column to
        the next multiple of 4.
        """
        line = self.current_line
        end = len(line)
        i = self.offset
        cols = self.column

        while i < end:
            c = line[i]
            if c == ' ':
                cols += 1
            elif c == '\t':
                cols += 4 - (cols % 4)
            else:
                break
            i += 1

        c = line[i] if i < end else ''
        self.blank = c in ('\n', '\r', '')
        self.next_nonspace = i
        self.next_nonspace_column = cols
        self.indent = self.next_nonspace_column - self.column
        self.indented = self.indent >= CODE_INDENT

    def advance_next_nonspace(self):
        """Move the current position to the next non-space character."""
        self.offset = self.next_nonspace
        self.column = self.next_nonspace_column
        self.partially_consumed_tab = False

    def advance_offset(self, count, columns):
        """Advance the current position within the line.

        With `columns` true, `count` is a number of columns and a tab may
        be consumed only partially (recorded in partially_consumed_tab);
        otherwise `count` is a number of characters.  Stops early at the
        end of the line.
        """
        line = self.current_line
        while count > 0 and self.offset < len(line):
            if line[self.offset] == '\t':
                chars_to_tab = 4 - (self.column % 4)
                if columns:
                    self.partially_consumed_tab = chars_to_tab > count
                    chars_to_advance = min(count, chars_to_tab)
                    self.column += chars_to_advance
                    # stay on the tab while part of it is still unconsumed
                    self.offset += 0 if self.partially_consumed_tab else 1
                    count -= chars_to_advance
                else:
                    self.partially_consumed_tab = False
                    self.column += chars_to_tab
                    self.offset += 1
                    count -= 1
            else:
                self.partially_consumed_tab = False
                self.offset += 1
                # assume ascii; block starts are ascii
                self.column += 1
                count -= 1

    def incorporate_line(self, ln):
        """Analyze a line of text and update the document appropriately.

        We parse markdown text by calling this on each line of input,
        then finalizing the document.
        """
        self.oldtip = self.tip
        self.offset = 0
        self.column = 0
        self.blank = False
        self.partially_consumed_tab = False
        self.line_number += 1

        # replace NUL characters for security
        if '\0' in ln:
            ln = ln.replace('\0', '\uFFFD')

        self.current_line = ln

        container, line_consumed = self._match_open_containers()
        if not line_consumed:
            self.all_closed = (container == self.oldtip)
            self.last_matched_container = container
            container = self._open_new_blocks(container)
            self._add_text_line(container)

        self.last_line_length = len(ln)

    def _match_open_containers(self):
        """Match the current line against the chain of open blocks.

        Walks down from the document through each open last child and
        asks its behaviour class whether the line continues it.  Returns
        `(container, line_consumed)`: the deepest block that matched, and
        True when the line was a closing code fence that needs no further
        processing.
        """
        container = self.doc
        last_child = container.last_child
        while last_child and last_child.is_open:
            container = last_child

            self.find_next_nonspace()
            rv = self._block_class(container).continue_(self, container)
            if rv == 1:
                # failed to match: back up to the last matching block
                return container.parent, False
            if rv == 2:
                # we've hit end of line for fenced code close
                return container, True
            if rv != 0:
                raise ValueError('returned illegal value, must be 0, 1, or 2')

            # matched, keep going
            last_child = container.last_child

        return container, False

    def _open_new_blocks(self, container):
        """Try block starts at the current position, possibly repeatedly.

        New blocks are added below `container` until a leaf block is
        opened or nothing matches any more.  Nothing is tried when
        `container` is a non-paragraph block that accepts lines.  Returns
        the block that should receive the rest of the line.
        """
        ln = self.current_line
        matched_leaf = container.t != 'paragraph' and \
            self._block_class(container).accepts_lines
        starts = self.block_starts

        while not matched_leaf:
            self.find_next_nonspace()

            # this is a little performance optimization:
            if not self.indented and \
               not re.search(reMaybeSpecial, ln[self.next_nonspace:]):
                self.advance_next_nonspace()
                break

            for name in starts.METHODS:
                res = getattr(starts, name)(self, container)
                if res == 1:
                    container = self.tip
                    break
                elif res == 2:
                    container = self.tip
                    matched_leaf = True
                    break
            else:
                # nothing matched
                self.advance_next_nonspace()
                break

        return container

    def _add_text_line(self, container):
        """Add what remains of the line to the appropriate block.

        Handles lazy paragraph continuation, closes unmatched blocks,
        records blank-line information on `container` and its ancestors,
        and finally appends the text to a block accepting lines or to a
        new paragraph.
        """
        ln = self.current_line

        if not self.all_closed and not self.blank and \
           self.tip.t == 'paragraph':
            # lazy paragraph continuation
            self.add_line()
            return

        # not a lazy continuation: finalize any blocks not matched
        self.close_unmatched_blocks()
        if self.blank and container.last_child:
            container.last_child.last_line_blank = True

        t = container.t

        # Block quote lines are never blank as they start with >
        # and we don't count blanks in fenced code for purposes of
        # tight/loose lists or breaking out of lists.  We also
        # don't set last_line_blank on an empty list item, or if we
        # just closed a fenced block.
        last_line_blank = self.blank and \
            not (t == 'block_quote' or
                 (t == 'code_block' and container.is_fenced) or
                 (t == 'item' and
                  not container.first_child and
                  container.sourcepos[0][0] == self.line_number))

        # propagate last_line_blank up through parents:
        cont = container
        while cont:
            cont.last_line_blank = last_line_blank
            cont = cont.parent

        if self._block_class(container).accepts_lines:
            self.add_line()
            # an HTML block of type 1-5 ends on the line that contains
            # its end condition
            if t == 'html_block' and \
               1 <= container.html_block_type <= 5 and \
               re.search(
                   reHtmlBlockClose[container.html_block_type],
                   ln[self.offset:]):
                self.finalize(container, self.line_number)
        elif self.offset < len(ln) and not self.blank:
            # create a paragraph container for one line
            self.add_child('paragraph', self.offset)
            self.advance_next_nonspace()
            self.add_line()

    def finalize(self, block, line_number):
        """Close `block` and run its type-specific post-processing.

        The block is marked closed, its end position is set to
        (`line_number`, length of the previous line), the finalize hook
        of its behaviour class is called, and the tip is reset to the
        parent of the closed block.
        """
        above = block.parent
        block.is_open = False
        block.sourcepos[1] = [line_number, self.last_line_length]
        self._block_class(block).finalize(self, block)

        self.tip = above

    def process_inlines(self, block):
        """Parse the inline content of `block` and its descendants.

        Walks the tree and runs the inline parser on every paragraph and
        heading when the walker leaves it.
        """
        walker = block.walker()
        self.inline_parser.refmap = self.refmap
        self.inline_parser.options = self.options
        event = walker.nxt()
        while event is not None:
            node = event['node']
            if not event['entering'] and node.t in ('paragraph', 'heading'):
                self.inline_parser.parse(node)
            event = walker.nxt()

    def parse(self, my_input):
        """The main parsing function.  Returns a parsed document AST."""
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.tip = self.doc
        self.refmap = {}
        self.line_number = 0
        self.last_line_length = 0
        self.offset = 0
        self.column = 0
        self.last_matched_container = self.doc
        self.current_line = ''

        lines = re.split(reLineEnding, my_input)
        length = len(lines)
        if my_input.endswith('\n'):
            # ignore last blank line created by final newline
            length -= 1
        for line in lines[:length]:
            self.incorporate_line(line)

        # close every block that is still open, up to the document
        while self.tip:
            self.finalize(self.tip, length)
        self.process_inlines(self.doc)
        return self.doc
898