Passed
Push — master ( ec62d3...86008f )
by Dongxin
42s
created

parse_list_marker()   F

Complexity

Conditions 17

Size

Total Lines 65

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 17
c 2
b 0
f 0
dl 0
loc 65
rs 2.7789

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex units like parse_list_marker() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods (or, inside a function, local variables and helper calls) that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from __future__ import absolute_import, unicode_literals
2
3
import re
4
from importlib import import_module
5
from CommonMark import common
6
from CommonMark.common import unescape_string
7
from CommonMark.inlines import InlineParser
8
from CommonMark.node import Node
9
from CommonMark.utils import to_camel_case
10
11
12
# Indent (in columns) from which the parser treats a line as "indented".
CODE_INDENT = 4

# Opening patterns for HTML blocks, indexed by HTML block type (1-7).
# Slot 0 is a placeholder so that the list index equals the type number.
reHtmlBlockOpen = [
    re.compile(r'.'),  # dummy for 0
    # type 1: <script>, <pre>, <style>
    re.compile(r'^<(?:script|pre|style)(?:\s|>|$)', re.IGNORECASE),
    # type 2: comment
    re.compile(r'^<!--'),
    # type 3: processing instruction
    re.compile(r'^<[?]'),
    # type 4: declaration
    re.compile(r'^<![A-Z]'),
    # type 5: CDATA section
    re.compile(r'^<!\[CDATA\['),
    # type 6: one of a fixed set of tag names (opening or closing)
    re.compile(
        r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|'
        r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|'
        r'fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|'
        r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|'
        r'nav|noframes|ol|optgroup|option|p|param|section|source|title|'
        r'summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
        r'(?:\s|[/]?[>]|$)',
        re.IGNORECASE),
    # type 7: any other complete open or close tag alone on its line
    re.compile(
        '^(?:' + common.OPENTAG + '|' + common.CLOSETAG + ')\\s*$',
        re.IGNORECASE),
]

# Closing patterns for HTML block types 1-5 (same indexing as above).
reHtmlBlockClose = [
    re.compile(r'.'),  # dummy for 0
    re.compile(r'<\/(?:script|pre|style)>', re.IGNORECASE),
    re.compile(r'-->'),
    re.compile(r'\?>'),
    re.compile(r'>'),
    re.compile(r'\]\]>'),
]

# Three or more of the same *, _ or - character, optionally spaced out.
reThematicBreak = re.compile(
    r'^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$')

# Cheap pre-check: first characters that could begin a block start.
reMaybeSpecial = re.compile(r'^[#`~*+_=<>0-9-]')

# Any character that is not whitespace.
reNonSpace = re.compile(r'[^ \t\f\v\r\n]')

# List markers.
reBulletListMarker = re.compile(r'^[*+-]')
reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])')

# Headings and code fences.
reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')
reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}(?!.*~)')
reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')
reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')

# Line terminators used to split the input into lines.
reLineEnding = re.compile(r'\r\n|\n|\r')
52
53
54
def is_blank(s):
    """Return True when ``s`` holds no character matched by reNonSpace."""
    return reNonSpace.search(s) is None
57
58
59
def is_space_or_tab(s):
    """Return True when ``s`` is exactly one space or one tab character."""
    return s in (' ', '\t')
61
62
63
def peek(ln, pos):
    """Return the character of ``ln`` at ``pos``, or None past the end."""
    return ln[pos] if pos < len(ln) else None
68
69
70
def ends_with_blank_line(block):
    """Report whether ``block`` ends with a blank line.

    Follows the last child downwards for as long as the current node is
    a 'list' or an 'item'; any other node type stops the descent.
    """
    node = block
    while node:
        if node.last_line_blank:
            return True
        if node.t not in ('list', 'item'):
            return False
        node = node.last_child
    return False
82
83
84
def _match_list_marker(rest, container, data):
    """Recognise a bullet or ordered marker at the start of ``rest``.

    On success the type-specific keys of ``data`` are filled in and the
    regex match object is returned; otherwise None is returned and
    ``data`` is left untouched.
    """
    bullet = re.search(reBulletListMarker, rest)
    if bullet:
        data['type'] = 'bullet'
        data['bullet_char'] = bullet.group()[0]
        return bullet

    ordered = re.search(reOrderedListMarker, rest)
    # inside a paragraph only a marker numbered exactly '1' is accepted
    if ordered and (container.t != 'paragraph' or ordered.group(1) == '1'):
        data['type'] = 'ordered'
        data['start'] = int(ordered.group(1))
        data['delimiter'] = ordered.group(2)
        return ordered

    return None


def _marker_can_start_item(parser, container, marker_len):
    """Check what follows a marker of ``marker_len`` characters.

    The marker must be followed by a space, a tab or the end of the line,
    and, when ``container`` is a paragraph, the remainder of the line
    must not be blank.
    """
    after_marker = parser.next_nonspace + marker_len
    nextc = peek(parser.current_line, after_marker)
    if not (nextc is None or nextc == '\t' or nextc == ' '):
        return False

    # if it interrupts paragraph, make sure first line isn't blank
    if container.t == 'paragraph' and \
       not re.search(reNonSpace, parser.current_line[after_marker:]):
        return False

    return True


def _consume_marker_padding(parser, marker_len):
    """Advance ``parser`` past the marker and the whitespace after it.

    Returns the padding value for the list item: the marker width plus
    the number of columns of whitespace consumed after it.
    """
    parser.advance_next_nonspace()  # to start of marker
    parser.advance_offset(marker_len, True)  # to end of marker
    spaces_start_col = parser.column
    spaces_start_offset = parser.offset

    # always step at least once, then keep going over spaces/tabs while
    # fewer than 5 columns have been consumed
    while True:
        parser.advance_offset(1, True)
        nextc = peek(parser.current_line, parser.offset)
        if not (parser.column - spaces_start_col < 5 and
                is_space_or_tab(nextc)):
            break

    blank_item = peek(parser.current_line, parser.offset) is None
    spaces_after_marker = parser.column - spaces_start_col

    if 1 <= spaces_after_marker < 5 and not blank_item:
        return marker_len + spaces_after_marker

    # Otherwise rewind to just after the marker and consume at most one
    # space or tab; the padding is then marker width + 1.
    parser.column = spaces_start_col
    parser.offset = spaces_start_offset
    if is_space_or_tab(peek(parser.current_line, parser.offset)):
        parser.advance_offset(1, True)
    return marker_len + 1


def parse_list_marker(parser, container):
    """Parse a list marker at the parser's next non-space position.

    Returns a dict describing the marker (type, tight, bullet_char,
    start, delimiter, padding, marker_offset), or None when the line
    does not start a list item.  The parser position is only advanced
    when a marker is accepted.
    """
    rest = parser.current_line[parser.next_nonspace:]
    data = {
        'type': None,
        'tight': True,  # lists are tight by default
        'bullet_char': None,
        'start': None,
        'delimiter': None,
        'padding': None,
        'marker_offset': parser.indent,
    }

    m = _match_list_marker(rest, container, data)
    if m is None:
        return None

    marker_len = len(m.group())
    if not _marker_can_start_item(parser, container, marker_len):
        return None

    # we've got a match! advance offset and calculate padding
    data['padding'] = _consume_marker_padding(parser, marker_len)
    return data
149
150
151
def lists_match(list_data, item_data):
    """
    Tell whether two list-data dicts describe the same kind of list.

    They match when their 'type', 'delimiter' and 'bullet_char' entries
    are all equal (missing keys compare as None).  Used when deciding
    whether a new item joins the current list.
    """
    compared_keys = ('type', 'delimiter', 'bullet_char')
    return all(list_data.get(key) == item_data.get(key)
               for key in compared_keys)
160
161
162
class Block(object):
    """Base for the per-block-type handler classes; every hook is a no-op."""

    # Whether this block type takes raw text lines; unset on the base class.
    accepts_lines = None

    @staticmethod
    def continue_(parser=None, container=None):
        """Default continuation hook: does nothing and returns None."""
        return None

    @staticmethod
    def finalize(parser=None, block=None):
        """Default finalization hook: does nothing."""
        return None

    @staticmethod
    def can_contain(t):
        """Default containment hook: returns None (falsy)."""
        return None
176
177
178
class Document(Block):
    """Handler for the 'document' root node."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """The document always matches the current line (returns 0)."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when the document is finalized."""
        return None

    @staticmethod
    def can_contain(t):
        """Any block type except a bare 'item' may be a direct child."""
        return t != 'item'
192
193
194
class List(Block):
    """Handler for 'list' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """A list always matches the current line (returns 0)."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Mark the list as loose when blank lines separate its content.

        Sets ``block.list_data['tight']`` to False if a non-final item
        ends with a blank line, or if a child of any item ends with a
        blank line and is followed by a sibling or by another item.
        """
        list_data = block.list_data
        item = block.first_child
        while item:
            # a blank line after an item that is not the last one
            if ends_with_blank_line(item) and item.nxt:
                list_data['tight'] = False
                break

            # look at the item's own children for blank lines between them
            child = item.first_child
            while child:
                if ends_with_blank_line(child) and \
                   (item.nxt or child.nxt):
                    list_data['tight'] = False
                    break
                child = child.nxt

            item = item.nxt

    @staticmethod
    def can_contain(t):
        """A list may only hold 'item' children."""
        return t == 'item'
223
224
225
class BlockQuote(Block):
    """Handler for 'block_quote' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Match a continuing '>' marker on the current line.

        Returns 1 (no match) when the line is indented or its first
        non-space character is not '>'.  Otherwise consumes the marker
        plus one optional space or tab and returns 0.
        """
        line = parser.current_line
        if parser.indented or peek(line, parser.next_nonspace) != '>':
            return 1

        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        if is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a block quote is finalized."""
        return None

    @staticmethod
    def can_contain(t):
        """Any block type except a bare 'item' may be a child."""
        return t != 'item'
247
248
249
class Item(Block):
    """Handler for list 'item' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Decide whether the current line still belongs to this item.

        A blank line continues the item unless the item has no children
        yet.  A non-blank line continues it only when indented at least
        as far as the item's content (marker_offset + padding); that
        indentation is then consumed.  Returns 0 on match, 1 otherwise.
        """
        if parser.blank:
            if container.first_child is None:
                # Blank line after empty list item
                return 1
            parser.advance_next_nonspace()
            return 0

        content_indent = (container.list_data['marker_offset'] +
                          container.list_data['padding'])
        if parser.indent < content_indent:
            return 1

        parser.advance_offset(content_indent, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when an item is finalized."""
        return None

    @staticmethod
    def can_contain(t):
        """Any block type except a directly nested 'item' may be a child."""
        return t != 'item'
276
277
278
class Heading(Block):
    """Handler for 'heading' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always returns 1: a heading never spans more than one line."""
        # A heading can never contain > 1 line, so fail to match:
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a heading is finalized."""
        return None

    @staticmethod
    def can_contain(t):
        """Headings hold no child blocks."""
        return False
293
294
295
class ThematicBreak(Block):
    """Handler for 'thematic_break' nodes."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always returns 1: a thematic break never spans more lines."""
        # A thematic break can never contain > 1 line, so fail to match:
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a thematic break is finalized."""
        return None

    @staticmethod
    def can_contain(t):
        """Thematic breaks hold no child blocks."""
        return False
310
311
312
class CodeBlock(Block):
    """Handler for 'code_block' nodes, both fenced and indented."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """Decide whether the current line continues the code block.

        Returns 0 when the line belongs to the block, 1 when it does not,
        and 2 when the line is the closing fence (the block is finalized
        here and nothing remains to process on the line).
        """
        ln = parser.current_line
        indent = parser.indent

        if not container.is_fenced:
            # indented
            if indent >= CODE_INDENT:
                parser.advance_offset(CODE_INDENT, True)
            elif parser.blank:
                parser.advance_next_nonspace()
            else:
                return 1
            return 0

        # fenced: look for a closing fence of the same character
        closing = None
        if indent <= 3 and \
           len(ln) > parser.next_nonspace and \
           ln[parser.next_nonspace] == container.fence_char:
            closing = re.search(reClosingCodeFence,
                                ln[parser.next_nonspace:])
        if closing and len(closing.group()) >= container.fence_length:
            # closing fence - we're at end of line, so we can return
            parser.finalize(container, parser.line_number)
            return 2

        # skip optional spaces of fence offset
        remaining = container.fence_offset
        while remaining > 0 and is_space_or_tab(peek(ln, parser.offset)):
            parser.advance_offset(1, True)
            remaining -= 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Turn the accumulated text into ``block.literal``.

        For a fenced block the first line becomes the (stripped,
        unescaped) info string and the rest the literal.  For an indented
        block trailing blank lines collapse into a single newline.
        """
        content = block.string_content
        if block.is_fenced:
            # first line becomes info string; like the index() lookup
            # this replaces, unpacking raises ValueError without a '\n'
            first_line, rest = content.split('\n', 1)
            block.info = unescape_string(first_line.strip())
            block.literal = rest
        else:
            # indented
            block.literal = re.sub(r'(\n *)+$', '\n', content)

        block.string_content = None

    @staticmethod
    def can_contain(t):
        """Code blocks hold no child blocks."""
        return False
363
364
365
class HtmlBlock(Block):
    """Handler for 'html_block' nodes."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """HTML blocks of type 6 or 7 end at a blank line (returns 1);
        every other line continues the block (returns 0)."""
        ends_here = parser.blank and container.html_block_type in (6, 7)
        return 1 if ends_here else 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Store the text, minus trailing blank lines, in ``block.literal``."""
        block.literal = re.sub(r'(\n *)+$', '', block.string_content)
        # drop the raw buffer so it can be garbage-collected
        block.string_content = None

    @staticmethod
    def can_contain(t):
        """HTML blocks hold no child blocks."""
        return False
385
386
387
class Paragraph(Block):
    """Handler for 'paragraph' nodes."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """A blank line ends the paragraph (1); anything else continues it."""
        if parser.blank:
            return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Strip leading link reference definitions from the paragraph.

        While the text starts with '[', the inline parser is asked to
        parse a reference definition into ``parser.refmap``; the consumed
        prefix is removed.  If at least one definition was consumed and
        only blank text remains, the paragraph is unlinked from the tree.
        """
        consumed_any = False

        # try parsing the beginning as link reference definitions:
        while peek(block.string_content, 0) == '[':
            pos = parser.inline_parser.parseReference(
                block.string_content, parser.refmap)
            if not pos:
                break
            block.string_content = block.string_content[pos:]
            consumed_any = True

        if consumed_any and is_blank(block.string_content):
            block.unlink()

    @staticmethod
    def can_contain(t):
        """Paragraphs hold no child blocks."""
        return False
412
413
414
class BlockStarts(object):
415
    """Block start functions.
416
417
    Return values:
418
    0 = no match
419
    1 = matched container, keep going
420
    2 = matched leaf, no more block starts
421
    """
422
    METHODS = [
423
        'block_quote',
424
        'atx_heading',
425
        'fenced_code_block',
426
        'html_block',
427
        'setext_heading',
428
        'thematic_break',
429
        'list_item',
430
        'indented_code_block',
431
    ]
432
433
    @staticmethod
    def block_quote(parser, container=None):
        """Start a block quote when an unindented line begins with '>'.

        Consumes the marker and one optional space/tab, closes unmatched
        blocks, adds a 'block_quote' child and returns 1; returns 0 when
        the line does not open a block quote.
        """
        line = parser.current_line
        if parser.indented or peek(line, parser.next_nonspace) != '>':
            return 0

        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        # optional following space
        if is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)
        parser.close_unmatched_blocks()
        parser.add_child('block_quote', parser.next_nonspace)
        return 1
447
448
    @staticmethod
    def atx_heading(parser, container=None):
        """Start an ATX ('#'-style) heading on an unindented line.

        On a match a 'heading' child is added with its level set to the
        number of '#' characters and its text set to the rest of the
        line minus any closing '#' sequence; the whole line is consumed
        and 2 is returned.  Returns 0 when there is no match.
        """
        if parser.indented:
            return 0

        m = re.search(reATXHeadingMarker,
                      parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        marker = m.group()
        parser.advance_next_nonspace()
        parser.advance_offset(len(marker), False)
        parser.close_unmatched_blocks()
        heading = parser.add_child('heading', parser.next_nonspace)
        # number of #s
        heading.level = len(marker.strip())

        # remove trailing ###s: first the case where only #s remain,
        # then a closing run of #s preceded by whitespace
        text = parser.current_line[parser.offset:]
        text = re.sub(r'^[ \t]*#+[ \t]*$', '', text)
        heading.string_content = re.sub(r'[ \t]+#+[ \t]*$', '', text)

        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2
471
472
    @staticmethod
    def fenced_code_block(parser, container=None):
        """Start a fenced code block on an unindented fence line.

        Adds a 'code_block' child recording the fence length, character
        and indentation, consumes the fence and returns 2; returns 0
        when the line is not an opening fence.
        """
        if parser.indented:
            return 0

        m = re.search(
            reCodeFence,
            parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        fence = m.group()
        parser.close_unmatched_blocks()
        code_block = parser.add_child('code_block', parser.next_nonspace)
        code_block.is_fenced = True
        code_block.fence_length = len(fence)
        code_block.fence_char = fence[0]
        code_block.fence_offset = parser.indent
        parser.advance_next_nonspace()
        parser.advance_offset(len(fence), False)
        return 2
492
493
    @staticmethod
    def html_block(parser, container=None):
        """Start an HTML block on an unindented line beginning with '<'.

        The opening patterns for types 1-7 are tried in order; type 7 is
        skipped when ``container`` is a paragraph.  On a match an
        'html_block' child tagged with the type is added and 2 is
        returned; otherwise 0.
        """
        if parser.indented or \
           peek(parser.current_line, parser.next_nonspace) != '<':
            return 0

        s = parser.current_line[parser.next_nonspace:]
        for block_type in range(1, 8):
            if not re.search(reHtmlBlockOpen[block_type], s):
                continue
            if block_type >= 7 and container.t == 'paragraph':
                continue
            parser.close_unmatched_blocks()
            # We don't adjust parser.offset;
            # spaces are part of the HTML block:
            html = parser.add_child('html_block', parser.offset)
            html.html_block_type = block_type
            return 2

        return 0
509
510
    @staticmethod
    def setext_heading(parser, container=None):
        """Convert the current paragraph into a setext heading.

        Applies when ``container`` is a paragraph and the unindented line
        is an underline of '=' (level 1) or '-' (level 2).  The paragraph
        node is replaced by a new 'heading' node carrying its text, the
        line is consumed and 2 is returned; otherwise 0.
        """
        if parser.indented or container.t != 'paragraph':
            return 0

        m = re.search(
            reSetextHeadingLine,
            parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        parser.close_unmatched_blocks()
        heading = Node('heading', container.sourcepos)
        if m.group()[0] == '=':
            heading.level = 1
        else:
            heading.level = 2
        heading.string_content = container.string_content
        # swap the paragraph for the heading in the tree
        container.insert_after(heading)
        container.unlink()
        parser.tip = heading
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2
529
530
    @staticmethod
    def thematic_break(parser, container=None):
        """Start a thematic break on an unindented matching line.

        Adds a 'thematic_break' child, consumes the line and returns 2;
        returns 0 when the line is not a thematic break.
        """
        if parser.indented:
            return 0
        rest = parser.current_line[parser.next_nonspace:]
        if not re.search(reThematicBreak, rest):
            return 0

        parser.close_unmatched_blocks()
        parser.add_child('thematic_break', parser.next_nonspace)
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2
540
541
    @staticmethod
    def list_item(parser, container=None):
        """Start a list item, opening a new list first when required.

        Tried when the line is not indented or ``container`` is a list.
        A new 'list' node is added unless the tip is already a list whose
        data matches the parsed marker.  Returns 1 when an item was
        added, 0 otherwise.
        """
        if parser.indented and container.t != 'list':
            return 0

        data = parse_list_marker(parser, container)
        if not data:
            return 0

        parser.close_unmatched_blocks()

        # add the list if needed
        if parser.tip.t != 'list' or \
           not lists_match(container.list_data, data):
            list_node = parser.add_child('list', parser.next_nonspace)
            list_node.list_data = data

        # add the list item
        item_node = parser.add_child('item', parser.next_nonspace)
        item_node.list_data = data
        return 1
560
561
    @staticmethod
    def indented_code_block(parser, container=None):
        """Start an indented code block.

        Applies to an indented, non-blank line when the tip is not a
        paragraph.  Consumes CODE_INDENT columns, adds a 'code_block'
        child and returns 2; otherwise returns 0.
        """
        starts_code = (parser.indented and
                       parser.tip.t != 'paragraph' and
                       not parser.blank)
        if not starts_code:
            return 0

        # indented code
        parser.advance_offset(CODE_INDENT, True)
        parser.close_unmatched_blocks()
        parser.add_child('code_block', parser.offset)
        return 2
573
574
575
class Parser(object):
576
    def __init__(self, options=None):
        """Create a parser with an empty document.

        ``options`` is passed to the InlineParser and kept on the
        instance.  When omitted, a fresh dict is created for this parser
        so that separate parsers never share one mutable default object.
        """
        if options is None:
            options = {}
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.block_starts = BlockStarts()
        self.tip = self.doc
        self.oldtip = self.doc
        self.current_line = ''
        self.line_number = 0
        self.offset = 0
        self.column = 0
        self.next_nonspace = 0
        self.next_nonspace_column = 0
        self.indent = 0
        self.indented = False
        self.blank = False
        self.partially_consumed_tab = False
        self.all_closed = True
        self.last_matched_container = self.doc
        self.refmap = {}
        self.last_line_length = 0
        self.inline_parser = InlineParser(options)
        self.options = options
597
598
    def add_line(self):
599
        """ Add a line to the block at the tip.  We assume the tip
600
        can accept lines -- that check should be done before calling this."""
601
        if self.partially_consumed_tab:
602
            # Skip over tab
603
            self.offset += 1
604
            # Add space characters
605
            chars_to_tab = 4 - (self.column % 4)
606
            self.tip.string_content += (' ' * chars_to_tab)
607
        self.tip.string_content += (self.current_line[self.offset:] + '\n')
608
609
    def add_child(self, tag, offset):
        """Add a new block of type ``tag`` below the tip and return it.

        While the tip's block class cannot contain ``tag``, the tip is
        finalized (which moves the tip to its parent).  The new node then
        becomes the tip; its source position starts at the current line,
        column ``offset + 1``.
        """
        blocks = import_module('CommonMark.blocks')
        while not getattr(blocks,
                          to_camel_case(self.tip.t)).can_contain(tag):
            self.finalize(self.tip, self.line_number - 1)

        start = [self.line_number, offset + 1]
        child = Node(tag, [start, [0, 0]])
        child.string_content = ''
        self.tip.append_child(child)
        self.tip = child
        return child
627
628
    def close_unmatched_blocks(self):
629
        """Finalize and close any unmatched blocks."""
630
        if not self.all_closed:
631
            while self.oldtip != self.last_matched_container:
632
                parent = self.oldtip.parent
633
                self.finalize(self.oldtip, self.line_number - 1)
634
                self.oldtip = parent
635
            self.all_closed = True
636
637
    def find_next_nonspace(self):
        """Locate the first non-space, non-tab character from the offset.

        Updates ``next_nonspace`` (index), ``next_nonspace_column``
        (column, with tabs advancing to the next multiple of 4),
        ``indent``, ``indented`` and ``blank`` without moving the offset.
        """
        line = self.current_line

        def char_at(index):
            # '' stands for "past the end of the line"
            try:
                return line[index]
            except IndexError:
                return ''

        pos = self.offset
        col = self.column
        c = char_at(pos)
        while c != '':
            if c == ' ':
                col += 1
            elif c == '\t':
                col += (4 - (col % 4))
            else:
                break
            pos += 1
            c = char_at(pos)

        self.blank = (c == '\n' or c == '\r' or c == '')
        self.next_nonspace = pos
        self.next_nonspace_column = col
        self.indent = self.next_nonspace_column - self.column
        self.indented = self.indent >= CODE_INDENT
666
667
    def advance_next_nonspace(self):
668
        self.offset = self.next_nonspace
669
        self.column = self.next_nonspace_column
670
        self.partially_consumed_tab = False
671
672
    def advance_offset(self, count, columns):
673
        current_line = self.current_line
674
        try:
675
            c = current_line[self.offset]
676
        except IndexError:
677
            c = None
678
        while count > 0 and c is not None:
679
            if c == '\t':
680
                chars_to_tab = 4 - (self.column % 4)
681
                if columns:
682
                    self.partially_consumed_tab = chars_to_tab > count
683
                    chars_to_advance = min(count, chars_to_tab)
684
                    self.column += chars_to_advance
685
                    self.offset += 0 if self.partially_consumed_tab else 1
686
                    count -= chars_to_advance
687
                else:
688
                    self.partially_consumed_tab = False
689
                    self.column += chars_to_tab
690
                    self.offset += 1
691
                    count -= 1
692
            else:
693
                self.partially_consumed_tab = False
694
                self.offset += 1
695
                # assume ascii; block starts are ascii
696
                self.column += 1
697
                count -= 1
698
            try:
699
                c = current_line[self.offset]
700
            except IndexError:
701
                c = None
702
703
    def incorporate_line(self, ln):
        """Analyze one line of text and update the document tree.

        Markdown is parsed by calling this on every input line and then
        finalizing the document.  The work has three phases: match the
        line against the open containers, try to open new blocks, then
        add whatever text remains to the right block.
        """
        container = self.doc
        self.oldtip = self.tip
        self.offset = 0
        self.column = 0
        self.blank = False
        self.partially_consumed_tab = False
        self.line_number += 1

        # replace NUL characters for security
        if re.search(r'\u0000', ln) is not None:
            ln = re.sub(r'\0', '\uFFFD', ln)

        self.current_line = ln

        blocks = import_module('CommonMark.blocks')

        # Phase 1: walk down the chain of open blocks, letting each one
        # try to match the start of the line.  Stop at the first failure;
        # container then refers to the last block that matched.
        child = container.last_child
        while child and child.is_open:
            container = child

            self.find_next_nonspace()
            handler = getattr(blocks, to_camel_case(container.t))
            rv = handler.continue_(self, container)
            if rv == 2:
                # we've hit end of line for fenced code close and can return
                self.last_line_length = len(ln)
                return
            if rv == 1:
                # we've failed to match a block:
                # back up to last matching block
                container = container.parent
                break
            if rv != 0:
                raise ValueError('returned illegal value, must be 0, 1, or 2')

            # rv == 0: we've matched, keep going
            child = container.last_child

        self.all_closed = (container == self.oldtip)
        self.last_matched_container = container

        handler = getattr(blocks, to_camel_case(container.t))
        matched_leaf = container.t != 'paragraph' and handler.accepts_lines
        starts = self.block_starts

        # Phase 2: unless the last matched container is a leaf that takes
        # lines, try new block starts, adding children to the container.
        while not matched_leaf:
            self.find_next_nonspace()

            # this is a little performance optimization:
            if not self.indented and \
               not re.search(reMaybeSpecial, ln[self.next_nonspace:]):
                self.advance_next_nonspace()
                break

            for start_name in starts.METHODS:
                res = getattr(starts, start_name)(self, container)
                if res == 1:
                    container = self.tip
                    break
                if res == 2:
                    container = self.tip
                    matched_leaf = True
                    break
            else:
                # nothing matched
                self.advance_next_nonspace()
                break

        # Phase 3: what remains at the offset is a text line.  Add the
        # text to the appropriate container.
        if not self.all_closed and not self.blank and \
           self.tip.t == 'paragraph':
            # lazy paragraph continuation
            self.add_line()
        else:
            # not a lazy continuation
            # finalize any blocks not matched
            self.close_unmatched_blocks()
            if self.blank and container.last_child:
                container.last_child.last_line_blank = True

            t = container.t

            # Block quote lines are never blank as they start with >
            # and we don't count blanks in fenced code for purposes of
            # tight/loose lists or breaking out of lists.  We also
            # don't set last_line_blank on an empty list item, or if we
            # just closed a fenced block.
            last_line_blank = self.blank and \
                not (t == 'block_quote' or
                     (t == 'code_block' and container.is_fenced) or
                     (t == 'item' and
                      not container.first_child and
                      container.sourcepos[0][0] == self.line_number))

            # propagate last_line_blank up through parents:
            ancestor = container
            while ancestor:
                ancestor.last_line_blank = last_line_blank
                ancestor = ancestor.parent

            handler = getattr(blocks, to_camel_case(t))
            if handler.accepts_lines:
                self.add_line()
                # if HtmlBlock, check for end condition
                if t == 'html_block' and \
                   1 <= container.html_block_type <= 5 and \
                   re.search(
                       reHtmlBlockClose[container.html_block_type],
                       self.current_line[self.offset:]):
                    self.finalize(container, self.line_number)
            elif self.offset < len(ln) and not self.blank:
                # create a paragraph container for one line
                container = self.add_child('paragraph', self.offset)
                self.advance_next_nonspace()
                self.add_line()

        self.last_line_length = len(ln)
846
847
    def finalize(self, block, line_number):
        """Close ``block`` and run its type-specific post-processing.

        Marks the block as closed, records the end of its source
        position as (``line_number``, length of the last line), calls
        the ``finalize`` hook of the block's handler class, and moves
        the tip to the block's parent.
        """
        # remember the parent up front; the hook may unlink the block
        parent = block.parent
        block.is_open = False
        block.sourcepos[1] = [line_number, self.last_line_length]

        blocks = import_module('CommonMark.blocks')
        handler = getattr(blocks, to_camel_case(block.t))
        handler.finalize(self, block)

        self.tip = parent
861
862
    def process_inlines(self, block):
        """
        Walk ``block`` and its descendants, running the inline parser on
        every paragraph and heading as the walker leaves it.
        """
        walker = block.walker()
        self.inline_parser.refmap = self.refmap
        self.inline_parser.options = self.options

        inline_containers = ('paragraph', 'heading')
        event = walker.nxt()
        while event is not None:
            node = event['node']
            kind = node.t
            if not event['entering'] and kind in inline_containers:
                self.inline_parser.parse(node)
            event = walker.nxt()
877
878
    def parse(self, my_input):
        """Parse ``my_input`` and return the resulting document AST.

        Parser state is reset, the input is split into lines, each line
        is incorporated, every block still open is finalized, and inline
        content is parsed last.
        """
        # start from a clean slate
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.tip = self.doc
        self.refmap = {}
        self.line_number = 0
        self.last_line_length = 0
        self.offset = 0
        self.column = 0
        self.last_matched_container = self.doc
        self.current_line = ''

        lines = reLineEnding.split(my_input)
        length = len(lines)
        if len(my_input) > 0 and my_input[-1] == '\n':
            # ignore last blank line created by final newline
            length -= 1

        for line in lines[:length]:
            self.incorporate_line(line)

        # finalize moves the tip to the parent, so this closes every
        # block up to and including the document
        while self.tip:
            self.finalize(self.tip, length)

        self.process_inlines(self.doc)
        return self.doc
900