Passed
Push — master ( a0acea...4858c5 )
by Dongxin
48s
created

Parser.advance_offset()   F

Complexity

Conditions 10

Size

Total Lines 35

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 10
c 2
b 0
f 0
dl 0
loc 35
rs 3.1304

How to fix   Complexity   

Complexity

Complex methods like Parser.advance_offset() often do a lot of different things, and they usually live in classes that do the same. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from __future__ import absolute_import, unicode_literals
2
3
import re
4
from importlib import import_module
5
from CommonMark import common
6
from CommonMark.common import unescape_string
7
from CommonMark.inlines import InlineParser
8
from CommonMark.node import Node
9
from CommonMark.utils import to_camel_case
10
11
12
# Indentation (in columns) at which a line counts as "indented"; also the
# number of columns stripped from each line of an indented code block.
CODE_INDENT = 4
# Patterns that open an HTML block, indexed by html_block_type (1-7).
# Index 0 is a placeholder so that list indices equal the block type.
reHtmlBlockOpen = [
    re.compile(r'.'),  # dummy for 0
    re.compile(r'^<(?:script|pre|style)(?:\s|>|$)', re.IGNORECASE),
    re.compile(r'^<!--'),
    re.compile(r'^<[?]'),
    re.compile(r'^<![A-Z]'),
    re.compile(r'^<!\[CDATA\['),
    re.compile(
        r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|'
        r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|'
        r'fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|'
        r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|'
        r'nav|noframes|ol|optgroup|option|p|param|section|source|title|'
        r'summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
        r'(?:\s|[/]?[>]|$)',
        re.IGNORECASE),
    re.compile(
        '^(?:' + common.OPENTAG + '|' + common.CLOSETAG + ')\\s*$',
        re.IGNORECASE),
]
# Patterns that close an HTML block, indexed by html_block_type (1-5).
# Types 6 and 7 have no entry here; HtmlBlock.continue_ ends them on a
# blank line instead.
reHtmlBlockClose = [
    re.compile(r'.'),  # dummy for 0
    re.compile(r'<\/(?:script|pre|style)>', re.IGNORECASE),
    re.compile(r'-->'),
    re.compile(r'\?>'),
    re.compile(r'>'),
    re.compile(r'\]\]>'),
]
# A line made of three or more '*', '_' or '-' with optional spaces/tabs.
reThematicBreak = re.compile(
    r'^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$')
# First characters that can begin a block start; used by
# Parser.incorporate_line to skip the block-start checks quickly.
reMaybeSpecial = re.compile(r'^[#`~*+_=<>0-9-]')
# Any character that is not whitespace.
reNonSpace = re.compile(r'[^ \t\f\v\r\n]')
# Bullet list marker character at the start of the string.
reBulletListMarker = re.compile(r'^[*+-]')
# Ordered list marker: group 1 is the number (1-9 digits), group 2 is the
# delimiter ('.' or ')').
reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])')
# One to six '#' followed by spaces/tabs or end of string.
reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')
# Opening code fence: three or more backticks or tildes, with no further
# fence character of the same kind later on the line.
reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}(?!.*~)')
# Closing code fence: three or more backticks or tildes followed only by
# spaces up to the end of the string.
reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')
# Setext heading underline: a run of '=' or '-' plus optional spaces/tabs.
reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')
# Line terminators used to split the input into lines.
reLineEnding = re.compile(r'\r\n|\n|\r')
52
53
54
def is_blank(s):
    """Return True when *s* holds no non-whitespace character.

    An empty string counts as blank.
    """
    first_nonspace = reNonSpace.search(s)
    return first_nonspace is None
57
58
59
def is_space_or_tab(s):
    """Return True if *s* is exactly one space or one tab character.

    ``None`` (as returned by ``peek`` past the end of a line) and any other
    value yield False.

    The former ``unicode(s, 'latin1')`` round-trip has been removed: under
    Python 3 it raised and swallowed a NameError on every call, and under
    Python 2 its ``str(s)`` fallback could raise on non-ASCII text.  A plain
    comparison gives the same answer for every character.
    """
    return s == ' ' or s == '\t'
67
68
69
def peek(ln, pos):
    """Return the character of *ln* at index *pos*, or None when *pos* is
    at or beyond the end of the line."""
    return ln[pos] if pos < len(ln) else None
74
75
76
def ends_with_blank_line(block):
    """ Tell whether *block* ends with a blank line.

    Walks down through the last child of each 'list' / 'item' node; any
    other node type stops the descent."""
    node = block
    while node:
        if node.last_line_blank:
            return True
        if node.t not in ('list', 'item'):
            # only lists and items are searched further down
            return False
        node = node.last_child
    return False
88
89
90
def parse_list_marker(parser, container):
    """ Try to read a list marker at the parser's next non-space position.

    On success the parser is advanced past the marker and its following
    whitespace, and a dict describing the marker is returned (keys: type,
    tight, bullet_char, start, delimiter, padding, marker_offset).
    Returns None, leaving the parser untouched, when no marker is found."""
    line = parser.current_line
    marker_start = parser.next_nonspace
    rest = line[marker_start:]
    data = {
        'type': None,
        'tight': True,  # lists are tight by default
        'bullet_char': None,
        'start': None,
        'delimiter': None,
        'padding': None,
        'marker_offset': parser.indent,
    }
    bullet = re.search(reBulletListMarker, rest)
    ordered = re.search(reOrderedListMarker, rest)
    if bullet:
        m = bullet
        data['type'] = 'bullet'
        data['bullet_char'] = m.group()[0]
    elif ordered and (container.t != 'paragraph' or
                      ordered.group(1) == '1'):
        # an ordered list may only interrupt a paragraph if it starts at 1
        m = ordered
        data['type'] = 'ordered'
        data['start'] = int(m.group(1))
        data['delimiter'] = m.group(2)
    else:
        return None

    marker_len = len(m.group())
    marker_end = marker_start + marker_len

    # the marker must be followed by whitespace or the end of the line
    following = peek(line, marker_end)
    if following is not None and following != '\t' and following != ' ':
        return None

    # a marker interrupting a paragraph must have content on its line
    if container.t == 'paragraph' and \
       not re.search(reNonSpace, line[marker_end:]):
        return None

    # Match confirmed: consume the marker, then measure the padding.
    parser.advance_next_nonspace()  # to start of marker
    parser.advance_offset(marker_len, True)  # to end of marker
    spaces_start_col = parser.column
    spaces_start_offset = parser.offset
    while True:
        parser.advance_offset(1, True)
        following = peek(line, parser.offset)
        if parser.column - spaces_start_col >= 5 or \
           not is_space_or_tab(following):
            break
    blank_item = peek(line, parser.offset) is None
    spaces_after_marker = parser.column - spaces_start_col
    if 1 <= spaces_after_marker < 5 and not blank_item:
        data['padding'] = marker_len + spaces_after_marker
    else:
        # Too many / too few spaces, or an empty item: count a single
        # column of padding and rewind to just after the marker.
        data['padding'] = marker_len + 1
        parser.column = spaces_start_col
        parser.offset = spaces_start_offset
        if is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)

    return data
155
156
157
def lists_match(list_data, item_data):
    """
    Tell whether two list-data dicts describe the same kind of list:
    equal 'type', 'delimiter' and 'bullet_char' entries (missing keys
    compare as None).  Used to decide whether a new item joins the
    current list.
    """
    compared_keys = ('type', 'delimiter', 'bullet_char')
    return all(
        list_data.get(key) == item_data.get(key) for key in compared_keys)
166
167
168
class Block(object):
    """Base class for per-node-type block behaviour.

    Subclasses override the three static hooks below; the defaults here
    all return None.
    """
    # Whether the block stores raw text lines; None on the base class.
    accepts_lines = None

    @staticmethod
    def continue_(parser=None, container=None):
        """Hook: check whether the current line continues *container*."""
        return None

    @staticmethod
    def finalize(parser=None, block=None):
        """Hook: post-process *block* when it is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Hook: tell whether a child of node type *t* is allowed."""
        return None
182
183
184
class Document(Block):
    """Behaviour of the root 'document' node."""
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """The document matches every line: always returns 0."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when the document is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any child type is allowed except a bare 'item'."""
        return not t == 'item'
198
199
200
class List(Block):
    """Behaviour of 'list' nodes."""
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """A list matches every line: always returns 0."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Mark the list as loose (``tight = False``) when blank lines
        separate its items or the children of an item."""
        item = block.first_child
        while item:
            # a non-final item that ends with a blank line makes it loose
            if item.nxt and ends_with_blank_line(item):
                block.list_data['tight'] = False
                break
            # otherwise look at the item's own children for blank lines
            # between them
            child = item.first_child
            while child:
                if (item.nxt or child.nxt) and ends_with_blank_line(child):
                    block.list_data['tight'] = False
                    break
                child = child.nxt
            item = item.nxt

    @staticmethod
    def can_contain(t):
        """Only 'item' children are allowed."""
        return t == 'item'
229
230
231
class BlockQuote(Block):
    """Behaviour of 'block_quote' nodes."""
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Return 0 and consume the '>' marker (plus one optional space or
        tab) when the line continues the quote; return 1 otherwise."""
        line = parser.current_line
        if parser.indented or peek(line, parser.next_nonspace) != '>':
            return 1
        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        if is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a block quote is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any child type is allowed except a bare 'item'."""
        return not t == 'item'
253
254
255
class Item(Block):
    """Behaviour of list 'item' nodes."""
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Return 0 when the line belongs to the item, 1 otherwise.

        A blank line continues any item that already has children; a
        non-blank line must be indented at least as far as the item's
        content (marker offset + padding).
        """
        if parser.blank:
            if container.first_child is None:
                # Blank line after empty list item
                return 1
            parser.advance_next_nonspace()
            return 0

        list_data = container.list_data
        content_indent = list_data['marker_offset'] + list_data['padding']
        if parser.indent >= content_indent:
            parser.advance_offset(content_indent, True)
            return 0
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when an item is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any child type is allowed except a directly nested 'item'."""
        return not t == 'item'
282
283
284
class Heading(Block):
    """Behaviour of 'heading' nodes."""
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always returns 1: a heading can never contain more than one
        line, so a following line never continues it."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a heading is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Headings have no block children."""
        return False
299
300
301
class ThematicBreak(Block):
    """Behaviour of 'thematic_break' nodes."""
    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always returns 1: a thematic break can never contain more than
        one line, so a following line never continues it."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a thematic break is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Thematic breaks have no block children."""
        return False
316
317
318
class CodeBlock(Block):
    """Behaviour of 'code_block' nodes, both fenced and indented."""
    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """Check whether the current line continues the code block.

        Returns 0 if it does, 1 if it does not, and 2 when the line is the
        closing fence of a fenced block (the block is finalized here).
        """
        line = parser.current_line
        indent = parser.indent

        if not container.is_fenced:
            # indented code block
            if indent >= CODE_INDENT:
                parser.advance_offset(CODE_INDENT, True)
            elif parser.blank:
                parser.advance_next_nonspace()
            else:
                return 1
            return 0

        start = parser.next_nonspace
        closing = None
        if indent <= 3 and start < len(line) and \
           line[start] == container.fence_char:
            closing = re.search(reClosingCodeFence, line[start:])
        if closing and len(closing.group()) >= container.fence_length:
            # closing fence - we're at end of line, so we can return
            parser.finalize(container, parser.line_number)
            return 2

        # skip optional spaces of fence offset
        remaining = container.fence_offset
        while remaining > 0 and is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)
            remaining -= 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Split the collected text into ``info`` / ``literal`` (fenced) or
        trim trailing blank lines (indented), then drop string_content."""
        content = block.string_content
        if block.is_fenced:
            # first line becomes info string
            newline_pos = content.index('\n')
            info_line = content[:newline_pos]
            block.info = unescape_string(info_line.strip())
            block.literal = content[newline_pos + 1:]
        else:
            # indented: collapse trailing blank lines into one newline
            block.literal = re.sub(r'(\n *)+$', '\n', content)

        block.string_content = None

    @staticmethod
    def can_contain(t):
        """Code blocks have no block children."""
        return False
369
370
371
class HtmlBlock(Block):
    """Behaviour of 'html_block' nodes."""
    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """Return 1 when a blank line ends an HTML block of type 6 or 7;
        return 0 (block continues) in every other case."""
        ended_by_blank = parser.blank and (
            container.html_block_type == 6 or
            container.html_block_type == 7)
        return 1 if ended_by_blank else 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Store the collected text, minus trailing blank lines, in
        ``block.literal``."""
        block.literal = re.sub(r'(\n *)+$', '', block.string_content)
        # allow GC
        block.string_content = None

    @staticmethod
    def can_contain(t):
        """HTML blocks have no block children."""
        return False
391
392
393
class Paragraph(Block):
    """Behaviour of 'paragraph' nodes."""
    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """A blank line ends the paragraph (1); anything else continues
        it (0)."""
        if parser.blank:
            return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Strip leading link reference definitions from the paragraph and
        unlink it if nothing but whitespace is left afterwards."""
        found_reference = False

        # try parsing the beginning as link reference definitions:
        while peek(block.string_content, 0) == '[':
            consumed = parser.inline_parser.parseReference(
                block.string_content, parser.refmap)
            if not consumed:
                break
            block.string_content = block.string_content[consumed:]
            found_reference = True

        if found_reference and is_blank(block.string_content):
            block.unlink()

    @staticmethod
    def can_contain(t):
        """Paragraphs have no block children."""
        return False
418
419
420
class BlockStarts(object):
    """Block start functions.

    Each function inspects the parser's current line and, on a match,
    opens the corresponding block.

    Return values:
    0 = no match
    1 = matched container, keep going
    2 = matched leaf, no more block starts
    """
    # Names of the start functions, in the order they are tried.
    METHODS = [
        'block_quote',
        'atx_heading',
        'fenced_code_block',
        'html_block',
        'setext_heading',
        'thematic_break',
        'list_item',
        'indented_code_block',
    ]

    @staticmethod
    def block_quote(parser, container=None):
        """Open a block quote on a non-indented line starting with '>'."""
        if parser.indented or \
           peek(parser.current_line, parser.next_nonspace) != '>':
            return 0

        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        # optional following space
        if is_space_or_tab(peek(parser.current_line, parser.offset)):
            parser.advance_offset(1, True)
        parser.close_unmatched_blocks()
        parser.add_child('block_quote', parser.next_nonspace)
        return 1

    @staticmethod
    def atx_heading(parser, container=None):
        """Open a '#'-style heading and consume the rest of the line."""
        if parser.indented:
            return 0
        m = re.search(reATXHeadingMarker,
                      parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        marker = m.group()
        parser.advance_next_nonspace()
        parser.advance_offset(len(marker), False)
        parser.close_unmatched_blocks()
        container = parser.add_child('heading', parser.next_nonspace)
        # number of #s
        container.level = len(marker.strip())
        # A line of only #s is emptied first, then a trailing run of #s
        # preceded by whitespace is removed:
        text = parser.current_line[parser.offset:]
        text = re.sub(r'^[ \t]*#+[ \t]*$', '', text)
        text = re.sub(r'[ \t]+#+[ \t]*$', '', text)
        container.string_content = text
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def fenced_code_block(parser, container=None):
        """Open a fenced code block and record its fence details."""
        if parser.indented:
            return 0
        m = re.search(
            reCodeFence,
            parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        fence = m.group()
        fence_length = len(fence)
        parser.close_unmatched_blocks()
        container = parser.add_child('code_block', parser.next_nonspace)
        container.is_fenced = True
        container.fence_length = fence_length
        container.fence_char = fence[0]
        container.fence_offset = parser.indent
        parser.advance_next_nonspace()
        parser.advance_offset(fence_length, False)
        return 2

    @staticmethod
    def html_block(parser, container=None):
        """Open an HTML block of the first type (1-7) whose opening
        pattern matches; type 7 cannot interrupt a paragraph."""
        if parser.indented or \
           peek(parser.current_line, parser.next_nonspace) != '<':
            return 0

        s = parser.current_line[parser.next_nonspace:]
        for block_type in range(1, 8):
            if not re.search(reHtmlBlockOpen[block_type], s):
                continue
            if block_type < 7 or container.t != 'paragraph':
                parser.close_unmatched_blocks()
                # We don't adjust parser.offset;
                # spaces are part of the HTML block:
                b = parser.add_child('html_block', parser.offset)
                b.html_block_type = block_type
                return 2
        return 0

    @staticmethod
    def setext_heading(parser, container=None):
        """Turn the current paragraph into a heading when the line is a
        setext underline ('=' gives level 1, '-' gives level 2)."""
        if parser.indented or container.t != 'paragraph':
            return 0
        m = re.search(
            reSetextHeadingLine,
            parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        parser.close_unmatched_blocks()
        heading = Node('heading', container.sourcepos)
        if m.group()[0] == '=':
            heading.level = 1
        else:
            heading.level = 2
        heading.string_content = container.string_content
        # swap the paragraph for the heading in the tree
        container.insert_after(heading)
        container.unlink()
        parser.tip = heading
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def thematic_break(parser, container=None):
        """Open a thematic break and consume the rest of the line."""
        if parser.indented:
            return 0
        if not re.search(reThematicBreak,
                         parser.current_line[parser.next_nonspace:]):
            return 0

        parser.close_unmatched_blocks()
        parser.add_child('thematic_break', parser.next_nonspace)
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def list_item(parser, container=None):
        """Open a list item, first opening a new list when the tip is not
        a list or the marker does not match the container's list."""
        if parser.indented and container.t != 'list':
            return 0
        data = parse_list_marker(parser, container)
        if not data:
            return 0

        parser.close_unmatched_blocks()

        # add the list if needed
        if parser.tip.t != 'list' or \
           not lists_match(container.list_data, data):
            container = parser.add_child('list', parser.next_nonspace)
            container.list_data = data

        # add the list item
        container = parser.add_child('item', parser.next_nonspace)
        container.list_data = data
        return 1

    @staticmethod
    def indented_code_block(parser, container=None):
        """Open an indented code block on an indented, non-blank line,
        unless the tip is a paragraph."""
        if not parser.indented or \
           parser.tip.t == 'paragraph' or \
           parser.blank:
            return 0

        # indented code
        parser.advance_offset(CODE_INDENT, True)
        parser.close_unmatched_blocks()
        parser.add_child('code_block', parser.offset)
        return 2
579
580
581
class Parser(object):
    """Block-structure parser.

    Feeds the input to ``incorporate_line`` one line at a time, building a
    tree of ``Node`` objects rooted at ``self.doc``, then parses the inline
    content of paragraphs and headings.
    """

    def __init__(self, options=None):
        """Create a parser.

        ``options`` is handed to the InlineParser and kept on the
        instance.  It defaults to a fresh empty dict per parser; a shared
        ``{}`` default would be one object reused by every instance.
        """
        if options is None:
            options = {}
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.block_starts = BlockStarts()
        self.tip = self.doc
        self.oldtip = self.doc
        self.current_line = ''
        self.line_number = 0
        self.offset = 0
        self.column = 0
        self.next_nonspace = 0
        self.next_nonspace_column = 0
        self.indent = 0
        self.indented = False
        self.blank = False
        self.partially_consumed_tab = False
        self.all_closed = True
        self.last_matched_container = self.doc
        self.refmap = {}
        self.last_line_length = 0
        self.inline_parser = InlineParser(options)
        self.options = options

    @staticmethod
    def _block_class(tag):
        """Return the block behaviour class for node type *tag*, looked up
        by its camel-cased name in the ``CommonMark.blocks`` module."""
        return getattr(import_module('CommonMark.blocks'),
                       to_camel_case(tag))

    def add_line(self):
        """ Add a line to the block at the tip.  We assume the tip
        can accept lines -- that check should be done before calling this."""
        if self.partially_consumed_tab:
            # Skip over tab
            self.offset += 1
            # Add space characters for the unconsumed part of the tab
            chars_to_tab = 4 - (self.column % 4)
            self.tip.string_content += (' ' * chars_to_tab)
        self.tip.string_content += (self.current_line[self.offset:] + '\n')

    def add_child(self, tag, offset):
        """ Add block of type tag as a child of the tip.  If the tip can't
        accept children, close and finalize it and try its parent,
        and so on til we find a block that can accept children.

        Returns the new Node, which also becomes the tip."""
        while not self._block_class(self.tip.t).can_contain(tag):
            self.finalize(self.tip, self.line_number - 1)

        column_number = offset + 1
        new_block = Node(tag, [[self.line_number, column_number], [0, 0]])
        new_block.string_content = ''
        self.tip.append_child(new_block)
        self.tip = new_block
        return new_block

    def close_unmatched_blocks(self):
        """Finalize and close any unmatched blocks."""
        if not self.all_closed:
            while self.oldtip != self.last_matched_container:
                parent = self.oldtip.parent
                self.finalize(self.oldtip, self.line_number - 1)
                self.oldtip = parent
            self.all_closed = True

    def find_next_nonspace(self):
        """Locate the next non-space character from the current offset.

        Updates ``blank``, ``next_nonspace``, ``next_nonspace_column``,
        ``indent`` and ``indented``; ``offset`` and ``column`` are left
        unchanged.  Tabs advance the column to the next multiple of 4.
        """
        line = self.current_line
        length = len(line)
        i = self.offset
        cols = self.column

        while True:
            # '' marks the end of the line
            c = line[i] if i < length else ''
            if c == ' ':
                i += 1
                cols += 1
            elif c == '\t':
                i += 1
                cols += (4 - (cols % 4))
            else:
                break

        self.blank = (c == '\n' or c == '\r' or c == '')
        self.next_nonspace = i
        self.next_nonspace_column = cols
        self.indent = self.next_nonspace_column - self.column
        self.indented = self.indent >= CODE_INDENT

    def advance_next_nonspace(self):
        """Move offset/column to the position found by
        ``find_next_nonspace``."""
        self.offset = self.next_nonspace
        self.column = self.next_nonspace_column
        self.partially_consumed_tab = False

    def advance_offset(self, count, columns):
        """Advance through the current line, stopping at its end.

        With ``columns`` true, *count* is a number of columns: a tab may
        be consumed only partly, which sets ``partially_consumed_tab`` and
        leaves ``offset`` on the tab.  With ``columns`` false, *count* is
        a number of characters and a tab counts as one.
        """
        line = self.current_line
        length = len(line)
        while count > 0 and self.offset < length:
            if line[self.offset] == '\t':
                chars_to_tab = 4 - (self.column % 4)
                if columns:
                    self.partially_consumed_tab = chars_to_tab > count
                    chars_to_advance = min(count, chars_to_tab)
                    self.column += chars_to_advance
                    if not self.partially_consumed_tab:
                        self.offset += 1
                    count -= chars_to_advance
                else:
                    self.partially_consumed_tab = False
                    self.column += chars_to_tab
                    self.offset += 1
                    count -= 1
            else:
                self.partially_consumed_tab = False
                self.offset += 1
                # assume ascii; block starts are ascii
                self.column += 1
                count -= 1

    def incorporate_line(self, ln):
        """Analyze a line of text and update the document appropriately.

        We parse markdown text by calling this on each line of input,
        then finalizing the document.
        """
        all_matched = True

        container = self.doc
        self.oldtip = self.tip
        self.offset = 0
        self.column = 0
        self.blank = False
        self.partially_consumed_tab = False
        self.line_number += 1

        # replace NUL characters for security
        if re.search(r'\u0000', ln) is not None:
            ln = re.sub(r'\0', '\uFFFD', ln)

        self.current_line = ln

        # For each containing block, try to parse the associated line start.
        # Bail out on failure: container will point to the last matching block.
        # Set all_matched to false if not all containers match.
        last_child = container.last_child
        while last_child and last_child.is_open:
            container = last_child

            self.find_next_nonspace()
            rv = self._block_class(container.t).continue_(self, container)
            if rv == 0:
                # we've matched, keep going
                pass
            elif rv == 1:
                # we've failed to match a block
                all_matched = False
            elif rv == 2:
                # we've hit end of line for fenced code close and can return
                self.last_line_length = len(ln)
                return
            else:
                raise ValueError('returned illegal value, must be 0, 1, or 2')

            if not all_matched:
                # back up to last matching block
                container = container.parent
                break

            last_child = container.last_child

        self.all_closed = (container == self.oldtip)
        self.last_matched_container = container

        block_class = self._block_class(container.t)
        matched_leaf = container.t != 'paragraph' and block_class.accepts_lines
        starts = self.block_starts
        # Unless last matched container is a code block, try new container
        # starts, adding children to the last matched container:
        while not matched_leaf:
            self.find_next_nonspace()

            # this is a little performance optimization:
            if not self.indented and \
               not re.search(reMaybeSpecial, ln[self.next_nonspace:]):
                self.advance_next_nonspace()
                break

            for method_name in starts.METHODS:
                res = getattr(starts, method_name)(self, container)
                if res == 1:
                    # opened a container; look for further starts inside it
                    container = self.tip
                    break
                elif res == 2:
                    # opened a leaf; no more block starts on this line
                    container = self.tip
                    matched_leaf = True
                    break
            else:
                # nothing matched (this break leaves the outer while loop)
                self.advance_next_nonspace()
                break

        # What remains at the offset is a text line. Add the text to the
        # appropriate container.
        if not self.all_closed and not self.blank and \
           self.tip.t == 'paragraph':
            # lazy paragraph continuation
            self.add_line()
        else:
            # not a lazy continuation
            # finalize any blocks not matched
            self.close_unmatched_blocks()
            if self.blank and container.last_child:
                container.last_child.last_line_blank = True

            t = container.t

            # Block quote lines are never blank as they start with >
            # and we don't count blanks in fenced code for purposes of
            # tight/loose lists or breaking out of lists.  We also
            # don't set last_line_blank on an empty list item, or if we
            # just closed a fenced block.
            last_line_blank = self.blank and \
                not (t == 'block_quote' or
                     (t == 'code_block' and container.is_fenced) or
                     (t == 'item' and
                      not container.first_child and
                      container.sourcepos[0][0] == self.line_number))

            # propagate last_line_blank up through parents:
            cont = container
            while cont:
                cont.last_line_blank = last_line_blank
                cont = cont.parent

            if self._block_class(t).accepts_lines:
                self.add_line()
                # if HtmlBlock, check for end condition
                if t == 'html_block' and \
                   container.html_block_type >= 1 and \
                   container.html_block_type <= 5 and \
                   re.search(
                       reHtmlBlockClose[container.html_block_type],
                       self.current_line[self.offset:]):
                    self.finalize(container, self.line_number)
            elif self.offset < len(ln) and not self.blank:
                # create a paragraph container for one line
                container = self.add_child('paragraph', self.offset)
                self.advance_next_nonspace()
                self.add_line()

        self.last_line_length = len(ln)

    def finalize(self, block, line_number):
        """ Finalize a block.  Close it and do any necessary postprocessing,
        e.g. creating string_content from strings, setting the 'tight'
        or 'loose' status of a list, and parsing the beginnings
        of paragraphs for reference definitions.  Reset the tip to the
        parent of the closed block."""
        # read the parent first: the finalize hook may unlink the block
        above = block.parent
        block.is_open = False
        block.sourcepos[1] = [line_number, self.last_line_length]
        self._block_class(block.t).finalize(self, block)

        self.tip = above

    def process_inlines(self, block):
        """
        Walk through a block & children recursively, parsing string content
        into inline content where appropriate.
        """
        walker = block.walker()
        self.inline_parser.refmap = self.refmap
        self.inline_parser.options = self.options
        event = walker.nxt()
        while event is not None:
            node = event['node']
            t = node.t
            # parse on the way out of each paragraph / heading
            if not event['entering'] and (t == 'paragraph' or t == 'heading'):
                self.inline_parser.parse(node)
            event = walker.nxt()

    def parse(self, my_input):
        """ The main parsing function.  Returns a parsed document AST."""
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.tip = self.doc
        self.refmap = {}
        self.line_number = 0
        self.last_line_length = 0
        self.offset = 0
        self.column = 0
        self.last_matched_container = self.doc
        self.current_line = ''
        lines = re.split(reLineEnding, my_input)
        length = len(lines)
        if len(my_input) > 0 and my_input[-1] == '\n':
            # ignore last blank line created by final newline
            length -= 1
        for line in lines[:length]:
            self.incorporate_line(line)
        # close every block still open, up to and including the document
        while (self.tip):
            self.finalize(self.tip, length)
        self.process_inlines(self.doc)
        return self.doc
916