Passed
Push — master ( 1f8781...f45ba0 )
by Dongxin
01:12 queued 15s
created

Parser.find_next_nonspace()   D

Complexity

Conditions 8

Size

Total Lines 35

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 8
c 4
b 0
f 0
dl 0
loc 35
rs 4
1
from __future__ import absolute_import, unicode_literals
2
3
import re
4
from importlib import import_module
5
from CommonMark import common
6
from CommonMark.common import unescape_string
7
from CommonMark.inlines import InlineParser
8
from CommonMark.node import Node
9
from CommonMark.utils import to_camel_case
10
11
12
# Indent (in columns) from which a line is treated as indented code.
CODE_INDENT = 4

# Opening patterns for HTML blocks, indexed by HTML block type (1-7).
# Index 0 is a placeholder so a block type can be used directly as an index.
reHtmlBlockOpen = [
    re.compile(r'.'),  # dummy for 0
    re.compile(r'^<(?:script|pre|style)(?:\s|>|$)', re.IGNORECASE),
    re.compile(r'^<!--'),
    re.compile(r'^<[?]'),
    re.compile(r'^<![A-Z]'),
    re.compile(r'^<!\[CDATA\['),
    # Type 6: block-level tag names.  All six heading levels are accepted
    # (previously only h1 was), and the duplicated "title" entry is gone.
    re.compile(
        r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|'
        r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|'
        r'fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|'
        r'head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|'
        r'meta|nav|noframes|ol|optgroup|option|p|param|section|source|'
        r'summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
        r'(?:\s|[/]?[>]|$)',
        re.IGNORECASE),
    # Type 7: any other complete open or close tag alone on its line.
    re.compile(
        '^(?:' + common.OPENTAG + '|' + common.CLOSETAG + ')\\s*$',
        re.IGNORECASE),
]
33
# Closing patterns for HTML block types 1-5, indexed by block type.
# Index 0 is a placeholder; types 6 and 7 have no entry here.
reHtmlBlockClose = [
    re.compile(r'.'),  # dummy for 0
    re.compile(r'<\/(?:script|pre|style)>', re.IGNORECASE),
    re.compile(r'-->'),
    re.compile(r'\?>'),
    re.compile(r'>'),
    re.compile(r'\]\]>'),
]

# A line made of three or more '*', '_' or '-' with optional blanks between.
reThematicBreak = re.compile(
    r'^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$')

# First characters that may begin a block start; used as a quick pre-filter.
reMaybeSpecial = re.compile(r'^[#`~*+_=<>0-9-]')

# Any single character that is not whitespace.
reNonSpace = re.compile(r'[^ \t\f\v\r\n]')

# List markers: a bullet character, or 1-9 digits followed by '.' or ')'.
reBulletListMarker = re.compile(r'^[*+-]')
reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])')

# One to six '#' followed by blanks or the end of the line.
reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')

# Opening fence (no fence character may follow on the line) and closing
# fence (only spaces may follow).
reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}(?!.*~)')
reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')

# Underline of a setext heading: a run of '=' or of '-'.
reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')

# The three recognised line terminators.
reLineEnding = re.compile(r'\r\n|\n|\r')
52
53
54
def is_blank(s):
    """Tell whether ``s`` holds nothing but whitespace.

    An empty string counts as blank, since it has no non-space character.
    """
    return not reNonSpace.search(s)
57
58
59
def is_space_or_tab(s):
    """Return True when ``s`` is exactly one space or one tab.

    ``None`` (what ``peek`` returns past the end of a line) and every
    other value give False.  The value is compared directly: no encoding
    round-trip is needed, and none is attempted, so non-ASCII characters
    can never raise here.
    """
    return s in (' ', '\t')
67
68
69
def peek(ln, pos):
    """Return the character of ``ln`` at index ``pos``, or None.

    None is returned when ``pos`` is at or beyond the end of ``ln``.  A
    negative ``pos`` is not special-cased and indexes from the end.
    """
    return ln[pos] if pos < len(ln) else None
74
75
76
def ends_with_blank_line(block):
    """Report whether ``block`` ends with a blank line.

    The check follows ``last_child`` links for as long as the current
    node is a 'list' or an 'item'; any other node type stops the descent.
    """
    node = block
    while node:
        if node.last_line_blank:
            return True
        if node.t not in ('list', 'item'):
            return False
        node = node.last_child

    return False
88
89
90
def parse_list_marker(parser, container):
    """Try to read a list marker at the parser's next non-space position.

    On success the parser is advanced past the marker and its padding and
    a dict is returned with the keys 'type', 'tight', 'bullet_char',
    'start', 'delimiter', 'padding' and 'marker_offset'.  When there is no
    usable marker, None is returned and the parser is left untouched.
    """
    line = parser.current_line
    marker_start = parser.next_nonspace
    marker_offset = parser.indent
    rest = line[marker_start:]

    bullet = reBulletListMarker.search(rest)
    ordered = reOrderedListMarker.search(rest)
    if bullet:
        marker = bullet.group()
        list_type = 'bullet'
        bullet_char = marker[0]
        start = None
        delimiter = None
    elif ordered and (container.t != 'paragraph' or
                      ordered.group(1) == '1'):
        # inside a paragraph only a marker numbered 1 is accepted
        marker = ordered.group()
        list_type = 'ordered'
        bullet_char = None
        start = int(ordered.group(1))
        delimiter = ordered.group(2)
    else:
        return None

    marker_len = len(marker)
    marker_end = marker_start + marker_len

    # the marker must be followed by a space, a tab, or the end of line
    following = peek(line, marker_end)
    if following is not None and following not in ('\t', ' '):
        return None

    # a marker interrupting a paragraph needs content on its first line
    if container.t == 'paragraph' and \
       not reNonSpace.search(line[marker_end:]):
        return None

    # we have a match: consume the marker, then measure the blanks after it
    parser.advance_next_nonspace()  # to start of marker
    parser.advance_offset(marker_len, True)  # to end of marker
    spaces_start_col = parser.column
    spaces_start_offset = parser.offset
    parser.advance_offset(1, True)
    while parser.column - spaces_start_col < 5 and \
            is_space_or_tab(peek(parser.current_line, parser.offset)):
        parser.advance_offset(1, True)

    blank_item = peek(parser.current_line, parser.offset) is None
    spaces_after_marker = parser.column - spaces_start_col
    if 1 <= spaces_after_marker < 5 and not blank_item:
        padding = marker_len + spaces_after_marker
    else:
        # rewind to just after the marker and take at most one blank
        padding = marker_len + 1
        parser.column = spaces_start_col
        parser.offset = spaces_start_offset
        if is_space_or_tab(peek(parser.current_line, parser.offset)):
            parser.advance_offset(1, True)

    return {
        'type': list_type,
        'tight': True,  # lists are tight by default
        'bullet_char': bullet_char,
        'start': start,
        'delimiter': delimiter,
        'padding': padding,
        'marker_offset': marker_offset,
    }
155
156
157
def lists_match(list_data, item_data):
    """
    Compare the 'type', 'delimiter' and 'bullet_char' entries of two list
    data dicts and return True only if all three agree.  A missing key is
    treated as None.  Used when deciding whether an item joins a list.
    """
    compared_keys = ('type', 'delimiter', 'bullet_char')
    return all(list_data.get(key) == item_data.get(key)
               for key in compared_keys)
166
167
168
class Block(object):
    """Base for the per-node-type behaviour classes.

    Every hook here is a no-op returning None; subclasses override them.
    """

    # whether text lines may be appended to a node of this type
    accepts_lines = None

    @staticmethod
    def continue_(parser=None, container=None):
        """Hook called to check whether a line continues ``container``."""
        return None

    @staticmethod
    def finalize(parser=None, block=None):
        """Hook called when ``block`` is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Hook telling whether a child of node type ``t`` is allowed."""
        return None
182
183
184
class Document(Block):
    """Behaviour of the 'document' node."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always report a match (0)."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when the document is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Allow every child type except 'item'."""
        return not t == 'item'
198
199
200
class List(Block):
    """Behaviour of the 'list' node."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always report a match (0)."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Mark the list as loose when blank lines separate its parts.

        ``block.list_data['tight']`` is set to False if a non-final item
        ends with a blank line, or if a child of an item ends with a blank
        line and is followed by a sibling or by another item.  Otherwise
        the flag is left as it was.
        """
        item = block.first_child
        while item:
            # non-final list item ending with a blank line
            if ends_with_blank_line(item) and item.nxt:
                block.list_data['tight'] = False
                return
            # look at the item's own children for blank lines between them
            child = item.first_child
            while child:
                if ends_with_blank_line(child) and (item.nxt or child.nxt):
                    block.list_data['tight'] = False
                    return
                child = child.nxt
            item = item.nxt

    @staticmethod
    def can_contain(t):
        """Allow only 'item' children."""
        return t == 'item'
229
230
231
class BlockQuote(Block):
    """Behaviour of the 'block_quote' node."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Consume a leading '>' and one optional blank after it.

        Returns 0 when the marker was found on a non-indented line, and 1
        (no match) otherwise.
        """
        ln = parser.current_line
        if parser.indented or peek(ln, parser.next_nonspace) != '>':
            return 1
        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        if is_space_or_tab(peek(ln, parser.offset)):
            parser.advance_offset(1, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a block quote is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Allow every child type except 'item'."""
        return not t == 'item'
253
254
255
class Item(Block):
    """Behaviour of the 'item' (list item) node."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Decide whether the current line still belongs to the item.

        A blank line matches unless the item has no children yet.  A
        non-blank line matches when it is indented at least as far as the
        item's marker offset plus padding; that many columns are consumed.
        Returns 0 on a match and 1 otherwise.
        """
        if parser.blank:
            if container.first_child is None:
                # Blank line after empty list item
                return 1
            parser.advance_next_nonspace()
            return 0

        content_indent = (container.list_data['marker_offset'] +
                          container.list_data['padding'])
        if parser.indent < content_indent:
            return 1
        parser.advance_offset(content_indent, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when an item is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Allow every child type except 'item'."""
        return not t == 'item'
282
283
284
class Heading(Block):
    """Behaviour of the 'heading' node."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Never match (1): a heading cannot span more than one line."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a heading is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """A heading accepts no child blocks."""
        return False
299
300
301
class ThematicBreak(Block):
    """Behaviour of the 'thematic_break' node."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Never match (1): a thematic break cannot span several lines."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a thematic break is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """A thematic break accepts no child blocks."""
        return False
316
317
318
class CodeBlock(Block):
    """Behaviour of the 'code_block' node, fenced or indented."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """Check whether the current line continues the code block.

        Returns 0 when the line belongs to the block, 1 when an indented
        block ends, and 2 when a closing fence was found (the block has
        then already been finalized).
        """
        ln = parser.current_line
        indent = parser.indent

        if not container.is_fenced:
            # indented
            if indent >= CODE_INDENT:
                parser.advance_offset(CODE_INDENT, True)
            elif parser.blank:
                parser.advance_next_nonspace()
            else:
                return 1
            return 0

        start = parser.next_nonspace
        closing = None
        if indent <= 3 and start < len(ln) and \
           ln[start] == container.fence_char:
            closing = reClosingCodeFence.search(ln[start:])
        if closing and len(closing.group()) >= container.fence_length:
            # closing fence - we're at end of line, so we can return
            parser.finalize(container, parser.line_number)
            return 2

        # skip optional spaces of fence offset
        remaining = container.fence_offset
        while remaining > 0 and is_space_or_tab(peek(ln, parser.offset)):
            parser.advance_offset(1, True)
            remaining -= 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Turn the accumulated text into ``block.literal``.

        For a fenced block the first line becomes ``block.info`` (stripped
        and unescaped) and the rest the literal.  For an indented block,
        trailing blank lines are collapsed into one newline.
        """
        if block.is_fenced:
            # first line becomes info string
            content = block.string_content
            split_at = content.index('\n')
            block.info = unescape_string(content[:split_at].strip())
            block.literal = content[split_at + 1:]
        else:
            # indented
            block.literal = re.sub(r'(\n *)+$', '\n', block.string_content)

        block.string_content = None

    @staticmethod
    def can_contain(t):
        """A code block accepts no child blocks."""
        return False
369
370
371
class HtmlBlock(Block):
    """Behaviour of the 'html_block' node."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """Return 1 (no match) on a blank line for block types 6 and 7.

        Every other line continues the block (0).
        """
        if parser.blank and container.html_block_type in (6, 7):
            return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Store the text, minus trailing blank lines, as the literal."""
        block.literal = re.sub(r'(\n *)+$', '', block.string_content)
        # allow GC
        block.string_content = None

    @staticmethod
    def can_contain(t):
        """An HTML block accepts no child blocks."""
        return False
391
392
393
class Paragraph(Block):
    """Behaviour of the 'paragraph' node."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """A blank line ends the paragraph (1); any other line matches (0)."""
        if parser.blank:
            return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Strip leading link reference definitions from the paragraph.

        While the text starts with '[', the inline parser is asked to
        parse a reference definition into ``parser.refmap``; each parsed
        definition is cut from the text.  If at least one was removed and
        only whitespace is left, the paragraph is unlinked from the tree.
        """
        removed_any = False

        # try parsing the beginning as link reference definitions:
        while peek(block.string_content, 0) == '[':
            consumed = parser.inline_parser.parseReference(
                block.string_content, parser.refmap)
            if not consumed:
                break
            block.string_content = block.string_content[consumed:]
            removed_any = True

        if removed_any and is_blank(block.string_content):
            block.unlink()

    @staticmethod
    def can_contain(t):
        """A paragraph accepts no child blocks."""
        return False
418
419
420
class BlockStarts(object):
    """Block start functions.

    Each function inspects the parser's current line and, if it begins a
    block of its kind, adds that block to the tree.

    Return values:
    0 = no match
    1 = matched container, keep going
    2 = matched leaf, no more block starts
    """

    # names of the start functions, in the order they are tried
    METHODS = [
        'block_quote',
        'atx_heading',
        'fenced_code_block',
        'html_block',
        'setext_heading',
        'thematic_break',
        'list_item',
        'indented_code_block',
    ]

    @staticmethod
    def block_quote(parser, container=None):
        """Start a block quote on a non-indented line beginning with '>'."""
        if parser.indented or \
           peek(parser.current_line, parser.next_nonspace) != '>':
            return 0

        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        # optional following space
        if is_space_or_tab(peek(parser.current_line, parser.offset)):
            parser.advance_offset(1, True)
        parser.close_unmatched_blocks()
        parser.add_child('block_quote', parser.next_nonspace)
        return 1

    @staticmethod
    def atx_heading(parser, container=None):
        """Start a heading introduced by one to six '#' characters."""
        if parser.indented:
            return 0
        found = reATXHeadingMarker.search(
            parser.current_line[parser.next_nonspace:])
        if not found:
            return 0

        marker = found.group()
        parser.advance_next_nonspace()
        parser.advance_offset(len(marker), False)
        parser.close_unmatched_blocks()
        heading = parser.add_child('heading', parser.next_nonspace)
        # number of #s
        heading.level = len(marker.strip())
        # drop a line made only of #s, then any trailing run of #s
        text = parser.current_line[parser.offset:]
        text = re.sub(r'^[ \t]*#+[ \t]*$', '', text)
        heading.string_content = re.sub(r'[ \t]+#+[ \t]*$', '', text)
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def fenced_code_block(parser, container=None):
        """Start a fenced code block at an opening fence."""
        if parser.indented:
            return 0
        found = reCodeFence.search(
            parser.current_line[parser.next_nonspace:])
        if not found:
            return 0

        fence = found.group()
        fence_length = len(fence)
        parser.close_unmatched_blocks()
        code = parser.add_child('code_block', parser.next_nonspace)
        code.is_fenced = True
        code.fence_length = fence_length
        code.fence_char = fence[0]
        code.fence_offset = parser.indent
        parser.advance_next_nonspace()
        parser.advance_offset(fence_length, False)
        return 2

    @staticmethod
    def html_block(parser, container=None):
        """Start an HTML block when the line opens one of the seven types."""
        if parser.indented or \
           peek(parser.current_line, parser.next_nonspace) != '<':
            return 0

        s = parser.current_line[parser.next_nonspace:]
        for block_type in range(1, 8):
            if not reHtmlBlockOpen[block_type].search(s):
                continue
            # type 7 is not allowed while the container is a paragraph
            if block_type == 7 and container.t == 'paragraph':
                continue
            parser.close_unmatched_blocks()
            # We don't adjust parser.offset;
            # spaces are part of the HTML block:
            html = parser.add_child('html_block', parser.offset)
            html.html_block_type = block_type
            return 2
        return 0

    @staticmethod
    def setext_heading(parser, container=None):
        """Turn the current paragraph into a heading on an underline."""
        if parser.indented or container.t != 'paragraph':
            return 0
        found = reSetextHeadingLine.search(
            parser.current_line[parser.next_nonspace:])
        if not found:
            return 0

        parser.close_unmatched_blocks()
        heading = Node('heading', container.sourcepos)
        if found.group()[0] == '=':
            heading.level = 1
        else:
            heading.level = 2
        heading.string_content = container.string_content
        # the heading takes the paragraph's place in the tree
        container.insert_after(heading)
        container.unlink()
        parser.tip = heading
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def thematic_break(parser, container=None):
        """Start a thematic break on a line of '*', '_' or '-' characters."""
        if parser.indented:
            return 0
        if not reThematicBreak.search(
                parser.current_line[parser.next_nonspace:]):
            return 0

        parser.close_unmatched_blocks()
        parser.add_child('thematic_break', parser.next_nonspace)
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def list_item(parser, container=None):
        """Start a list item, opening a new list first when needed."""
        if parser.indented and container.t != 'list':
            return 0
        data = parse_list_marker(parser, container)
        if not data:
            return 0

        parser.close_unmatched_blocks()

        # add the list if needed
        if parser.tip.t != 'list' or \
           not lists_match(container.list_data, data):
            container = parser.add_child('list', parser.next_nonspace)
            container.list_data = data

        # add the list item
        container = parser.add_child('item', parser.next_nonspace)
        container.list_data = data
        return 1

    @staticmethod
    def indented_code_block(parser, container=None):
        """Start an indented code block.

        Requires an indented, non-blank line and a tip that is not a
        paragraph.
        """
        if not parser.indented or \
           parser.tip.t == 'paragraph' or \
           parser.blank:
            return 0

        # indented code
        parser.advance_offset(CODE_INDENT, True)
        parser.close_unmatched_blocks()
        parser.add_child('code_block', parser.offset)
        return 2
579
580
581
class Parser(object):
    """Block-level parser that turns input text into a tree of nodes.

    ``parse`` feeds the input to ``incorporate_line`` one line at a time,
    finalizes the blocks still open, and then has the inline parser
    process the text of paragraphs and headings.
    """

    def __init__(self, options=None):
        """Create a parser.

        ``options`` is handed to the inline parser and kept on the
        instance.  Omitting it (or passing None) gives a fresh empty dict,
        so no dict is shared between parser instances.
        """
        if options is None:
            options = {}
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.block_starts = BlockStarts()
        self.tip = self.doc
        self.oldtip = self.doc
        self.current_line = ''
        self.line_number = 0
        self.offset = 0
        self.column = 0
        self.next_nonspace = 0
        self.next_nonspace_column = 0
        self.indent = 0
        self.indented = False
        self.blank = False
        self.partially_consumed_tab = False
        self.all_closed = True
        self.last_matched_container = self.doc
        self.refmap = {}
        self.last_line_length = 0
        self.inline_parser = InlineParser(options)
        self.options = options

    @staticmethod
    def _block_class(node_type):
        """Fetch from CommonMark.blocks the class for ``node_type``.

        The attribute name is ``to_camel_case(node_type)``.
        """
        return getattr(import_module('CommonMark.blocks'),
                       to_camel_case(node_type))

    def add_line(self):
        """ Add a line to the block at the tip.  We assume the tip
        can accept lines -- that check should be done before calling this."""
        if self.partially_consumed_tab:
            # Skip over tab
            self.offset += 1
            # Add space characters for the part of the tab not yet consumed
            chars_to_tab = 4 - (self.column % 4)
            self.tip.string_content += (' ' * chars_to_tab)
        self.tip.string_content += (self.current_line[self.offset:] + '\n')

    def add_child(self, tag, offset):
        """ Add block of type tag as a child of the tip.  If the tip can't
        accept children, close and finalize it and try its parent,
        and so on til we find a block that can accept children.

        Returns the new node, which also becomes the tip.  Its source
        position starts at the current line, column ``offset + 1``."""
        # finalize() moves the tip to the parent, so this walks upwards
        while not self._block_class(self.tip.t).can_contain(tag):
            self.finalize(self.tip, self.line_number - 1)

        column_number = offset + 1
        new_block = Node(tag, [[self.line_number, column_number], [0, 0]])
        new_block.string_content = ''
        self.tip.append_child(new_block)
        self.tip = new_block
        return new_block

    def close_unmatched_blocks(self):
        """Finalize and close any unmatched blocks."""
        if self.all_closed:
            return
        while self.oldtip != self.last_matched_container:
            parent = self.oldtip.parent
            self.finalize(self.oldtip, self.line_number - 1)
            self.oldtip = parent
        self.all_closed = True

    def find_next_nonspace(self):
        """Locate the first character after the blanks at the offset.

        Starting from ``offset``/``column``, spaces and tabs are skipped
        (a tab advances the column to the next multiple of 4).  The
        results are stored in ``next_nonspace``, ``next_nonspace_column``,
        ``indent``, ``indented`` and ``blank``; ``offset`` and ``column``
        themselves are not changed.
        """
        line = self.current_line
        end = len(line)
        i = self.offset
        cols = self.column

        while i < end:
            c = line[i]
            if c == ' ':
                cols += 1
            elif c == '\t':
                cols += (4 - (cols % 4))
            else:
                break
            i += 1

        # '' stands for "ran off the end of the line"
        c = line[i] if i < end else ''
        self.blank = c in ('\n', '\r', '')
        self.next_nonspace = i
        self.next_nonspace_column = cols
        self.indent = self.next_nonspace_column - self.column
        self.indented = self.indent >= CODE_INDENT

    def advance_next_nonspace(self):
        """Move the offset and column to the next non-space position."""
        self.offset = self.next_nonspace
        self.column = self.next_nonspace_column
        self.partially_consumed_tab = False

    def advance_offset(self, count, columns):
        """Advance along the current line, stopping at its end.

        With ``columns`` true, ``count`` is a number of columns and a tab
        may be consumed only in part (``partially_consumed_tab`` is then
        set and the offset stays on the tab).  With ``columns`` false,
        ``count`` is a number of characters.
        """
        current_line = self.current_line
        end = len(current_line)
        while count > 0 and self.offset < end:
            if current_line[self.offset] == '\t':
                chars_to_tab = 4 - (self.column % 4)
                if columns:
                    self.partially_consumed_tab = chars_to_tab > count
                    chars_to_advance = min(count, chars_to_tab)
                    self.column += chars_to_advance
                    self.offset += 0 if self.partially_consumed_tab else 1
                    count -= chars_to_advance
                else:
                    self.partially_consumed_tab = False
                    self.column += chars_to_tab
                    self.offset += 1
                    count -= 1
            else:
                self.partially_consumed_tab = False
                self.offset += 1
                # assume ascii; block starts are ascii
                self.column += 1
                count -= 1

    def incorporate_line(self, ln):
        """Analyze a line of text and update the document appropriately.

        We parse markdown text by calling this on each line of input,
        then finalizing the document.
        """
        all_matched = True

        container = self.doc
        self.oldtip = self.tip
        self.offset = 0
        self.column = 0
        self.blank = False
        self.partially_consumed_tab = False
        self.line_number += 1

        # replace NUL characters for security
        if '\0' in ln:
            ln = ln.replace('\0', '\uFFFD')

        self.current_line = ln

        # For each containing block, try to parse the associated line start.
        # Bail out on failure: container will point to the last matching block.
        # Set all_matched to false if not all containers match.
        last_child = container.last_child
        while last_child and last_child.is_open:
            container = last_child

            self.find_next_nonspace()
            rv = self._block_class(container.t).continue_(self, container)
            if rv == 0:
                # we've matched, keep going
                pass
            elif rv == 1:
                # we've failed to match a block
                all_matched = False
            elif rv == 2:
                # we've hit end of line for fenced code close and can return
                self.last_line_length = len(ln)
                return
            else:
                raise ValueError('returned illegal value, must be 0, 1, or 2')

            if not all_matched:
                # back up to last matching block
                container = container.parent
                break

            last_child = container.last_child

        self.all_closed = (container == self.oldtip)
        self.last_matched_container = container

        block_class = self._block_class(container.t)
        matched_leaf = container.t != 'paragraph' and block_class.accepts_lines
        starts = self.block_starts
        # Unless last matched container is a code block, try new container
        # starts, adding children to the last matched container:
        while not matched_leaf:
            self.find_next_nonspace()

            # this is a little performance optimization:
            if not self.indented and \
               not re.search(reMaybeSpecial, ln[self.next_nonspace:]):
                self.advance_next_nonspace()
                break

            for method_name in starts.METHODS:
                res = getattr(starts, method_name)(self, container)
                if res == 1:
                    container = self.tip
                    break
                elif res == 2:
                    container = self.tip
                    matched_leaf = True
                    break
            else:
                # nothing matched
                self.advance_next_nonspace()
                break

        # What remains at the offset is a text line. Add the text to the
        # appropriate container.
        if not self.all_closed and not self.blank and \
           self.tip.t == 'paragraph':
            # lazy paragraph continuation
            self.add_line()
        else:
            # not a lazy continuation
            # finalize any blocks not matched
            self.close_unmatched_blocks()
            if self.blank and container.last_child:
                container.last_child.last_line_blank = True

            t = container.t

            # Block quote lines are never blank as they start with >
            # and we don't count blanks in fenced code for purposes of
            # tight/loose lists or breaking out of lists.  We also
            # don't set last_line_blank on an empty list item, or if we
            # just closed a fenced block.
            last_line_blank = self.blank and \
                not (t == 'block_quote' or
                     (t == 'code_block' and container.is_fenced) or
                     (t == 'item' and
                      not container.first_child and
                      container.sourcepos[0][0] == self.line_number))

            # propagate last_line_blank up through parents:
            cont = container
            while cont:
                cont.last_line_blank = last_line_blank
                cont = cont.parent

            if self._block_class(t).accepts_lines:
                self.add_line()
                # if HtmlBlock, check for end condition
                if t == 'html_block' and \
                   1 <= container.html_block_type <= 5 and \
                   re.search(
                       reHtmlBlockClose[container.html_block_type],
                       self.current_line[self.offset:]):
                    self.finalize(container, self.line_number)
            elif self.offset < len(ln) and not self.blank:
                # create a paragraph container for one line
                container = self.add_child('paragraph', self.offset)
                self.advance_next_nonspace()
                self.add_line()

        self.last_line_length = len(ln)

    def finalize(self, block, line_number):
        """ Finalize a block.  Close it and do any necessary postprocessing,
        e.g. creating string_content from strings, setting the 'tight'
        or 'loose' status of a list, and parsing the beginnings
        of paragraphs for reference definitions.  Reset the tip to the
        parent of the closed block."""
        above = block.parent
        block.is_open = False
        # end position: the given line, at the length of the previous line
        block.sourcepos[1] = [line_number, self.last_line_length]
        self._block_class(block.t).finalize(self, block)

        self.tip = above

    def process_inlines(self, block):
        """
        Walk through a block & children recursively, parsing string content
        into inline content where appropriate.
        """
        walker = block.walker()
        self.inline_parser.refmap = self.refmap
        self.inline_parser.options = self.options
        event = walker.nxt()
        while event is not None:
            node = event['node']
            # parse on the way out of the node, not on the way in
            if not event['entering'] and node.t in ('paragraph', 'heading'):
                self.inline_parser.parse(node)
            event = walker.nxt()

    def parse(self, my_input):
        """ The main parsing function.  Returns a parsed document AST."""
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.tip = self.doc
        self.refmap = {}
        self.line_number = 0
        self.last_line_length = 0
        self.offset = 0
        self.column = 0
        self.last_matched_container = self.doc
        self.current_line = ''
        lines = re.split(reLineEnding, my_input)
        length = len(lines)
        if len(my_input) > 0 and my_input[-1] == '\n':
            # ignore last blank line created by final newline
            length -= 1
        for line in lines[:length]:
            self.incorporate_line(line)
        # finalize() resets the tip to the parent, so this closes every
        # block that is still open, ending with the document itself
        while self.tip:
            self.finalize(self.tip, length)
        self.process_inlines(self.doc)
        return self.doc
912