Passed
Push — master ( 86008f...e608a9 )
by Dongxin
45s
created

Parser.find_next_nonspace()   C

Complexity

Conditions 7

Size

Total Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 7
c 3
b 0
f 0
dl 0
loc 31
rs 5.5
1
from __future__ import absolute_import, unicode_literals
2
3
import re
4
from importlib import import_module
5
from CommonMark import common
6
from CommonMark.common import unescape_string
7
from CommonMark.inlines import InlineParser
8
from CommonMark.node import Node
9
from CommonMark.utils import to_camel_case
10
11
12
# Indentation (in columns) at or above which a line counts as indented.
CODE_INDENT = 4

# Start conditions for HTML blocks.  The list is indexed by HTML block type
# (1-7); index 0 only holds a placeholder so the type can be used as index.
reHtmlBlockOpen = [
    re.compile(r'.'),  # dummy for 0
    # 1: <script>, <pre> or <style>
    re.compile(r'^<(?:script|pre|style)(?:\s|>|$)', re.IGNORECASE),
    # 2: HTML comment
    re.compile(r'^<!--'),
    # 3: processing instruction
    re.compile(r'^<[?]'),
    # 4: declaration
    re.compile(r'^<![A-Z]'),
    # 5: CDATA section
    re.compile(r'^<!\[CDATA\['),
    # 6: opening or closing tag of a block-level element.  All six heading
    # levels are listed (h1-h6); previously only h1 was recognised.
    re.compile(
        r'^<[/]?(?:address|article|aside|base|basefont|blockquote|body|'
        r'caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|'
        r'fieldset|figcaption|figure|footer|form|frame|frameset|h[1-6]|head|'
        r'header|hr|html|iframe|legend|li|link|main|menu|menuitem|meta|'
        r'nav|noframes|ol|optgroup|option|p|param|section|source|'
        r'summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
        r'(?:\s|[/]?[>]|$)',
        re.IGNORECASE),
    # 7: any other complete open or closing tag alone on its line
    re.compile(
        '^(?:' + common.OPENTAG + '|' + common.CLOSETAG + ')\\s*$',
        re.IGNORECASE),
]
33
# End conditions for HTML blocks of types 1-5, indexed by block type.
# Index 0 is a placeholder so the type can be used as index directly.
reHtmlBlockClose = [
    re.compile(r'.'),  # dummy for 0
    re.compile(r'<\/(?:script|pre|style)>', re.I),  # type 1
    re.compile(r'-->'),  # type 2
    re.compile(r'\?>'),  # type 3
    re.compile(r'>'),  # type 4
    re.compile(r'\]\]>'),  # type 5
]

# A line made of three or more '*', '_' or '-' with optional blanks.
reThematicBreak = re.compile(
    r'^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$')

# First characters that may begin something other than plain text.
reMaybeSpecial = re.compile(r'^[#`~*+_=<>0-9-]')

# Any character that is not whitespace.
reNonSpace = re.compile(r'[^ \t\f\v\r\n]')

# List markers: a bullet character, or 1-9 digits followed by '.' or ')'.
reBulletListMarker = re.compile(r'^[*+-]')
reOrderedListMarker = re.compile(r'^(\d{1,9})([.)])')

# One to six '#' followed by blanks or the end of the line.
reATXHeadingMarker = re.compile(r'^#{1,6}(?:[ \t]+|$)')

# Opening and closing code fences (backticks or tildes).
reCodeFence = re.compile(r'^`{3,}(?!.*`)|^~{3,}(?!.*~)')
reClosingCodeFence = re.compile(r'^(?:`{3,}|~{3,})(?= *$)')

# Setext heading underline: a run of '=' or '-'.
reSetextHeadingLine = re.compile(r'^(?:=+|-+)[ \t]*$')

# Any of the three line-ending conventions.
reLineEnding = re.compile(r'\r\n|\n|\r')
52
53
54
def is_blank(s):
    """Tell whether ``s`` holds nothing but whitespace.

    Returns True when ``reNonSpace`` finds no match in ``s`` (which
    includes the empty string), False otherwise.
    """
    return not reNonSpace.search(s)
57
58
59
def is_space_or_tab(s):
    """Return True if ``s`` is exactly one space or one tab character.

    ``s`` is normally the result of ``peek()``, so it may be ``None`` (past
    the end of the line); ``None`` and any other value yield False.

    The value is compared directly.  The previous latin-1/UTF-8 round trip
    through ``unicode()`` raised NameError on Python 3 and TypeError on
    Python 2 for text that was already unicode.
    """
    return s in (' ', '\t')
63
64
65
def peek(ln, pos):
    """Return the element of ``ln`` at index ``pos``.

    Gives ``None`` instead of raising when ``pos`` is at or beyond the end
    of ``ln``.
    """
    return ln[pos] if pos < len(ln) else None
70
71
72
def ends_with_blank_line(block):
    """Report whether ``block`` ends with a blank line.

    Starting at ``block``, follow ``last_child`` links for as long as the
    current node is a 'list' or an 'item'.  Returns True as soon as a
    visited node has ``last_line_blank`` set, False otherwise.
    """
    node = block
    while node:
        if node.last_line_blank:
            return True
        if node.t not in ('list', 'item'):
            # Only lists and items are descended into.
            return False
        node = node.last_child
    return False
84
85
86
def parse_list_marker(parser, container):
    """Try to read a list marker at the parser's next non-space position.

    Returns a dict describing the marker (keys 'type', 'tight',
    'bullet_char', 'start', 'delimiter', 'padding', 'marker_offset'), or
    None when the text is not a usable list marker.  On success the parser's
    offset and column have been advanced past the marker and its padding.
    """
    line = parser.current_line
    rest = line[parser.next_nonspace:]
    data = {
        'type': None,
        'tight': True,  # lists are tight by default
        'bullet_char': None,
        'start': None,
        'delimiter': None,
        'padding': None,
        'marker_offset': parser.indent,
    }

    bullet = re.search(reBulletListMarker, rest)
    ordered = re.search(reOrderedListMarker, rest)
    if bullet:
        m = bullet
        data['type'] = 'bullet'
        data['bullet_char'] = m.group()[0]
    elif ordered and (container.t != 'paragraph' or
                      ordered.group(1) == '1'):
        # inside a paragraph, an ordered marker is only accepted
        # when its number is exactly '1'
        m = ordered
        data['type'] = 'ordered'
        data['start'] = int(m.group(1))
        data['delimiter'] = m.group(2)
    else:
        return None

    marker_length = len(m.group())
    after_marker = parser.next_nonspace + marker_length

    # the marker must be followed by a space, a tab or the end of the line
    following = peek(line, after_marker)
    if following is not None and following != '\t' and following != ' ':
        return None

    # inside a paragraph, the rest of the line must not be blank
    if container.t == 'paragraph' and \
       not re.search(reNonSpace, line[after_marker:]):
        return None

    # it is a list marker: consume it, then measure the padding after it
    parser.advance_next_nonspace()  # to start of marker
    parser.advance_offset(marker_length, True)  # to end of marker
    spaces_start_col = parser.column
    spaces_start_offset = parser.offset
    while True:
        parser.advance_offset(1, True)
        following = peek(parser.current_line, parser.offset)
        if not (parser.column - spaces_start_col < 5 and
                is_space_or_tab(following)):
            break

    blank_item = peek(parser.current_line, parser.offset) is None
    spaces_after_marker = parser.column - spaces_start_col
    if 1 <= spaces_after_marker < 5 and not blank_item:
        data['padding'] = marker_length + spaces_after_marker
    else:
        # five or more columns, none at all, or nothing after the marker:
        # rewind to just after the marker and take at most one blank
        data['padding'] = marker_length + 1
        parser.column = spaces_start_col
        parser.offset = spaces_start_offset
        if is_space_or_tab(peek(parser.current_line, parser.offset)):
            parser.advance_offset(1, True)

    return data
151
152
153
def lists_match(list_data, item_data):
    """
    Tell whether two list-data dicts describe the same kind of list.

    They match when their 'type', 'delimiter' and 'bullet_char' entries
    are all equal (a missing key counts as None).  Every other key is
    ignored.
    """
    compared_keys = ('type', 'delimiter', 'bullet_char')
    return all(list_data.get(key) == item_data.get(key)
               for key in compared_keys)
162
163
164
class Block(object):
    """Base class for the per-block-type behaviour classes.

    Every hook here does nothing and returns None.
    """

    # Whether blocks of this type take raw text lines.
    accepts_lines = None

    @staticmethod
    def continue_(parser=None, container=None):
        """Hook run when a line is checked against an open block."""
        return None

    @staticmethod
    def finalize(parser=None, block=None):
        """Hook run when a block is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Hook telling whether a child of type ``t`` is allowed."""
        return None
178
179
180
class Document(Block):
    """Behaviour of the 'document' block."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always report a match (0)."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a document is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any child type is allowed except 'item'."""
        return t != 'item'
194
195
196
class List(Block):
    """Behaviour of the 'list' block."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Always report a match (0)."""
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Mark the list as loose where blank lines separate its content.

        Sets ``block.list_data['tight']`` to False when a non-final item
        ends with a blank line, or when a child of an item ends with a
        blank line and is followed by a sibling or by another item.
        """
        entry = block.first_child
        while entry:
            # a blank line after an item that is not the last one
            if ends_with_blank_line(entry) and entry.nxt:
                block.list_data['tight'] = False
                break
            # look at the item's own children for blank lines between them
            child = entry.first_child
            while child:
                followed = entry.nxt or child.nxt
                if ends_with_blank_line(child) and followed:
                    block.list_data['tight'] = False
                    break
                child = child.nxt
            entry = entry.nxt

    @staticmethod
    def can_contain(t):
        """A list only holds 'item' children."""
        return t == 'item'
225
226
227
class BlockQuote(Block):
    """Behaviour of the 'block_quote' block."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Consume a leading '>' marker.

        Returns 1 (no match) when the line is indented or its first
        non-space character is not '>'.  Otherwise consumes the marker and
        one optional following space or tab, and returns 0.
        """
        line = parser.current_line
        if parser.indented or peek(line, parser.next_nonspace) != '>':
            return 1
        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        if is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a block quote is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any child type is allowed except 'item'."""
        return t != 'item'
249
250
251
class Item(Block):
    """Behaviour of the 'item' (list item) block."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Decide whether the current line still belongs to the item.

        Returns 0 when it does (after consuming the item's indentation)
        and 1 when it does not.
        """
        if parser.blank:
            if container.first_child is None:
                # a blank line right after an empty list item ends it
                return 1
            parser.advance_next_nonspace()
            return 0

        required = (container.list_data['marker_offset'] +
                    container.list_data['padding'])
        if parser.indent < required:
            return 1
        parser.advance_offset(required, True)
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when an item is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """Any child type is allowed except 'item'."""
        return t != 'item'
278
279
280
class Heading(Block):
    """Behaviour of the 'heading' block."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Never match: a heading cannot span more than one line."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a heading is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """A heading holds no child blocks."""
        return False
295
296
297
class ThematicBreak(Block):
    """Behaviour of the 'thematic_break' block."""

    accepts_lines = False

    @staticmethod
    def continue_(parser=None, container=None):
        """Never match: a thematic break cannot span more than one line."""
        return 1

    @staticmethod
    def finalize(parser=None, block=None):
        """Nothing to do when a thematic break is closed."""
        return None

    @staticmethod
    def can_contain(t):
        """A thematic break holds no child blocks."""
        return False
312
313
314
class CodeBlock(Block):
    """Behaviour of the 'code_block' block (fenced or indented)."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """Check whether the current line continues the code block.

        Returns 0 when the line belongs to the block, 1 when it does not,
        and 2 when the line is the closing fence (the block is finalized
        and the line needs no further processing).
        """
        line = parser.current_line

        if not container.is_fenced:
            # indented code
            if parser.indent >= CODE_INDENT:
                parser.advance_offset(CODE_INDENT, True)
            elif parser.blank:
                parser.advance_next_nonspace()
            else:
                return 1
            return 0

        start = parser.next_nonspace
        closing = parser.indent <= 3 and \
            len(line) >= start + 1 and \
            line[start] == container.fence_char and \
            re.search(reClosingCodeFence, line[start:])
        if closing and len(closing.group()) >= container.fence_length:
            # closing fence: the whole line is consumed
            parser.finalize(container, parser.line_number)
            return 2

        # drop up to fence_offset leading blanks from the content line
        remaining = container.fence_offset
        while remaining > 0 and is_space_or_tab(peek(line, parser.offset)):
            parser.advance_offset(1, True)
            remaining -= 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Turn the collected text into ``block.literal``.

        For fenced code the first line becomes ``block.info``; for indented
        code the trailing blank lines are collapsed into one newline.
        """
        text = block.string_content
        if block.is_fenced:
            # everything up to the first newline is the info string
            split_at = text.index('\n')
            block.info = unescape_string(text[0:split_at].strip())
            block.literal = text[split_at + 1:]
        else:
            block.literal = re.sub(r'(\n *)+$', '\n', text)

        block.string_content = None

    @staticmethod
    def can_contain(t):
        """A code block holds no child blocks."""
        return False
365
366
367
class HtmlBlock(Block):
    """Behaviour of the 'html_block' block."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """A blank line ends HTML blocks of type 6 or 7; all else matches."""
        ends_on_blank = container.html_block_type in (6, 7)
        return 1 if parser.blank and ends_on_blank else 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Store the collected text, minus trailing blank lines, as literal."""
        block.literal = re.sub(r'(\n *)+$', '', block.string_content)
        # drop the reference so the raw text can be collected
        block.string_content = None

    @staticmethod
    def can_contain(t):
        """An HTML block holds no child blocks."""
        return False
387
388
389
class Paragraph(Block):
    """Behaviour of the 'paragraph' block."""

    accepts_lines = True

    @staticmethod
    def continue_(parser=None, container=None):
        """A blank line ends the paragraph (1); any other line matches (0)."""
        if parser.blank:
            return 1
        return 0

    @staticmethod
    def finalize(parser=None, block=None):
        """Strip leading link reference definitions from the paragraph.

        Each definition parsed by the inline parser is removed from
        ``block.string_content``.  When at least one was found and only
        blank text is left, the paragraph is unlinked from the tree.
        """
        found_reference = False

        while peek(block.string_content, 0) == '[':
            consumed = parser.inline_parser.parseReference(
                block.string_content, parser.refmap)
            if not consumed:
                break
            block.string_content = block.string_content[consumed:]
            found_reference = True

        if found_reference and is_blank(block.string_content):
            block.unlink()

    @staticmethod
    def can_contain(t):
        """A paragraph holds no child blocks."""
        return False
414
415
416
class BlockStarts(object):
    """Functions that try to start a new block at the current position.

    Each one returns:
    0 = no match
    1 = matched container, keep going
    2 = matched leaf, no more block starts
    """

    # Names of the start functions, in the order they are tried.
    METHODS = [
        'block_quote',
        'atx_heading',
        'fenced_code_block',
        'html_block',
        'setext_heading',
        'thematic_break',
        'list_item',
        'indented_code_block',
    ]

    @staticmethod
    def block_quote(parser, container=None):
        """Start a block quote on a non-indented line beginning with '>'."""
        if parser.indented or \
           peek(parser.current_line, parser.next_nonspace) != '>':
            return 0

        parser.advance_next_nonspace()
        parser.advance_offset(1, False)
        # one blank after the marker is optional
        if is_space_or_tab(peek(parser.current_line, parser.offset)):
            parser.advance_offset(1, True)
        parser.close_unmatched_blocks()
        parser.add_child('block_quote', parser.next_nonspace)
        return 1

    @staticmethod
    def atx_heading(parser, container=None):
        """Start a heading introduced by one to six '#' characters."""
        if parser.indented:
            return 0
        m = re.search(reATXHeadingMarker,
                      parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        marker = m.group()
        parser.advance_next_nonspace()
        parser.advance_offset(len(marker), False)
        parser.close_unmatched_blocks()
        container = parser.add_child('heading', parser.next_nonspace)
        # the level is the number of '#' characters
        container.level = len(marker.strip())
        # drop a closing sequence of '#', whether it is the whole text
        # or just its tail
        text = parser.current_line[parser.offset:]
        text = re.sub(r'^[ \t]*#+[ \t]*$', '', text)
        container.string_content = re.sub(r'[ \t]+#+[ \t]*$', '', text)
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def fenced_code_block(parser, container=None):
        """Start a fenced code block at an opening fence."""
        if parser.indented:
            return 0
        m = re.search(
            reCodeFence,
            parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        fence = m.group()
        fence_length = len(fence)
        parser.close_unmatched_blocks()
        container = parser.add_child('code_block', parser.next_nonspace)
        container.is_fenced = True
        container.fence_length = fence_length
        container.fence_char = fence[0]
        container.fence_offset = parser.indent
        parser.advance_next_nonspace()
        parser.advance_offset(fence_length, False)
        return 2

    @staticmethod
    def html_block(parser, container=None):
        """Start an HTML block when one of the open conditions matches."""
        if parser.indented or \
           peek(parser.current_line, parser.next_nonspace) != '<':
            return 0

        s = parser.current_line[parser.next_nonspace:]
        for block_type in range(1, 8):
            if not re.search(reHtmlBlockOpen[block_type], s):
                continue
            if block_type == 7 and container.t == 'paragraph':
                # type 7 is not accepted while inside a paragraph
                continue
            parser.close_unmatched_blocks()
            # parser.offset is left alone:
            # leading spaces are part of the HTML block
            b = parser.add_child('html_block', parser.offset)
            b.html_block_type = block_type
            return 2
        return 0

    @staticmethod
    def setext_heading(parser, container=None):
        """Turn the current paragraph into a heading at an underline."""
        if parser.indented or container.t != 'paragraph':
            return 0
        m = re.search(
            reSetextHeadingLine,
            parser.current_line[parser.next_nonspace:])
        if not m:
            return 0

        parser.close_unmatched_blocks()
        heading = Node('heading', container.sourcepos)
        # '=' underlines give level 1, '-' underlines level 2
        heading.level = 1 if m.group()[0] == '=' else 2
        heading.string_content = container.string_content
        # the heading takes the paragraph's place in the tree
        container.insert_after(heading)
        container.unlink()
        parser.tip = heading
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def thematic_break(parser, container=None):
        """Start a thematic break on a matching non-indented line."""
        if parser.indented or not re.search(
                reThematicBreak, parser.current_line[parser.next_nonspace:]):
            return 0

        parser.close_unmatched_blocks()
        parser.add_child('thematic_break', parser.next_nonspace)
        parser.advance_offset(
            len(parser.current_line) - parser.offset, False)
        return 2

    @staticmethod
    def list_item(parser, container=None):
        """Start a list item, opening a new list first when needed."""
        if parser.indented and container.t != 'list':
            return 0
        data = parse_list_marker(parser, container)
        if not data:
            return 0

        parser.close_unmatched_blocks()

        # open a new list unless the tip is a list of the same kind
        if parser.tip.t != 'list' or \
           not lists_match(container.list_data, data):
            container = parser.add_child('list', parser.next_nonspace)
            container.list_data = data

        # then add the item itself
        container = parser.add_child('item', parser.next_nonspace)
        container.list_data = data
        return 1

    @staticmethod
    def indented_code_block(parser, container=None):
        """Start an indented code block on an indented, non-blank line."""
        if not parser.indented or \
           parser.tip.t == 'paragraph' or \
           parser.blank:
            return 0

        parser.advance_offset(CODE_INDENT, True)
        parser.close_unmatched_blocks()
        parser.add_child('code_block', parser.offset)
        return 2
575
576
577
class Parser(object):
    """Line-oriented block parser that builds a tree of ``Node`` objects.

    ``parse()`` splits the input into lines, feeds each one to
    ``incorporate_line()``, finalizes the blocks still open and then runs
    the inline parser over paragraphs and headings.
    """

    def __init__(self, options=None):
        """Create a parser.

        ``options`` is handed to the ``InlineParser`` and kept as
        ``self.options``; when omitted, a new empty dict is used.
        """
        # A ``{}`` default would be one dict object shared by every parser
        # created without options, so the dict is built per instance.
        if options is None:
            options = {}
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.block_starts = BlockStarts()
        self.tip = self.doc
        self.oldtip = self.doc
        self.current_line = ''
        self.line_number = 0
        self.offset = 0
        self.column = 0
        self.next_nonspace = 0
        self.next_nonspace_column = 0
        self.indent = 0
        self.indented = False
        self.blank = False
        self.partially_consumed_tab = False
        self.all_closed = True
        self.last_matched_container = self.doc
        self.refmap = {}
        self.last_line_length = 0
        self.inline_parser = InlineParser(options)
        self.options = options

    @staticmethod
    def _block_class(node_type):
        """Return the class in ``CommonMark.blocks`` named after the
        camel-cased ``node_type``."""
        return getattr(import_module('CommonMark.blocks'),
                       to_camel_case(node_type))

    def add_line(self):
        """ Add a line to the block at the tip.  We assume the tip
        can accept lines -- that check should be done before calling this."""
        if self.partially_consumed_tab:
            # Skip over tab
            self.offset += 1
            # Add space characters
            chars_to_tab = 4 - (self.column % 4)
            self.tip.string_content += (' ' * chars_to_tab)
        self.tip.string_content += (self.current_line[self.offset:] + '\n')

    def add_child(self, tag, offset):
        """ Add block of type tag as a child of the tip.  If the tip can't
        accept children, close and finalize it and try its parent,
        and so on til we find a block that can accept children.

        Returns the new block, which also becomes the tip."""
        while not self._block_class(self.tip.t).can_contain(tag):
            self.finalize(self.tip, self.line_number - 1)

        column_number = offset + 1
        new_block = Node(tag, [[self.line_number, column_number], [0, 0]])
        new_block.string_content = ''
        self.tip.append_child(new_block)
        self.tip = new_block
        return new_block

    def close_unmatched_blocks(self):
        """Finalize and close any unmatched blocks."""
        if not self.all_closed:
            while self.oldtip != self.last_matched_container:
                parent = self.oldtip.parent
                self.finalize(self.oldtip, self.line_number - 1)
                self.oldtip = parent
            self.all_closed = True

    def find_next_nonspace(self):
        """Locate the first character at or after ``self.offset`` that is
        neither a space nor a tab.

        Sets ``next_nonspace`` (index), ``next_nonspace_column`` (column,
        with tabs advancing to the next multiple of 4), ``blank`` (only a
        line ending or nothing is left), ``indent`` and ``indented``.
        """
        line = self.current_line
        length = len(line)
        i = self.offset
        cols = self.column

        # Characters are compared as they are.  The former ``unicode()``
        # round trip of the first character failed on Python 3 (NameError)
        # and on already-decoded text under Python 2 (TypeError).
        while i < length:
            c = line[i]
            if c == ' ':
                i += 1
                cols += 1
            elif c == '\t':
                i += 1
                cols += (4 - (cols % 4))
            else:
                break

        # '' stands for "ran off the end of the line"
        c = line[i] if i < length else ''

        self.blank = (c == '\n' or c == '\r' or c == '')
        self.next_nonspace = i
        self.next_nonspace_column = cols
        self.indent = self.next_nonspace_column - self.column
        self.indented = self.indent >= CODE_INDENT

    def advance_next_nonspace(self):
        """Move offset and column to the position found by
        ``find_next_nonspace()``."""
        self.offset = self.next_nonspace
        self.column = self.next_nonspace_column
        self.partially_consumed_tab = False

    def advance_offset(self, count, columns):
        """Advance through the current line.

        ``count`` is a number of columns when ``columns`` is true and a
        number of characters otherwise.  A tab that is only partly covered
        by a column count is not skipped; ``partially_consumed_tab`` is set
        instead.  Advancing stops at the end of the line.
        """
        current_line = self.current_line
        try:
            c = current_line[self.offset]
        except IndexError:
            c = None
        while count > 0 and c is not None:
            if c == '\t':
                chars_to_tab = 4 - (self.column % 4)
                if columns:
                    self.partially_consumed_tab = chars_to_tab > count
                    chars_to_advance = min(count, chars_to_tab)
                    self.column += chars_to_advance
                    self.offset += 0 if self.partially_consumed_tab else 1
                    count -= chars_to_advance
                else:
                    self.partially_consumed_tab = False
                    self.column += chars_to_tab
                    self.offset += 1
                    count -= 1
            else:
                self.partially_consumed_tab = False
                self.offset += 1
                # assume ascii; block starts are ascii
                self.column += 1
                count -= 1
            try:
                c = current_line[self.offset]
            except IndexError:
                c = None

    def incorporate_line(self, ln):
        """Analyze a line of text and update the document appropriately.

        We parse markdown text by calling this on each line of input,
        then finalizing the document.
        """
        all_matched = True

        container = self.doc
        self.oldtip = self.tip
        self.offset = 0
        self.column = 0
        self.blank = False
        self.partially_consumed_tab = False
        self.line_number += 1

        # replace NUL characters for security
        if re.search(r'\u0000', ln) is not None:
            ln = re.sub(r'\0', '\uFFFD', ln)

        self.current_line = ln

        # For each containing block, try to parse the associated line start.
        # Bail out on failure: container will point to the last matching block.
        # Set all_matched to false if not all containers match.
        last_child = container.last_child
        while last_child and last_child.is_open:
            container = last_child

            self.find_next_nonspace()
            rv = self._block_class(container.t).continue_(self, container)
            if rv == 0:
                # we've matched, keep going
                pass
            elif rv == 1:
                # we've failed to match a block
                all_matched = False
            elif rv == 2:
                # we've hit end of line for fenced code close and can return
                self.last_line_length = len(ln)
                return
            else:
                raise ValueError('returned illegal value, must be 0, 1, or 2')

            if not all_matched:
                # back up to last matching block
                container = container.parent
                break

            last_child = container.last_child

        self.all_closed = (container == self.oldtip)
        self.last_matched_container = container

        block_class = self._block_class(container.t)
        matched_leaf = container.t != 'paragraph' and block_class.accepts_lines
        starts = self.block_starts
        starts_len = len(starts.METHODS)
        # Unless last matched container is a code block, try new container
        # starts, adding children to the last matched container:
        while not matched_leaf:
            self.find_next_nonspace()

            # this is a little performance optimization:
            if not self.indented and \
               not re.search(reMaybeSpecial, ln[self.next_nonspace:]):
                self.advance_next_nonspace()
                break

            i = 0
            while i < starts_len:
                res = getattr(starts, starts.METHODS[i])(self, container)
                if res == 1:
                    container = self.tip
                    break
                elif res == 2:
                    container = self.tip
                    matched_leaf = True
                    break
                else:
                    i += 1

            if i == starts_len:
                # nothing matched
                self.advance_next_nonspace()
                break

        # What remains at the offset is a text line. Add the text to the
        # appropriate container.
        if not self.all_closed and not self.blank and \
           self.tip.t == 'paragraph':
            # lazy paragraph continuation
            self.add_line()
        else:
            # not a lazy continuation
            # finalize any blocks not matched
            self.close_unmatched_blocks()
            if self.blank and container.last_child:
                container.last_child.last_line_blank = True

            t = container.t

            # Block quote lines are never blank as they start with >
            # and we don't count blanks in fenced code for purposes of
            # tight/loose lists or breaking out of lists.  We also
            # don't set last_line_blank on an empty list item, or if we
            # just closed a fenced block.
            last_line_blank = self.blank and \
                not (t == 'block_quote' or
                     (t == 'code_block' and container.is_fenced) or
                     (t == 'item' and
                      not container.first_child and
                      container.sourcepos[0][0] == self.line_number))

            # propagate last_line_blank up through parents:
            cont = container
            while cont:
                cont.last_line_blank = last_line_blank
                cont = cont.parent

            if self._block_class(t).accepts_lines:
                self.add_line()
                # if HtmlBlock, check for end condition
                if t == 'html_block' and \
                   container.html_block_type >= 1 and \
                   container.html_block_type <= 5 and \
                   re.search(
                       reHtmlBlockClose[container.html_block_type],
                       self.current_line[self.offset:]):
                    self.finalize(container, self.line_number)
            elif self.offset < len(ln) and not self.blank:
                # create a paragraph container for one line
                container = self.add_child('paragraph', self.offset)
                self.advance_next_nonspace()
                self.add_line()

        self.last_line_length = len(ln)

    def finalize(self, block, line_number):
        """ Finalize a block.  Close it and do any necessary postprocessing,
        e.g. creating string_content from strings, setting the 'tight'
        or 'loose' status of a list, and parsing the beginnings
        of paragraphs for reference definitions.  Reset the tip to the
        parent of the closed block."""
        above = block.parent
        block.is_open = False
        block.sourcepos[1] = [line_number, self.last_line_length]
        self._block_class(block.t).finalize(self, block)

        self.tip = above

    def process_inlines(self, block):
        """
        Walk through a block & children recursively, parsing string content
        into inline content where appropriate.
        """
        walker = block.walker()
        self.inline_parser.refmap = self.refmap
        self.inline_parser.options = self.options
        event = walker.nxt()
        while event is not None:
            node = event['node']
            t = node.t
            # inline content is parsed when a paragraph or heading is left
            if not event['entering'] and (t == 'paragraph' or t == 'heading'):
                self.inline_parser.parse(node)
            event = walker.nxt()

    def parse(self, my_input):
        """ The main parsing function.  Returns a parsed document AST."""
        self.doc = Node('document', [[1, 1], [0, 0]])
        self.tip = self.doc
        self.refmap = {}
        self.line_number = 0
        self.last_line_length = 0
        self.offset = 0
        self.column = 0
        self.last_matched_container = self.doc
        self.current_line = ''
        lines = re.split(reLineEnding, my_input)
        length = len(lines)
        if len(my_input) > 0 and my_input[-1] == '\n':
            # ignore last blank line created by final newline
            length -= 1
        for i in range(length):
            self.incorporate_line(lines[i])
        # close every block that is still open, innermost first
        while (self.tip):
            self.finalize(self.tip, length)
        self.process_inlines(self.doc)
        return self.doc
904