1
|
|
|
""" |
2
|
|
|
CORE MARKDOWN BLOCKPARSER |
3
|
|
|
=========================================================================== |
4
|
|
|
|
5
|
|
|
This parser handles basic parsing of Markdown blocks. It doesn't concern |
6
|
|
|
itself with inline elements such as **bold** or *italics*, but rather just |
7
|
|
|
catches blocks, lists, quotes, etc. |
8
|
|
|
|
9
|
|
|
The BlockParser is made up of a bunch of BlockProcessors, each handling a |
10
|
|
|
different type of block. Extensions may add/replace/remove BlockProcessors |
11
|
|
|
as they need to alter how markdown blocks are parsed. |
12
|
|
|
""" |
13
|
|
|
|
14
|
|
|
from __future__ import absolute_import |
15
|
|
|
from __future__ import division |
16
|
|
|
from __future__ import unicode_literals |
17
|
|
|
import logging |
18
|
|
|
import re |
19
|
|
|
from . import util |
20
|
|
|
from .blockparser import BlockParser |
21
|
|
|
|
22
|
|
|
logger = logging.getLogger('MARKDOWN') |
23
|
|
|
|
24
|
|
|
|
25
|
|
|
def build_block_parser(md_instance, **kwargs):
    """ Create a BlockParser loaded with the default block processors.

    The processors are registered under their usual keys in the order
    listed below. ``**kwargs`` is accepted but not used.
    """
    parser = BlockParser(md_instance)
    # (registry key, processor class) pairs, in registration order.
    defaults = (
        ('empty', EmptyBlockProcessor),
        ('indent', ListIndentProcessor),
        ('code', CodeBlockProcessor),
        ('hashheader', HashHeaderProcessor),
        ('setextheader', SetextHeaderProcessor),
        ('hr', HRProcessor),
        ('olist', OListProcessor),
        ('ulist', UListProcessor),
        ('quote', BlockQuoteProcessor),
        ('paragraph', ParagraphProcessor),
    )
    for key, processor_cls in defaults:
        parser.blockprocessors[key] = processor_cls(parser)
    return parser
39
|
|
|
|
40
|
|
|
|
41
|
|
|
class BlockProcessor(object):
    """ Base class for block processors.

    Each subclass will provide the methods below to work with the source and
    tree. Each processor will need to define its own ``test`` and ``run``
    methods. The ``test`` method should return True or False, to indicate
    whether the current block should be processed by this processor. If the
    test passes, the parser will call the processor's ``run`` method.

    """

    def __init__(self, parser):
        """ Store the owning parser and cache its configured tab length.

        Keywords:

        * ``parser``: The parser this processor belongs to. Its
          ``markdown.tab_length`` attribute is read once and kept as
          ``self.tab_length``.
        """
        self.parser = parser
        self.tab_length = parser.markdown.tab_length

    def lastChild(self, parent):
        """ Return the last child of an etree element, or None if empty. """
        # ``len()`` is used on purpose rather than the truth value of
        # ``parent`` so only the number of children decides the result.
        return parent[-1] if len(parent) else None

    def detab(self, text):
        """ Remove a tab from the front of each line of the given text.

        Lines are consumed from the top for as long as they are either
        indented by at least ``tab_length`` spaces (the indent is removed) or
        blank (replaced with an empty string). Processing stops at the first
        line that is neither.

        Returns a 2-tuple ``(detabbed, rest)``: the dedented leading lines and
        the untouched remaining lines, each joined with newlines.
        """
        indent = ' ' * self.tab_length
        lines = text.split('\n')
        newtext = []
        for line in lines:
            if line.startswith(indent):
                newtext.append(line[self.tab_length:])
            elif not line.strip():
                newtext.append('')
            else:
                # First non-blank, non-indented line ends the indented run.
                break
        return '\n'.join(newtext), '\n'.join(lines[len(newtext):])

    def looseDetab(self, text, level=1):
        """ Remove a tab from front of lines but allowing dedented lines.

        Every line that starts with ``tab_length * level`` spaces has exactly
        that many characters removed; all other lines are left unchanged.
        """
        width = self.tab_length * level
        indent = ' ' * width
        return '\n'.join(
            [line[width:] if line.startswith(indent) else line
             for line in text.split('\n')]
        )

    def test(self, parent, block):
        """ Test for block type. Must be overridden by subclasses.

        As the parser loops through processors, it will call the ``test``
        method on each to determine if the given block of text is of that
        type. This method must return a boolean ``True`` or ``False``. The
        actual method of testing is left to the needs of that particular
        block type. It could be as simple as ``block.startswith(some_string)``
        or a complex regular expression. As the block type may be different
        depending on the parent of the block (i.e. inside a list), the parent
        etree element is also provided and may be used as part of the test.

        Keywords:

        * ``parent``: An etree element which will be the parent of the block.
        * ``block``: A block of text from the source which has been split at
          blank lines.
        """
        pass  # pragma: no cover

    def run(self, parent, blocks):
        """ Run processor. Must be overridden by subclasses.

        When the parser determines the appropriate type of a block, the parser
        will call the corresponding processor's ``run`` method. This method
        should parse the individual lines of the block and append them to
        the etree.

        Note that both the ``parent`` and ``blocks`` keywords are pointers
        to instances of the objects which should be edited in place. Each
        processor must make changes to the existing objects as there is no
        mechanism to return new/different objects to replace them.

        This means that this method should be adding SubElements or adding text
        to the parent, and should remove (``pop``) or add (``insert``) items to
        the list of blocks.

        Keywords:

        * ``parent``: An etree element which is the parent of the current
          block.
        * ``blocks``: A list of all remaining blocks of the document.
        """
        pass  # pragma: no cover
127
|
|
|
|
128
|
|
|
|
129
|
|
|
class ListIndentProcessor(BlockProcessor):
    """ Handle indented blocks that belong to a list item.

    Example:
        * a list item
            process this part

            or this part

    """

    ITEM_TYPES = ['li']
    LIST_TYPES = ['ul', 'ol']

    def __init__(self, *args):
        super(ListIndentProcessor, self).__init__(*args)
        # Captures the run of whole tab-width indents at the start of a block.
        self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)

    def test(self, parent, block):
        """ Accept indented blocks whose parent is a list item or ends in a list. """
        if not block.startswith(' '*self.tab_length):
            return False
        if self.parser.state.isstate('detabbed'):
            # Already inside a detabbed run; don't claim the block again.
            return False
        return (parent.tag in self.ITEM_TYPES or
                (len(parent) and parent[-1] is not None and
                 (parent[-1].tag in self.LIST_TYPES)))

    def run(self, parent, blocks):
        """ Detab the block and parse it under the matching list element. """
        raw = blocks.pop(0)
        level, anchor = self.get_level(parent, raw)
        chunk = self.looseDetab(raw, level)

        self.parser.state.set('detabbed')
        if parent.tag in self.ITEM_TYPES:
            # The li may already hold a 'ul' or 'ol' child with a member
            # (an indented list whose first member was parsed earlier, see
            # OListProcessor). In that case the nested list is the parent;
            # otherwise the li itself is.
            if len(parent) and parent[-1].tag in self.LIST_TYPES:
                target = parent[-1]
            else:
                target = parent
            self.parser.parseBlocks(target, [chunk])
        elif anchor.tag in self.ITEM_TYPES:
            # get_level landed on a li: parse the block inside it.
            self.parser.parseBlocks(anchor, [chunk])
        elif len(anchor) and anchor[-1].tag in self.ITEM_TYPES:
            # get_level landed on a list (``ol`` or ``ul``) with children.
            # Its last li is taken as the parent of this block.
            last_li = anchor[-1]
            if last_li.text:
                # Move the li's bare text into a p, inserted first so it
                # stays ahead of any existing children (e.g. a nested list).
                wrapper = util.etree.Element('p')
                wrapper.text = last_li.text
                last_li.text = ''
                last_li.insert(0, wrapper)
            self.parser.parseChunk(last_li, chunk)
        else:
            self.create_item(anchor, chunk)
        self.parser.state.reset()

    def create_item(self, parent, block):
        """ Append a fresh li to ``parent`` and parse ``block`` inside it. """
        self.parser.parseBlocks(util.etree.SubElement(parent, 'li'), [block])

    def get_level(self, parent, block):
        """ Return ``(level, element)`` for the block's indent depth. """
        # Number of whole tab-width indents at the start of the block.
        found = self.INDENT_RE.match(block)
        indent_level = len(found.group(1))/self.tab_length if found else 0
        # In a tight list ('list' state) we are already at the right parent;
        # in a loose list the parent has to be located from the top.
        level = 1 if self.parser.state.isstate('list') else 0
        # Descend through trailing list/li children until deep enough.
        while indent_level > level:
            child = self.lastChild(parent)
            if child is None or not (child.tag in self.LIST_TYPES or
                                     child.tag in self.ITEM_TYPES):
                # No more child levels. Being short of indent_level means
                # this is a code block, so stop here.
                break
            if child.tag in self.LIST_TYPES:
                level += 1
            parent = child
        return level, parent
222
|
|
|
|
223
|
|
|
|
224
|
|
|
class CodeBlockProcessor(BlockProcessor):
    """ Turn indented blocks into ``pre``/``code`` elements. """

    def test(self, parent, block):
        """ A block is code when it starts with a full tab of spaces. """
        return block.startswith(' '*self.tab_length)

    def run(self, parent, blocks):
        """ Append the block to the previous code block or start a new one. """
        previous = self.lastChild(parent)
        raw = blocks.pop(0)
        continues_code = (previous is not None and previous.tag == "pre" and
                          len(previous) and previous[0].tag == "code")
        if continues_code:
            # Blank lines do not start new code blocks, so this block joins
            # the previous one; the newline lost when the source was split
            # into blocks is added back.
            code = previous[0]
            body, leftover = self.detab(raw)
            code.text = util.AtomicString(
                '%s\n%s\n' % (code.text, body.rstrip())
            )
        else:
            # Start a new code block: build pre > code and fill in the text.
            pre = util.etree.SubElement(parent, 'pre')
            code = util.etree.SubElement(pre, 'code')
            body, leftover = self.detab(raw)
            code.text = util.AtomicString('%s\n' % body.rstrip())
        if leftover:
            # Unindented line(s) followed the indented part; push them back
            # to the front of the block list for later processing.
            blocks.insert(0, leftover)
255
|
|
|
|
256
|
|
|
|
257
|
|
|
class BlockQuoteProcessor(BlockProcessor):
    """ Process blockquotes (lines introduced by ``>``). """

    RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')

    def test(self, parent, block):
        """ True when any line of the block looks like a quote line. """
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        """ Parse the quoted part of the block inside a ``blockquote``. """
        block = blocks.pop(0)
        found = self.RE.search(block)
        if found:
            start = found.start()
            # Lines ahead of the quote are parsed first, recursively.
            self.parser.parseBlocks(parent, [block[:start]])
            # Strip the leading ``> `` from each remaining line.
            quoted_lines = block[start:].split('\n')
            block = '\n'.join([self.clean(line) for line in quoted_lines])
        previous = self.lastChild(parent)
        if previous is not None and previous.tag == "blockquote":
            # Continue the blockquote that immediately precedes this block.
            quote = previous
        else:
            # Otherwise open a new blockquote element.
            quote = util.etree.SubElement(parent, 'blockquote')
        # Parse recursively with the blockquote as parent. The state change
        # makes blockquotes embedded in lists use p tags.
        self.parser.state.set('blockquote')
        self.parser.parseChunk(quote, block)
        self.parser.state.reset()

    def clean(self, line):
        """ Strip the ``>`` marker from the start of a single line. """
        if line.strip() == ">":
            return ""
        found = self.RE.match(line)
        if found:
            return found.group(2)
        return line
297
|
|
|
|
298
|
|
|
|
299
|
|
|
class OListProcessor(BlockProcessor):
    """ Process ordered list blocks. """

    TAG = 'ol'
    # The integer (as a string) with which the list starts (default=1).
    # E.g. if the list begins with
    #   3. Item
    # the ol tag will get a start="3" attribute.
    STARTSWITH = '1'
    # List of allowed sibling tags.
    SIBLING_TAGS = ['ol', 'ul']

    def __init__(self, parser):
        super(OListProcessor, self).__init__(parser)
        max_lead = self.tab_length - 1
        # An item (``1. item``); ``group(1)`` holds the item's contents.
        self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % max_lead)
        # Items on secondary lines; these may be of either list type.
        self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.)|[*+-])[ ]+(.*)' %
                                   max_lead)
        # Indented (nested) items of either type.
        self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' %
                                    (self.tab_length, self.tab_length * 2 - 1))

    def test(self, parent, block):
        """ True when the block starts with a list item of this type. """
        return bool(self.RE.match(block))

    def run(self, parent, blocks):
        """ Split the block into items and add them to a list element. """
        # A single block may hold several items.
        entries = self.get_items(blocks.pop(0))
        previous = self.lastChild(parent)

        if previous is not None and previous.tag in self.SIBLING_TAGS:
            # The preceding element is a list, so continue it.
            lst = previous
            last_item = lst[-1]
            # The previous item must end up in a p. If it still has text
            # of its own, it isn't in one yet.
            if last_item.text:
                # The item may have other children, so the p is inserted
                # first rather than appended.
                wrapper = util.etree.Element('p')
                wrapper.text = last_item.text
                last_item.text = ''
                last_item.insert(0, wrapper)
            # A tail on the item's last child also needs its own p. This
            # likely only occurs when a header is not followed by a blank
            # line.
            trailing = self.lastChild(last_item)
            if trailing is not None and trailing.tail:
                tail_p = util.etree.SubElement(last_item, 'p')
                tail_p.text = trailing.tail.lstrip()
                trailing.tail = ''

            # The first item is parsed separately as it gets wrapped in a p.
            li = util.etree.SubElement(lst, 'li')
            self.parser.state.set('looselist')
            head = entries.pop(0)
            self.parser.parseBlocks(li, [head])
            self.parser.state.reset()
        elif parent.tag in ['ol', 'ul']:
            # Edge case: a multi-item indented list whose first item sits in
            # a blank parent-list item:
            # * * subitem1
            #     * subitem2
            # see also ListIndentProcessor
            lst = parent
        else:
            # A brand new list: create the element with the right tag.
            lst = util.etree.SubElement(parent, self.TAG)
            # Record a custom start number when one was detected.
            if not self.parser.markdown.lazy_ol and self.STARTSWITH != '1':
                lst.attrib['start'] = self.STARTSWITH

        self.parser.state.set('list')
        # Parse every remaining item recursively under the proper parent.
        indent = ' '*self.tab_length
        for entry in entries:
            if entry.startswith(indent):
                # Indented entry: it belongs to the most recent li.
                self.parser.parseBlocks(lst[-1], [entry])
            else:
                # New entry: give it an li of its own.
                li = util.etree.SubElement(lst, 'li')
                self.parser.parseBlocks(li, [entry])
        self.parser.state.reset()

    def get_items(self, block):
        """ Split a block into a list of item strings. """
        indent = ' '*self.tab_length
        items = []
        for line in block.split('\n'):
            found = self.CHILD_RE.match(line)
            if found:
                # A new list item. For an ordered list the very first item
                # supplies the start number.
                if not items and self.TAG == 'ol':
                    INTEGER_RE = re.compile(r'(\d+)')
                    self.STARTSWITH = INTEGER_RE.match(found.group(1)).group()
                items.append(found.group(3))
                continue
            if self.INDENT_RE.match(line) and not items[-1].startswith(indent):
                # First line of an indented (possibly nested) item.
                items.append(line)
                continue
            # Either a further line of an indented item or a continuation
            # line of the previous item: append it to that item.
            items[-1] = '%s\n%s' % (items[-1], line)
        return items
409
|
|
|
|
410
|
|
|
|
411
|
|
|
class UListProcessor(OListProcessor):
    """ Process unordered (bulleted) list blocks. """

    TAG = 'ul'

    def __init__(self, parser):
        super(UListProcessor, self).__init__(parser)
        max_lead = self.tab_length - 1
        # A bullet item (``* item``); ``group(1)`` holds the item's contents.
        self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % max_lead)
420
|
|
|
|
421
|
|
|
|
422
|
|
|
class HashHeaderProcessor(BlockProcessor):
    """ Process Hash Headers. """

    # Detect a header at start of any line in block
    RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')

    def test(self, parent, block):
        """ Return True if any line of ``block`` starts with 1-6 ``#``. """
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        """ Create an ``h1``-``h6`` element for the first header in the block.

        Keywords:

        * ``parent``: The etree element that receives the header element.
        * ``blocks``: The list of remaining blocks. The first block is
          consumed; any lines following the header are pushed back onto the
          front of the list.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            before = block[:m.start()]  # All lines before header
            after = block[m.end():]     # All lines after header
            if before:
                # As the header was not the first line of the block and the
                # lines before the header must be parsed first,
                # recursively parse these lines as a block.
                self.parser.parseBlocks(parent, [before])
            # Create header using named groups from RE. The number of
            # leading hashes selects the heading level.
            h = util.etree.SubElement(parent, 'h%d' % len(m.group('level')))
            h.text = m.group('header').strip()
            if after:
                # Insert remaining lines as first block for future parsing.
                blocks.insert(0, after)
        else:  # pragma: no cover
            # This should never happen, but just in case...
            # ``Logger.warn`` is a deprecated alias, so ``warning`` is used,
            # with the block passed as a lazy %-argument.
            logger.warning("We've got a problem header: %r", block)
451
|
|
|
|
452
|
|
|
|
453
|
|
|
class SetextHeaderProcessor(BlockProcessor):
    """ Process underlined (Setext-style) headers. """

    # A Setext header occupies the first two lines of a block: a text line
    # followed by a line of ``=`` or ``-``.
    RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)

    def test(self, parent, block):
        """ True when the block opens with a Setext-style header. """
        return bool(self.RE.match(block))

    def run(self, parent, blocks):
        """ Emit an ``h1``/``h2`` and requeue any lines after the underline. """
        lines = blocks.pop(0).split('\n')
        title, underline = lines[0], lines[1]
        # ``=`` underlines give level 1; anything else (``-``) gives level 2.
        level = 1 if underline.startswith('=') else 2
        header = util.etree.SubElement(parent, 'h%d' % level)
        header.text = title.strip()
        if len(lines) > 2:
            # More lines follow the header: hand them back to the parser.
            blocks.insert(0, '\n'.join(lines[2:]))
474
|
|
|
|
475
|
|
|
|
476
|
|
|
class HRProcessor(BlockProcessor):
    """ Process horizontal rules (``---``, ``___``, ``***``). """

    RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*'
    # The rule may sit on any line of a block.
    SEARCH_RE = re.compile(RE, re.MULTILINE)

    def test(self, parent, block):
        """ True when a line of the block consists solely of an hr. """
        found = self.SEARCH_RE.search(block)
        if not found:
            return False
        # Python has no atomic grouping, so it is simulated for performance:
        # the regex matches only the rule itself, and here we verify that
        # the match runs to the end of the block or of its line.
        end = found.end()
        if end != len(block) and block[end] != '\n':
            return False
        # Keep the match object on the instance for use in ``run``.
        self.match = found
        return True

    def run(self, parent, blocks):
        """ Insert an ``hr``, parsing lines before it and requeueing the rest. """
        block = blocks.pop(0)
        found = self.match
        # Anything ahead of the rule is parsed first, recursively.
        leading = block[:found.start()].rstrip('\n')
        if leading:
            self.parser.parseBlocks(parent, [leading])
        # The rule itself.
        util.etree.SubElement(parent, 'hr')
        # Anything after the rule goes back on the block list for later.
        trailing = block[found.end():].lstrip('\n')
        if trailing:
            blocks.insert(0, trailing)
509
|
|
|
|
510
|
|
|
|
511
|
|
|
class EmptyBlockProcessor(BlockProcessor):
    """ Handle blocks that are empty or begin with a blank line. """

    def test(self, parent, block):
        """ True for an empty block or one whose first character is a newline. """
        return not block or block.startswith('\n')

    def run(self, parent, blocks):
        """ Consume the blank part, keeping the whitespace if inside code. """
        block = blocks.pop(0)
        if block:
            # The block merely starts with an empty line: replace just that
            # one line and queue whatever follows for later.
            filler = '\n'
            remainder = block[1:]
            if remainder:
                blocks.insert(0, remainder)
        else:
            filler = '\n\n'
        previous = self.lastChild(parent)
        in_code = (previous is not None and previous.tag == 'pre' and
                   len(previous) and previous[0].tag == 'code')
        if in_code:
            # The preceding element is a code block: append the filler so
            # its whitespace is preserved.
            code = previous[0]
            code.text = util.AtomicString('%s%s' % (code.text, filler))
536
|
|
|
|
537
|
|
|
|
538
|
|
|
class ParagraphProcessor(BlockProcessor):
    """ Fallback processor: treat the block as a paragraph. """

    def test(self, parent, block):
        """ Always True; every block is accepted. """
        return True

    def run(self, parent, blocks):
        """ Add the block's text to the tree, as a ``p`` unless in a tight list. """
        block = blocks.pop(0)
        if not block.strip():
            # A blank block is simply thrown away.
            return
        if not self.parser.state.isstate('list'):
            # Ordinary case: make a regular paragraph.
            para = util.etree.SubElement(parent, 'p')
            para.text = block.lstrip()
            return
        # The parent is a tight list, so no p wrapper is used.
        #
        # The parent will likely only have children here when a header isn't
        # followed by a blank line. For example:
        #
        #     * # Header
        #     Line 2 of list item - not part of header.
        previous = self.lastChild(parent)
        if previous is not None:
            # Insert the text after the last child, i.e. into its tail.
            if previous.tail:
                previous.tail = '%s\n%s' % (previous.tail, block)
            else:
                previous.tail = '\n%s' % block
        elif parent.text:
            # No children: extend the parent's own text.
            parent.text = '%s\n%s' % (parent.text, block)
        else:
            parent.text = block.lstrip()
574
|
|
|
|