|
1
|
|
|
""" |
|
2
|
|
|
Tables Extension for Python-Markdown |
|
3
|
|
|
==================================== |
|
4
|
|
|
|
|
5
|
|
|
Added parsing of tables to Python-Markdown. |
|
6
|
|
|
|
|
7
|
|
|
See <https://pythonhosted.org/Markdown/extensions/tables.html> |
|
8
|
|
|
for documentation. |
|
9
|
|
|
|
|
10
|
|
|
Original code Copyright 2009 [Waylan Limberg](http://achinghead.com) |
|
11
|
|
|
|
|
12
|
|
|
All changes Copyright 2008-2014 The Python Markdown Project |
|
13
|
|
|
|
|
14
|
|
|
License: [BSD](http://www.opensource.org/licenses/bsd-license.php) |
|
15
|
|
|
|
|
16
|
|
|
""" |
|
17
|
|
|
|
|
18
|
|
|
from __future__ import absolute_import |
|
19
|
|
|
from __future__ import unicode_literals |
|
20
|
|
|
from . import Extension |
|
21
|
|
|
from ..blockprocessors import BlockProcessor |
|
22
|
|
|
from ..util import etree |
|
23
|
|
|
import re |
|
24
|
|
|
# Bit flags recording which outer borders (leading / trailing pipe) the
# header row of a table uses. They are combined with ``|=``.
PIPE_NONE = 0
PIPE_LEFT = 1 << 0
PIPE_RIGHT = 1 << 1
|
27
|
|
|
|
|
28
|
|
|
|
|
29
|
|
|
class TableProcessor(BlockProcessor):
    """ Process Tables.

    ``test`` validates the first two rows of a block and caches what it
    learned (the header's border flags in ``self.border`` and the split
    separator row in ``self.separator``); ``run`` relies on that cached
    state to build the ``<table>`` element.
    """

    # Tokens that matter when splitting a row, in group order:
    #   1: ``\\``  escaped backslash    2: ``\`` followed by backticks
    #   3: run of backticks             4: ``\|`` escaped pipe
    #   5: bare pipe (candidate cell delimiter)
    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
    # A pipe at the end of the row that is not escaped, i.e. it is preceded
    # by an even number (possibly zero) of backslashes.
    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')

    def __init__(self, parser):
        # ``border`` is a PIPE_* bitmask; start with "no border" rather than
        # a bool so the attribute has one consistent type.
        self.border = PIPE_NONE
        self.separator = ''
        super(TableProcessor, self).__init__(parser)

    def test(self, parent, block):
        """
        Ensure first two rows (column header and separator row) are valid table rows.

        Keep border check and separator row to avoid repeating the work.

        ``parent`` is not used. Returns ``True`` when ``block`` looks like a
        table, ``False`` otherwise. As a side effect ``self.border`` is reset
        from the header row whenever the block has more than one line, and
        ``self.separator`` is updated when the block is accepted.
        """
        rows = [row.strip() for row in block.split('\n')]
        if len(rows) < 2:
            # A table needs at least a header row and a separator row.
            return False

        header0 = rows[0]
        self.border = PIPE_NONE
        if header0.startswith('|'):
            self.border |= PIPE_LEFT
        if self.RE_END_BORDER.search(header0) is not None:
            self.border |= PIPE_RIGHT
        row0_len = len(self._split_row(header0))
        is_table = row0_len > 1

        # Each row in a single column table needs at least one pipe.
        if not is_table and row0_len == 1 and self.border:
            is_table = all(
                row.startswith('|') or
                self.RE_END_BORDER.search(row) is not None
                for row in rows[1:]
            )

        if is_table:
            # The separator must have as many cells as the header and may
            # only consist of pipes, colons, dashes and spaces.
            row = self._split_row(rows[1])
            is_table = (len(row) == row0_len) and set(''.join(row)) <= set('|:- ')
            if is_table:
                self.separator = row

        return is_table

    def run(self, parent, blocks):
        """ Parse a table block and build table.

        Pops the first entry of ``blocks`` and appends a ``table`` element
        (with ``thead`` and ``tbody``) to ``parent``. Column alignment is read
        from ``self.separator`` as stored by ``test``; the separator line in
        the block itself is skipped.
        """
        block = blocks.pop(0).split('\n')
        header = block[0].strip()
        # Slicing past the end yields an empty list, so no length check is
        # needed when the block holds only the header and separator.
        rows = block[2:]

        # Get alignment of columns
        align = []
        for c in self.separator:
            c = c.strip()
            if c.startswith(':') and c.endswith(':'):
                align.append('center')
            elif c.startswith(':'):
                align.append('left')
            elif c.endswith(':'):
                align.append('right')
            else:
                align.append(None)

        # Build table
        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
        self._build_row(header, thead, align)
        tbody = etree.SubElement(table, 'tbody')
        if not rows:
            # Handle empty table
            self._build_empty_row(tbody, align)
        else:
            for row in rows:
                self._build_row(row.strip(), tbody, align)

    def _build_empty_row(self, parent, align):
        """Build an empty row: one ``tr`` with an empty ``td`` per column."""
        tr = etree.SubElement(parent, 'tr')
        for _ in align:
            etree.SubElement(tr, 'td')

    def _build_row(self, row, parent, align):
        """ Given a row of text, build table cells.

        Cells are ``th`` when ``parent`` is a ``thead`` element and ``td``
        otherwise. An ``align`` attribute is set on a cell when the matching
        entry of ``align`` is truthy.
        """
        tr = etree.SubElement(parent, 'tr')
        tag = 'th' if parent.tag == 'thead' else 'td'
        cells = self._split_row(row)
        # We use align here rather than cells to ensure every row
        # contains the same number of columns: surplus cells are dropped
        # and missing cells are emitted empty.
        for i, a in enumerate(align):
            c = etree.SubElement(tr, tag)
            c.text = cells[i].strip() if i < len(cells) else ""
            if a:
                c.set('align', a)

    def _split_row(self, row):
        """ split a row of text into list of cells.

        When the header row had a border (``self.border`` is non-zero) a
        leading pipe and an unescaped trailing pipe are removed first.
        """
        if self.border:
            if row.startswith('|'):
                row = row[1:]
            border = self.RE_END_BORDER.search(row)
            if border is not None:
                # The match covers any escaped backslashes in front of the
                # closing pipe as well. Those belong to the last cell, so
                # remove only the pipe itself (the final matched character).
                row = row[:border.end() - 1] + row[border.end():]
        return self._split(row)

    def _split(self, row):
        """ split a row of text with some code into a list of cells.

        Pipes that are escaped, or that fall inside a region delimited by a
        matched pair of backtick groups, are not treated as delimiters.
        Always returns at least one element.
        """
        pipes = []
        tics = []
        tic_points = []
        tic_region = []

        # Parse row
        # Throw out \\, and \| (groups 1 and 4 are matched but ignored).
        for m in self.RE_CODE_PIPES.finditer(row):
            # Store ` data (len, start_pos, end_pos)
            if m.group(2):
                # \`+
                # Store length of each tic group: subtract \
                tics.append(len(m.group(2)) - 1)
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(2), m.end(2) - 1, 1))
            elif m.group(3):
                # `+
                # Store length of each tic group
                tics.append(len(m.group(3)))
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(3), m.end(3) - 1, 0))
            # Store pipe location
            elif m.group(5):
                pipes.append(m.start(5))

        # Pair up tics according to size if possible
        # Subtract the escape length *only* from the opening.
        # Walk through tic list and see if tic has a close.
        # Store the tic region (start of region, end of region).
        pos = 0
        tic_len = len(tics)
        while pos < tic_len:
            tic_size = tics[pos] - tic_points[pos][2]
            close = None
            # A size of zero means the group was a lone escaped backtick,
            # which cannot open a code region.
            if tic_size:
                try:
                    close = tics.index(tic_size, pos + 1)
                except ValueError:
                    close = None
            if close is None:
                pos += 1
            else:
                tic_region.append((tic_points[pos][0], tic_points[close][1]))
                # Resume the search after the closing group.
                pos = close + 1

        # Resolve pipes. A pipe inside any tic pair region is part of a code
        # span and is thrown out; every other pipe is a table delimiter.
        good_pipes = [
            pipe for pipe in pipes
            if not any(start <= pipe <= end for start, end in tic_region)
        ]

        # Split row according to table delimiters.
        elements = []
        pos = 0
        for pipe in good_pipes:
            elements.append(row[pos:pipe])
            pos = pipe + 1
        elements.append(row[pos:])
        return elements
|
212
|
|
|
|
|
213
|
|
|
|
|
214
|
|
|
class TableExtension(Extension):
    """ Add tables to Markdown. """

    def extendMarkdown(self, md, md_globals):
        """ Add an instance of TableProcessor to BlockParser.

        Also makes sure ``'|'`` is listed in ``md.ESCAPED_CHARS``.
        ``md_globals`` is not used.
        """
        escaped_chars = md.ESCAPED_CHARS
        if '|' not in escaped_chars:
            escaped_chars.append('|')
        processors = md.parser.blockprocessors
        # '<hashheader' is the location argument handed to the registry.
        processors.add('table', TableProcessor(md.parser), '<hashheader')
|
224
|
|
|
|
|
225
|
|
|
|
|
226
|
|
|
def makeExtension(*args, **kwargs):
    """ Create a ``TableExtension``, forwarding every argument unchanged. """
    extension = TableExtension(*args, **kwargs)
    return extension
|
228
|
|
|
|