Completed
Push — master ( 32cfa8...ec62d3 )
by Dongxin
48s
created

TableProcessor.test()   F

Complexity

Conditions 13

Size

Total Lines 35

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 13
c 1
b 0
f 0
dl 0
loc 35
rs 2.7716

How to fix   Complexity   

Complexity

Complex classes like TableProcessor.test() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""
2
Tables Extension for Python-Markdown
3
====================================
4
5
Added parsing of tables to Python-Markdown.
6
7
See <https://pythonhosted.org/Markdown/extensions/tables.html>
8
for documentation.
9
10
Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
11
12
All changes Copyright 2008-2014 The Python Markdown Project
13
14
License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
15
16
"""
17
18
from __future__ import absolute_import
19
from __future__ import unicode_literals
20
from . import Extension
21
from ..blockprocessors import BlockProcessor
22
from ..util import etree
23
import re
24
# Bit flags recording which outer edges of a table's header row carry a pipe.
# They are OR-ed together into TableProcessor.border.
PIPE_NONE = 0        # neither a leading nor a trailing border pipe
PIPE_LEFT = 1 << 0   # header row starts with '|'
PIPE_RIGHT = 1 << 1  # header row ends with an unescaped '|'
27
28
29
class TableProcessor(BlockProcessor):
    """ Process Tables.

    `test` stores two pieces of state on the instance so that `run` does not
    have to recompute them: `border` (PIPE_* flags for the header row's outer
    pipes) and `separator` (the split cells of the separator row).
    """

    # Alternatives, in order: an escaped backslash, a backslash followed by
    # backticks, a run of backticks, an escaped pipe, and a bare pipe.
    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
    # A pipe at the end of the row preceded by an even number of backslashes,
    # i.e. a trailing pipe that is not itself escaped.
    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')

    def __init__(self, parser):
        # PIPE_* flags for the header row's outer pipes; refreshed by test().
        self.border = PIPE_NONE
        # Split cells of the separator row; set by test() when a block passes.
        self.separator = ''
        super(TableProcessor, self).__init__(parser)

    def test(self, parent, block):
        """
        Ensure first two rows (column header and separator row) are valid table rows.

        Keep border check and separator row to avoid repeating the work.

        Returns True when the block looks like a table, False otherwise.
        """
        rows = [row.strip() for row in block.split('\n')]
        if len(rows) < 2:
            # A table needs at least a header row and a separator row.
            return False

        header = rows[0]
        # The border must be stored before splitting: _split_row reads it.
        self.border = self._detect_border(header)
        column_count = len(self._split_row(header))

        if column_count == 1:
            # Each row in a single column table needs at least one pipe.
            if not self.border:
                return False
            if not all(self._has_border_pipe(row) for row in rows[1:]):
                return False

        # The separator row must have one cell per header cell and may only
        # contain pipes, colons, dashes and spaces.
        separator = self._split_row(rows[1])
        if len(separator) != column_count:
            return False
        if not set(''.join(separator)) <= set('|:- '):
            return False

        self.separator = separator
        return True

    def _detect_border(self, row):
        """ Return the PIPE_* flags for the outer pipes present on `row`. """
        border = PIPE_NONE
        if row.startswith('|'):
            border |= PIPE_LEFT
        if self.RE_END_BORDER.search(row) is not None:
            border |= PIPE_RIGHT
        return border

    def _has_border_pipe(self, row):
        """ Return True if `row` starts with a pipe or ends with an unescaped one. """
        return row.startswith('|') or self.RE_END_BORDER.search(row) is not None

    def run(self, parent, blocks):
        """ Parse a table block and build table.

        Pops the first block from `blocks` and appends a `table` element to
        `parent`. Relies on `self.separator` as stored by `test`.
        """
        block = blocks.pop(0).split('\n')
        header = block[0].strip()
        # Line 0 is the header and line 1 the separator; the rest is the body.
        rows = block[2:]

        # Get alignment of columns
        align = []
        for c in self.separator:
            c = c.strip()
            if c.startswith(':') and c.endswith(':'):
                align.append('center')
            elif c.startswith(':'):
                align.append('left')
            elif c.endswith(':'):
                align.append('right')
            else:
                align.append(None)

        # Build table
        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
        self._build_row(header, thead, align)
        tbody = etree.SubElement(table, 'tbody')
        if not rows:
            # Handle empty table
            self._build_empty_row(tbody, align)
        else:
            for row in rows:
                self._build_row(row.strip(), tbody, align)

    def _build_empty_row(self, parent, align):
        """Build an empty row with one `td` per entry in `align`."""
        tr = etree.SubElement(parent, 'tr')
        for _ in align:
            etree.SubElement(tr, 'td')

    def _build_row(self, row, parent, align):
        """ Given a row of text, build table cells.

        Cells are `th` when `parent` is the `thead` element and `td` otherwise.
        """
        tr = etree.SubElement(parent, 'tr')
        tag = 'th' if parent.tag == 'thead' else 'td'
        cells = self._split_row(row)
        # We use align here rather than cells to ensure every row
        # contains the same number of columns.
        for i, a in enumerate(align):
            c = etree.SubElement(tr, tag)
            try:
                c.text = cells[i].strip()
            except IndexError:  # pragma: no cover
                # Row has fewer cells than the header: pad with an empty cell.
                c.text = ""
            if a:
                c.set('align', a)

    def _split_row(self, row):
        """ split a row of text into list of cells.

        When the header row had a border pipe, a leading pipe and an
        unescaped trailing pipe are removed before splitting.
        """
        if self.border:
            if row.startswith('|'):
                row = row[1:]
            row = self.RE_END_BORDER.sub('', row)
        return self._split(row)

    def _split(self, row):
        """ split a row of text with some code into a list of cells.

        Escaped pipes and pipes inside a paired backtick region are kept as
        cell content; every other pipe is treated as a cell delimiter.
        """
        elements = []
        pipes = []
        tics = []
        tic_points = []
        tic_region = []
        good_pipes = []

        # Parse row
        # Throw out \\, and \|
        for m in self.RE_CODE_PIPES.finditer(row):
            # Store ` data (len, start_pos, end_pos)
            if m.group(2):
                # \`+
                # Store length of each tic group: subtract \
                tics.append(len(m.group(2)) - 1)
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(2), m.end(2) - 1, 1))
            elif m.group(3):
                # `+
                # Store length of each tic group
                tics.append(len(m.group(3)))
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(3), m.end(3) - 1, 0))
            # Store pipe location
            elif m.group(5):
                pipes.append(m.start(5))

        # Pair up tics according to size if possible
        # Subtract the escape length *only* from the opening.
        # Walk through tic list and see if tic has a close.
        # Store the tic region (start of region, end of region).
        pos = 0
        tic_len = len(tics)
        while pos < tic_len:
            try:
                tic_size = tics[pos] - tic_points[pos][2]
                if tic_size == 0:
                    # Nothing is left to open a region with; treat this
                    # group the same way as one that has no closing match.
                    raise ValueError
                index = tics[pos + 1:].index(tic_size) + 1
                tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
                pos += index + 1
            except ValueError:
                pos += 1

        # Resolve pipes.  Check if they are within a tic pair region.
        # Walk through pipes comparing them to each region.
        #     - If pipe position is less than a region, it isn't in a region
        #     - If it is within a region, we don't want it, so throw it out
        #     - If we didn't throw it out, it must be a table pipe
        for pipe in pipes:
            throw_out = False
            for region in tic_region:
                if pipe < region[0]:
                    # Pipe is not in a region
                    break
                elif region[0] <= pipe <= region[1]:
                    # Pipe is within a code region.  Throw it out.
                    throw_out = True
                    break
            if not throw_out:
                good_pipes.append(pipe)

        # Split row according to table delimiters.
        pos = 0
        for pipe in good_pipes:
            elements.append(row[pos:pipe])
            pos = pipe + 1
        elements.append(row[pos:])
        return elements
212
213
214
class TableExtension(Extension):
    """ Add tables to Markdown. """

    def extendMarkdown(self, md, md_globals):
        """ Register the pipe as an escapable character and add a TableProcessor. """
        escaped = md.ESCAPED_CHARS
        if '|' not in escaped:
            escaped.append('|')
        processor = TableProcessor(md.parser)
        # Added under the name 'table' with the location string '<hashheader'.
        md.parser.blockprocessors.add('table', processor, '<hashheader')
224
225
226
def makeExtension(*args, **kwargs):
    """ Return a TableExtension built from the given arguments. """
    return TableExtension(*args, **kwargs)
228