Completed
Push — master ( 32cfa8...ec62d3 )
by Dongxin
48s
created

InlineProcessor.run()   F

Complexity

Conditions 19

Size

Total Lines 70

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 19
c 1
b 0
f 0
dl 0
loc 70
rs 2.559

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the body into a new, well-named method.

Complexity

Complex methods like InlineProcessor.run() often do a lot of different things. To break such a method down, we need to identify a cohesive component within its class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from __future__ import unicode_literals
2
from __future__ import absolute_import
3
from . import util
4
from . import odict
5
from . import inlinepatterns
6
7
8
def build_treeprocessors(md_instance, **kwargs):
    """
    Build the default treeprocessors for Markdown.

    Keyword arguments:

    * md_instance: object handed to each treeprocessor's constructor.
    * kwargs: accepted but not used here.

    Returns: an ordered mapping with the "inline" processor first and the
    "prettify" processor second.

    """
    defaults = (
        ("inline", InlineProcessor),
        ("prettify", PrettifyTreeprocessor),
    )
    registry = odict.OrderedDict()
    for key, processor_class in defaults:
        registry[key] = processor_class(md_instance)
    return registry
14
15
16
def isString(s):
    """
    Tell whether `s` is a plain (non-atomic) string.

    Returns: False for any `util.AtomicString` instance, otherwise whether
    `s` is an instance of `util.string_type`.

    """
    if isinstance(s, util.AtomicString):
        return False
    return isinstance(s, util.string_type)
21
22
23
class Treeprocessor(util.Processor):
    """
    Base class for processors that run on the ElementTree object before
    serialization.

    A Treeprocessor provides a "run" method which receives an ElementTree,
    changes it as required and hands back an ElementTree object.

    Treeprocessors must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Hook to be implemented by subclasses of Treeprocessor.

        The method is given the root ElementTree.  It may return a new
        ElementTree object, in which case the existing root is replaced, or
        it may change the given tree in place and return None.
        """
        return None  # pragma: no cover
42
43
44
class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.

    While text is scanned, every construct matched by an inline pattern is
    swapped for a placeholder string and the node produced for it is kept in
    `self.stashed_nodes`.  The placeholders are afterwards expanded back into
    elements and plain strings.
    """

    def __init__(self, md):
        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = util.ETX
        # NOTE(review): the 4 presumably accounts for the zero padded id
        # built with "%04d" in __makePlaceholder -- confirm.
        self.__placeholder_length = (4 + len(self.__placeholder_prefix) +
                                     len(self.__placeholder_suffix))
        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.markdown = md
        self.inlinePatterns = md.inlinePatterns
        # Created up front so the helper methods never hit a missing
        # attribute; run() still starts every pass with a fresh stash.
        self.stashed_nodes = {}

    def __makePlaceholder(self, type):
        """
        Generate a placeholder for the next node to be stashed.

        Keyword arguments:

        * type: pattern type; accepted but not used to build the placeholder.

        Returns: tuple of the placeholder string and the id it carries.

        """
        # The id is the current stash size, zero padded to four digits.
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = util.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        When nothing is found the id is None and the index is `index + 1`.

        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """
        Add node to stash

        Keyword arguments:

        * node: the element or string to keep
        * type: pattern type, passed on to the placeholder factory

        Returns: the placeholder string that now stands for `node`.

        """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders.

        """
        if not isinstance(data, util.AtomicString):
            startIndex = 0
            while patternIndex < len(self.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
                    self.inlinePatterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # A pattern is retried until it stops matching; only then
                # do we move on to the next one.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode, isText)

        if not isText and node is not subnode:
            # Elements found in a tail go right after the subnode.
            pos = list(node).index(subnode) + 1
        else:
            pos = 0

        # Inserting in reverse at a fixed position keeps the original order.
        for newChild in reversed(childResult):
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent, isText=True):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data
        * isText: True when `data` is the parent's text, False when it is
          the parent's tail; decides where leading plain text is attached.

        Returns: list with ElementTree elements with applied inline patterns.

        """
        def linkText(text):
            # Attach plain text to the tail of the last produced element, or
            # to the parent's tail/text when no element was produced yet.
            if text:
                if result:
                    if result[-1].tail:
                        result[-1].tail += text
                    else:
                        result[-1].tail = text
                elif not isText:
                    if parent.tail:
                        parent.tail += text
                    else:
                        parent.tail = text
                else:
                    if parent.text:
                        parent.text += text
                    else:
                        parent.text = text

        result = []
        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index != -1:
                node_id, phEndIndex = self.__findPlaceholder(data, index)

                if node_id in self.stashed_nodes:
                    node = self.stashed_nodes[node_id]

                    if index > 0:
                        text = data[startIndex:index]
                        linkText(text)

                    if isString(node):  # it's just a string
                        linkText(node)
                    else:  # it's Element
                        for child in [node] + list(node):
                            if child.tail:
                                if child.tail.strip():
                                    self.__processElementText(
                                        node, child, False
                                    )
                            if child.text:
                                if child.text.strip():
                                    self.__processElementText(child, child)
                        result.append(node)

                    startIndex = phEndIndex

                else:  # wrong placeholder
                    # Keep the prefix as literal text and resume after it.
                    end = index + len(self.__placeholder_prefix)
                    linkText(data[startIndex:end])
                    startIndex = end
            else:
                text = data[startIndex:]
                if isinstance(data, util.AtomicString):
                    # We don't want to lose the AtomicString
                    text = util.AtomicString(text)
                linkText(text)
                data = ""

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we start searching

        Returns: tuple of the (possibly changed) string with placeholders
        instead of ElementTree elements, a flag telling whether the pattern
        matched, and the index from which the next search should start.

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # The pattern declined the match: leave the data untouched and
            # continue searching from the start of the last regex group.
            return data, True, len(leftData)+match.span(len(match.groups()))[0]

        if not isString(node):
            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + list(node):
                    if child.text:
                        child.text = self.__handleInline(
                            child.text, patternIndex + 1
                        )
                    if child.tail:
                        child.tail = self.__handleInline(
                            child.tail, patternIndex
                        )

        placeholder = self.__stashNode(node, pattern.type())

        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def __processChildText(self, child):
        """ Detach child.text and return the elements its patterns yield. """
        text = child.text
        child.text = None
        return self.__processPlaceholders(self.__handleInline(text), child)

    def __processChildTail(self, parent, child):
        """ Apply patterns to child.tail, inserting results after child. """
        tail = self.__handleInline(child.tail)
        # Throw-away element that collects the plain text left in the tail.
        holder = util.etree.Element('d')
        child.tail = None
        tailResult = self.__processPlaceholders(tail, holder, False)
        if holder.tail:
            child.tail = holder.tail
        pos = list(parent).index(child) + 1
        # Inserting in reverse at a fixed position keeps the original order.
        for newChild in reversed(tailResult):
            parent.insert(pos, newChild)

    def __insertChildren(self, element, children):
        """ Insert children at the front of element, handling attributes. """
        handleAttrs = self.markdown.enable_attributes
        if handleAttrs and element.text and isString(element.text):
            element.text = inlinepatterns.handleAttributes(
                element.text, element
            )
        for position, newChild in enumerate(children):
            if handleAttrs:
                # Processing attributes
                if newChild.tail and isString(newChild.tail):
                    newChild.tail = inlinepatterns.handleAttributes(
                        newChild.tail, element
                    )
                if newChild.text and isString(newChild.text):
                    newChild.text = inlinepatterns.handleAttributes(
                        newChild.text, newChild
                    )
            element.insert(position, newChild)

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want to process your data with inline patterns, instead of normal
        string, use subclass AtomicString:

            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            # NOTE(review): tail results are inserted into currElement while
            # it is being iterated; the statement order is kept unchanged.
            for child in currElement:
                if child.text and not isinstance(
                    child.text, util.AtomicString
                ):
                    lst = self.__processChildText(child)
                    stack += lst
                    insertQueue.append((child, lst))
                if child.tail:
                    self.__processChildTail(currElement, child)
                if len(child):
                    stack.append(child)

            # Elements created from text are attached only after the whole
            # level has been walked.
            for element, lst in insertQueue:
                self.__insertChildren(element, lst)
        return tree
334
335
336
class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """
        Recursively add linebreaks to ElementTree children.

        Keyword arguments:

        * elem: the element whose text/tail (and block level children) get
          linebreaks; it is modified in place.

        Returns: None

        """
        i = "\n"
        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            # Only blank text in front of a block level first child is
            # replaced by a linebreak.
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = i
            for e in elem:
                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        # A missing or blank tail becomes a linebreak for every element.
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root):
        """
        Add linebreaks to ElementTree root object.

        Keyword arguments:

        * root: the tree to prettify; it is modified in place.

        Returns: None

        """
        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        for br in root.iter('br'):
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        for pre in root.iter('pre'):
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # An element's text may be None (no text before its first
                # child); there is nothing to strip then.
                if code.text is not None:
                    code.text = util.AtomicString(code.text.rstrip() + '\n')
372