collect()   F
last analyzed

Complexity

Conditions 13

Size

Total Lines 64

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 13
c 2
b 0
f 0
dl 0
loc 64
rs 2.7658

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex units of code like collect() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods (or, inside a function, variables and statements) that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to create lists of all kinds
5
"""
6
7
from pandocfilters import toJSONFilters, walk, Str, Plain, Link, BulletList, Para, RawInline
8
from functools import reduce
9
from copy import deepcopy
10
import io
11
import sys
12
import codecs
13
import json
14
import re
15
import unicodedata
16
import subprocess
17
18
# Collected items keyed by collection name; each entry is a list of
# {'identifier': ..., 'text': ...} dicts appended by collect().
collections = dict()
# Running counters for header levels 1 to 6, updated by collect()
headers = [0] * 6
# Separate running counters for header levels 1 to 6, updated by listof()
headers2 = [0] * 6
21
22
def stringify(x, format):
    """Walk the tree x and return its concatenated string content,
    leaving out all formatting.

    Math content is wrapped in ``$...$`` when format is 'latex' and is
    emitted bare otherwise. Space, SoftBreak and LineBreak nodes each
    contribute a single space. The content of Note nodes is left out.
    The tree x itself is not modified.
    """
    result = []

    def go(key, val, format, meta):
        if key in ('Str', 'MetaString'):
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package
            if format == 'latex':
                result.append('$' + val[1] + '$')
            else:
                result.append(val[1])
        elif key in ('Space', 'SoftBreak', 'LineBreak'):
            # SoftBreak is a wrapped source line; without a space here the
            # words on either side would be glued together.
            result.append(" ")
        elif key == 'Note':
            # Do not stringify value from Note node: an empty list tells
            # walk() to drop the node without descending into it, so the
            # caller's tree is left untouched.
            return []

    walk(x, go, format, {})
    return ''.join(result)
49
50
def collect(key, value, format, meta):
    """Filter action that records tagged spans in ``collections``.

    For a Header that is not 'unnumbered', the counter of its level in
    ``headers`` is incremented and the counters of all deeper levels are
    reset to 0.

    For a Span whose identifier looks like ``prefix:identifier``, the
    plain text of the span is appended to the collection named ``prefix``
    and to one collection per current section depth (``prefix:1``,
    ``prefix:1.2``, ...). When format is 'latex', a raw TeX inline with the
    matching ``\\addcontentsline`` commands is inserted at the start of the
    span content (the span is modified in place).

    Always returns None, so the node itself is kept.
    """
    if key == 'Header':
        # Keep the correct numbered headers in the headers array
        [level, [ident, classes, attributes], content] = value
        if 'unnumbered' not in classes:
            headers[level - 1] += 1
            for index in range(level, 6):
                headers[index] = 0

    elif key == 'Span':
        [[anchor, classes, other], text] = value

        # Is the anchor of the form prefix:identifier?
        tag = re.match(r'^([a-zA-Z][\w.-]*):([\w.-]+)$', anchor)
        if not tag:
            return

        prefix = tag.group(1)
        identifier = tag.group(2)

        # Work on a copy so that extracting the text cannot alter the span
        title = stringify(deepcopy(text), format)

        names = _collection_names(prefix)
        for name in names:
            collections.setdefault(name, []).append(
                {'identifier': identifier, 'text': title}
            )

        # Special case for LaTeX output. text is the very list stored in
        # value[1], so inserting into it updates the span in place.
        if format == 'latex':
            text.insert(0, RawInline('tex', _latex_toc_entries(names, title)))


def _collection_names(prefix):
    """Return prefix plus one 'prefix:a.b.c' name per current section depth."""
    names = [prefix]
    for depth, count in enumerate(headers, 1):
        if count <= 0:
            # Deeper levels are not numbered yet
            break
        names.append(prefix + ':' + '.'.join(map(str, headers[:depth])))
    return names


def _latex_toc_entries(names, title):
    """Build the \\addcontentsline commands registering title in every list."""
    def entry(toc):
        return '\\phantomsection\\addcontentsline{' + toc + '}{figure}{' + title + '}'

    parts = [entry(names[0])]
    for alt_name in names[1:]:
        # Each section-scoped list is written twice: once under its explicit
        # number and once under the '_' suffixed name.
        parts.append(entry(alt_name))
        parts.append(entry(alt_name + '_'))
    return ''.join(parts)
114
115
def listof(key, value, format, meta):
    """Filter action that replaces a ``{tag}`` paragraph by the list it names.

    For a Header that is not 'unnumbered', the counter of its level in
    ``headers2`` is incremented and the counters of all deeper levels are
    reset to 0.

    For a paragraph made of a single string:

    * ``{name}``, ``{name:1.2}`` or ``{name:#.#}`` is replaced by the
      content of the matching collection, if one exists. In the ``#`` form
      the section numbers are taken from the current header counters.
      LaTeX output gets a raw ``\\@starttoc`` paragraph (link colour taken
      from the 'toccolor' metadata, black by default); every other format
      gets a bullet list of links.
    * ``{{name}`` has its first brace removed in place, which lets a
      document show a literal ``{name}``.

    Returns the replacement element, or None to leave the node unchanged.
    """
    if key == 'Header':
        [level, [ident, classes, attributes], content] = value
        if 'unnumbered' not in classes:
            headers2[level - 1] += 1
            for index in range(level, 6):
                headers2[index] = 0
        return None

    # Is it a paragraph with only one string?
    if not (key == 'Para' and len(value) == 1 and value[0]['t'] == 'Str'):
        return None

    token = value[0]['c']

    # Is it {tag}?
    tag = re.match(
        r'^{(?P<name>(?P<prefix>[a-zA-Z][\w.-]*)(?P<section>\:((?P<sharp>#(\.#)*)|(\d+(\.\d+)*)))?)}$',
        token,
    )
    if not tag:
        # Special case where the paragraph start with '{{...'
        if re.match(r'^{{[a-zA-Z][\w.-]*}$', token):
            value[0]['c'] = token[1:]
        return None

    prefix = tag.group('prefix')
    sharp = tag.group('sharp')

    # Get the collection name
    if sharp is None:
        name = tag.group('name')
    else:
        # '#', '#.#', '#.#.#' ... have lengths 1, 3, 5 ...
        level = (len(sharp) - 1) // 2 + 1
        name = prefix + ':' + '.'.join(map(str, headers2[:level]))

    # Is it an existing collection
    if name not in collections:
        return None

    if format == 'latex':
        # Special case for LaTeX output
        if 'toccolor' in meta:
            linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c'], format) + '}'
        else:
            linkcolor = '\\hypersetup{linkcolor=black}'
        suffix = '' if sharp is None else '_'
        return Para([RawInline('tex', linkcolor + '\\makeatletter\\@starttoc{' + name + suffix + '}\\makeatother')])

    # The Link constructor takes an extra attribute argument from pandoc 1.16
    legacy_links = _pandoc_older_than(1, 16)

    elements = []
    for item in collections[name]:
        label = [Str(item['text'])]
        target = ['#' + prefix + ':' + item['identifier'], '']
        if legacy_links:
            # pandoc 1.15
            link = Link(label, target)
        else:
            # pandoc 1.16
            link = Link(['', [], []], label, target)
        elements.append([Plain([link])])

    # Return a bullet list
    return BulletList(elements)


def _pandoc_older_than(major, minor):
    """Tell whether the installed pandoc is older than major.minor, numerically."""
    found = re.match(r'(\d+)(?:\.(\d+))?', pandocVersion())
    if not found:
        # Unparseable version string: assume a current pandoc
        return False
    return (int(found.group(1)), int(found.group(2) or 0)) < (major, minor)
180
181
def pandocVersion():
    """Return the version of the ``pandoc`` executable as a string.

    The version is read from the output of ``pandoc -v`` on the first call
    and cached on the function itself (``pandocVersion.value``), so the
    external program is started at most once.

    Raises RuntimeError when no version can be found in that output.
    """
    if not hasattr(pandocVersion, 'value'):
        p = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        found = re.search(b'pandoc (?P<version>.*)', out)
        if found is None:
            raise RuntimeError("unable to read the pandoc version from the output of 'pandoc -v'")
        # strip() drops a trailing carriage return or blanks captured by '.*'
        pandocVersion.value = found.group('version').decode('utf-8').strip()
    return pandocVersion.value
187
188
189
def main():
    """Run the filter, handing collect and listof to toJSONFilters in that order."""
    actions = [collect, listof]
    toJSONFilters(actions)


if __name__ == '__main__':
    main()
194
195