Completed
Push — master ( 85d380...5cc46a )
by Christophe
01:14
created

getFormat()   F

Complexity

Conditions 13

Size

Total Lines 24

Duplication

Lines 24
Ratio 100 %

Importance

Changes 0
Metric Value
cc 13
c 0
b 0
f 0
dl 24
loc 24
rs 2.8487

How to fix   Complexity   

Complexity

Complex units of code like getFormat() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Running counter for each "category:section-prefix" key (see numbering).
count = {}
# Per-tag record of the computed numbers, description, title, link and toc.
information = {}
# Tags gathered per basic category, in order of appearance (see addListings).
collections = {}
# Current counter of each of the six header levels.
headers = [0, 0, 0, 0, 0, 0]
# Sectioning prefix of a tag: any number of hidden levels ('-.') followed by
# any number of displayed levels ('+.').  Raw string so that '\.' and '\+'
# are not treated as (invalid) string escape sequences.
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Apply a chain of filter actions to the pandoc document read from stdin.

    The JSON-formatted document is read from stdin, walked once per action
    (in the order given), completed with the listings and written back to
    stdout as JSON.

    Each action is a function action(key, value, format, meta) where key is
    the type of the pandoc object (e.g. 'Str', 'Para'), value is its contents
    (e.g. a string for 'Str', a list of inline elements for 'Para'), format
    is the target output format (the first command line argument, or "" when
    there is none) and meta is the document's metadata.

    An action returning None leaves the object unchanged; returning an object
    replaces it; returning a list splices that list into the enclosing list
    (so an empty list deletes the object).
    """
    try:
        reader = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        reader = codecs.getreader("utf-8")(sys.stdin)

    document = json.loads(reader.read())
    target = sys.argv[1] if len(sys.argv) > 1 else ""

    # Every action receives the metadata of the document as it was read.
    transformed = document
    for action in actions:
        transformed = walk(transformed, action, target, document[0]['unMeta'])

    addListings(transformed, target, transformed[0]['unMeta'])
    json.dump(transformed, sys.stdout)
54
55
def removeAccents(string):
    """Return string in NFKD form with every combining character dropped."""
    decomposed = unicodedata.normalize('NFKD', string)
    return u"".join(char for char in decomposed if not unicodedata.combining(char))
58
59
def toIdentifier(string):
    """Turn string into an identifier made of ASCII letters, digits, '_' and '-'.

    The string is lower-cased and stripped of its accents, every run of other
    characters becomes a single dash, and whatever precedes the first letter
    is removed.
    """
    normalized = removeAccents(string.lower())

    # Replace each run of invalid characters by one dash
    dashed = re.sub('[^0-9a-zA-Z_-]+', '-', normalized)

    # Drop the leading characters that are not letters (digits, dashes, ...)
    return re.sub('^[^a-zA-Z]+', '', dashed)
67
68
def toLatex(x):
    """Walk the tree x and return its concatenated text content for LaTeX.

    All formatting is left out.  Inline math is wrapped in '$', line breaks
    and spaces become a single space, and the content of footnotes is
    skipped.  The tree x itself is left untouched.
    """
    result = []

    def go(key, val, format, meta):
        if key in ['Str', 'MetaString']:
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package
            if format == 'latex':
                result.append('$' + val[1] + '$')
            else:
                result.append(val[1])
        elif key in ['LineBreak', 'Space']:
            result.append(" ")
        elif key == 'Note':
            # Do not stringify the content of a Note node.  Returning an
            # empty list makes walk skip the node; emptying val in place
            # would also erase the footnote from the document, because x
            # shares its nodes with it.
            return []

    walk(x, go, 'latex', {})
    return ''.join(result)
95
96
def numbering(key, value, format, meta):
    """Filter action numbering the paragraphs that end with a '#' tag.

    Header elements only update the section counters kept in headers
    (headers carrying the 'unnumbered' class are ignored).

    A Para whose last two inlines are a Space and a Str matching
    '<sectioning>#[prefix:][name]' is replaced by a numbered paragraph:

    * the inlines before the tag form the description; a trailing
      parenthesised group '(...)' is taken out of it and used as the title;
    * the category is the prefix when given, otherwise an identifier derived
      from the description;
    * the sectioning ('-.' hidden levels then '+.' displayed levels) comes
      from the tag, or from getDefaultLevels when the tag carries none;
    * the computed numbers, description, title, link text and toc entry are
      stored in information under the tag, and the tag is appended to
      collections for its category.

    A Para ending with '...##...' is returned with the first '##' replaced by
    '#' (escape for a literal '#').

    Returns the replacement Para, or None when the element is left as is.
    """
    global headerRegex, count, information, collections

    if key == 'Header':
        [level, [identifier, classes, attributes], content] = value
        if 'unnumbered' not in classes:
            headers[level - 1] = headers[level - 1] + 1
            # Entering a new section resets the counters of all deeper levels
            for index in range(level, 6):
                headers[index] = 0
        return None

    if key != 'Para':
        return None

    if not (len(value) >= 3 and value[-2] == Space() and value[-1]['t'] == 'Str'):
        return None

    last = value[-1]['c']

    match = re.match('^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$', last)

    if not match:
        if re.match('^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$', last):
            # Special case where the last element is '...##...'
            value[-1]['c'] = value[-1]['c'].replace('##', '#', 1)
            return Para(value)
        return None

    # Detect the title: a group '(...)' ending right before the tag
    title = []
    if value[-3]['t'] == 'Str' and value[-3]['c'][-1:] == ')':
        for (i, item) in enumerate(value):
            # Slice instead of indexing so that an empty Str does not raise
            if item['t'] == 'Str' and item['c'][:1] == '(':
                title = value[i:-2]
                title[0]['c'] = title[0]['c'][1:]
                title[-1]['c'] = title[-1]['c'][:-1]
                # Drop the title and the Space before it.  When the title is
                # the very first inline there is nothing before it: i - 1
                # would be -1 and keep almost the whole paragraph instead.
                value = value[:max(i - 1, 0)] + value[-2:]
                break

    # Compute the description
    description = value[:-2]

    # Compute the basicCategory and the category
    prefix = match.group('prefix')
    if prefix is None:
        basicCategory = toIdentifier(stringify(description))
    else:
        basicCategory = prefix

    # Compute the levelInf and levelSup values (each level is 2 characters)
    levelInf = len(match.group('hidden')) // 2
    levelSup = len(match.group('header')) // 2

    # Get the default inf and sup level
    if levelInf == 0 and levelSup == 0:
        [levelInf, levelSup] = getDefaultLevels(basicCategory, meta)

    # Compute the section number
    sectionNumber = '.'.join(map(str, headers[:levelSup]))

    # Compute the leading (composed of the section numbering and a dot)
    leading = sectionNumber + '.' if levelSup != 0 else ''

    category = basicCategory + ':' + leading

    # Count one more element in this category (starting at 1)
    count[category] = count.get(category, 0) + 1

    # Get the number
    number = str(count[category])

    # Determine the final tag
    name = match.group('name')
    if name is None:
        tag = category + number
    else:
        tag = basicCategory + ':' + name

    # Replace the '-.-.+.+...#' by the category count (omitting the hidden part)
    localNumber = '.'.join(map(str, headers[levelInf:levelSup] + [number]))

    # Compute the globalNumber
    if sectionNumber:
        globalNumber = sectionNumber + '.' + number
    else:
        globalNumber = number

    formatted = getFormat(basicCategory, meta)
    categoryClasses = getClasses(basicCategory, meta)

    # Is the automatic formatting required for this category?
    if formatted:
        # Prepare the final text
        text = [Strong(description + [Space(), Str(localNumber)])]

        # Add the title to the final text
        if title:
            text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

        # Compute the link
        link = description + [Space(), Str(localNumber)]

        # Compute the toc
        toc = [Str(globalNumber), Space()] + (title if title else description)
    else:
        # Prepare the final text: one Span per piece, left to external styling
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]

        # Compute the link
        link = [Span(['', ['pandoc-numbering-link'] + categoryClasses, []], text)]

        # Compute the toc
        toc = [Span(['', ['pandoc-numbering-toc'] + categoryClasses, []], text)]

    # Store the numbers and the label for automatic numbering (See referencing function)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # Prepare the contents
    contents = [Span([tag, ['pandoc-numbering-text'] + categoryClasses, []], text)]

    # Compute collections
    collections.setdefault(basicCategory, []).append(tag)

    # Special case for LaTeX: register the element in the category listing
    if format == 'latex' and formatted:
        latexCategory = re.sub('[^a-z]+', '', basicCategory)
        entry = title if title else description
        latex = '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory + '}{\\protect\\numberline {' + \
            leading + number + '}{\\ignorespaces ' + toLatex(entry) + '}}'
        contents.insert(0, RawInline('tex', latex))

    # Return the contents in a Para element
    return Para(contents)
251
252
# State handed from referencing() to replacing(): the marker looked for in
# each Str and the list of inlines substituted for it.
replace = search = None
254
255
def lowering(key, value, format, meta):
    """Filter action replacing each Str by its lower-cased version."""
    if key != 'Str':
        return None
    return Str(value.lower())
258
259
def _isPandocBefore116():
    """Tell whether the running pandoc predates 1.16 (Link without attributes)."""
    # Compare numerically: as plain strings '1.9' would sort after '1.16'.
    parts = re.findall(r'\d+', pandocVersion())[:2]
    return tuple(int(part) for part in parts) < (1, 16)


def referencing(key, value, format, meta):
    """Filter action resolving the references to numbered elements.

    Link: when the target is '#tag' and tag is known in information, the
    markers '#t', '#T', '#d', '#D', '#s', '#g', '#c', '#n' and '#' found in
    the link title and in the link text are replaced (in that order) by the
    lower-cased title, the title, the lower-cased description, the
    description, the section, global, count and local numbers, and the local
    number again.  An empty link text is replaced by the default label stored
    for the tag.  The Link value is modified in place and None is returned.

    Cite: a citation whose first inline is the Str '@category:name', for a
    category with the cite shortcut enabled and a known tag, is replaced by a
    Link to '#tag' labelled with the local number.

    Any other element is left unchanged (None is returned).
    """
    global information, replace, search

    if key == 'Link':
        # pandoc 1.15 gives [text, [reference, title]], later versions give
        # [attributes, text, [reference, title]]: locate the text and the
        # target from the end so that both layouts work.
        i = len(value) - 2
        text = value[i]
        reference = value[i + 1][0]

        # Is it a link with a right reference?
        if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
            return None

        # Compute the name
        tag = reference[1:]
        if tag not in information:
            return None

        entry = information[tag]
        titleText = stringify(entry['title'])
        descriptionText = stringify(entry['description'])

        # Replace the markers in the link title.  The order matters: the
        # bare '#' must come last since it is a prefix of all the others.
        attribute = value[i + 1][1]
        for marker, replacement in [
                ('#t', titleText.lower()),
                ('#T', titleText),
                ('#d', descriptionText.lower()),
                ('#D', descriptionText),
                ('#s', entry['section']),
                ('#g', entry['global']),
                ('#c', entry['count']),
                ('#n', entry['local']),
                ('#', entry['local'])]:
            attribute = attribute.replace(marker, replacement)
        value[i + 1][1] = attribute

        if text == []:
            # The link text is empty, replace it with the default label
            value[i] = entry['link']
            return None

        # The link text is not empty: replace the same markers, in the same
        # order, by the corresponding inlines.
        for marker, inlines in [
                ('#t', walk(entry['title'], lowering, format, meta)),
                ('#T', entry['title']),
                ('#d', walk(entry['description'], lowering, format, meta)),
                ('#D', entry['description']),
                ('#s', [Str(entry['section'])]),
                ('#g', [Str(entry['global'])]),
                ('#c', [Str(entry['count'])]),
                ('#n', [Str(entry['local'])]),
                ('#', [Str(entry['local'])])]:
            # replacing() reads its parameters from these two globals
            search, replace = marker, inlines
            value[i] = walk(value[i], replacing, format, meta)

    elif key == 'Cite':
        inlines = value[1]

        # Only a leading Str can hold the '@prefix:name' shortcut; any other
        # inline has no string content to match against.
        if not inlines or inlines[0].get('t') != 'Str':
            return None

        match = re.match(r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$', inlines[0]['c'])
        if match is not None and getCiteShortCut(match.group('category'), meta):

            # Deal with @prefix:name shortcut
            tag = match.group('tag')
            if tag in information:
                label = [Str(information[tag]['local'])]
                target = ['#' + tag, '']
                if _isPandocBefore116():
                    # pandoc 1.15
                    return Link(label, target)
                # pandoc > 1.15
                return Link(['', [], []], label, target)

    return None
358
359
def replacing(key, value, format, meta):
    """Filter action substituting the inlines in replace for the marker search.

    Each Str containing the global marker search is split around it; the
    non-empty pieces are kept as Str nodes and the global list replace is
    inserted at every place where the marker stood.  Returns the resulting
    list of inlines, or None when the element is not a Str or does not
    contain the marker.
    """
    global replace, search
    if key != 'Str':
        return None

    pieces = value.split(search)
    if len(pieces) <= 1:
        return None

    head, tail = pieces[0], pieces[1:]
    output = [Str(head)] if head != '' else []

    for piece in tail:
        output.extend(replace)
        if piece != '':
            output.append(Str(piece))

    return output
376
377
def _metaFloat(definition, name):
    """Read the MetaString field name of definition as a float.

    Returns None when the field is absent, is not a MetaString or does not
    hold a number.
    """
    fields = definition['c']
    if name in fields and fields[name]['t'] == 'MetaString':
        try:
            return float(fields[name]['c'])
        except ValueError:
            return None
    return None


def addListings(doc, format, meta):
    """Prepend to doc the listings requested in the 'pandoc-numbering' metadata.

    Every definition of the 'pandoc-numbering' MetaList that has a 'category'
    (a single Str) and a 'listing' title yields an unnumbered level-1 header
    followed by:

    * for the 'latex' format, a raw TeX block declaring and starting a table
      of contents for the category ('tab' and 'space' give the lengths in em;
      the link colour comes from the 'toccolor' metadata, black by default);
    * for any other format, a bullet list of links to the elements collected
      for the category, or nothing when there is none.

    doc is modified in place (doc[1] receives the listings in front of the
    existing blocks); nothing is returned.
    """
    global collections, information
    if not ('pandoc-numbering' in meta and meta['pandoc-numbering']['t'] == 'MetaList'):
        return

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not (definition['t'] == 'MetaMap' and
                'category' in definition['c'] and
                'listing' in definition['c'] and
                definition['c']['category']['t'] == 'MetaInlines' and
                definition['c']['listing']['t'] == 'MetaInlines' and
                len(definition['c']['category']['c']) == 1 and
                definition['c']['category']['c'][0]['t'] == 'Str'):
            continue

        # Get the category name
        category = definition['c']['category']['c'][0]['c']

        # Get the title
        title = definition['c']['listing']['c']

        if format == 'latex':

            # Special case for latex output

            # Get the link color
            if 'toccolor' in meta:
                linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
            else:
                linkcolor = '\\hypersetup{linkcolor=black}'

            # Get the tab, with its default length
            tab = _metaFloat(definition, 'tab')
            if tab is None:
                tab = 1.5

            # Get the space; by default it grows with the deepest section
            # number found in the collection
            space = _metaFloat(definition, 'space')
            if space is None:
                level = 0
                if category in collections:
                    # Loop on the collection
                    for tag in collections[category]:
                        level = max(level, information[tag]['section'].count('.'))
                space = level + 2.3

            # Add a RawBlock
            latexCategory = re.sub('[^a-z]+', '', category)
            latex = [
                linkcolor,
                '\\makeatletter',
                '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}',
                '\\@starttoc{' + latexCategory + '}',
                '\\makeatother'
            ]
            elt = [RawBlock('tex', ''.join(latex))]

        elif category in collections:
            # pandoc 1.15 builds a Link without attributes.  Compare the
            # version numerically: as plain strings '1.9' would sort after
            # '1.16'.
            versionParts = re.findall(r'\d+', pandocVersion())[:2]
            legacyLink = tuple(int(part) for part in versionParts) < (1, 16)

            # Prepare the list
            elements = []

            # Loop on the collection
            for tag in collections[category]:

                # Add an item to the list
                text = information[tag]['toc']

                if legacyLink:
                    # pandoc 1.15
                    link = Link(text, ['#' + tag, ''])
                else:
                    # pandoc 1.16
                    link = Link(['', [], []], text, ['#' + tag, ''])

                elements.append([Plain([link])])

            # Add a bullet list
            elt = [BulletList(elements)]

        else:
            # Add nothing
            elt = []

        # Add a new listing
        listings = listings + [Header(1, ['', ['unnumbered'], []], title)] + elt

    # Add listings to the document
    doc[1] = listings + doc[1]
482
483 View Code Duplication
def getFormat(category, meta):
    """Tell whether the automatic formatting applies to category.

    On the first call, the 'pandoc-numbering' MetaList of meta is scanned and
    the boolean 'format' of each well-formed definition (a MetaMap whose
    'category' is a single Str) is cached in getFormat.value; later calls
    only read that cache.  A category without a setting defaults to True,
    and the default is cached too.
    """
    if not hasattr(getFormat, 'value'):
        getFormat.value = {}

        definitions = []
        if 'pandoc-numbering' in meta and meta['pandoc-numbering']['t'] == 'MetaList':
            definitions = meta['pandoc-numbering']['c']

        # Loop on all listings definition, skipping the malformed ones
        for definition in definitions:
            if definition['t'] != 'MetaMap':
                continue

            fields = definition['c']
            if 'format' not in fields or 'category' not in fields:
                continue

            name = fields['category']
            if name['t'] != 'MetaInlines' or len(name['c']) != 1 or name['c'][0]['t'] != 'Str':
                continue

            if fields['format']['t'] != 'MetaBool':
                continue

            getFormat.value[name['c'][0]['c']] = fields['format']['c']

    return getFormat.value.setdefault(category, True)
507
508 View Code Duplication
def getCiteShortCut(category, meta):
    """Tell whether the '@category:name' citation shortcut is enabled.

    On the first call, the 'pandoc-numbering' MetaList of meta is scanned and
    the boolean 'cite-shortcut' of each well-formed definition (a MetaMap
    whose 'category' is a single Str) is cached in getCiteShortCut.value;
    later calls only read that cache.  A category without a setting defaults
    to False, and the default is cached too.
    """
    if not hasattr(getCiteShortCut, 'value'):
        getCiteShortCut.value = {}

        definitions = []
        if 'pandoc-numbering' in meta and meta['pandoc-numbering']['t'] == 'MetaList':
            definitions = meta['pandoc-numbering']['c']

        # Loop on all listings definition, skipping the malformed ones
        for definition in definitions:
            if definition['t'] != 'MetaMap':
                continue

            fields = definition['c']
            if 'cite-shortcut' not in fields or 'category' not in fields:
                continue

            name = fields['category']
            if name['t'] != 'MetaInlines' or len(name['c']) != 1 or name['c'][0]['t'] != 'Str':
                continue

            if fields['cite-shortcut']['t'] != 'MetaBool':
                continue

            getCiteShortCut.value[name['c'][0]['c']] = fields['cite-shortcut']['c']

    return getCiteShortCut.value.setdefault(category, False)
532
533
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] sectioning of category.

    On the first call, the 'pandoc-numbering' MetaList of meta is scanned and
    the levels of each well-formed definition (a MetaMap whose 'category' is
    a single Str) are cached in getDefaultLevels.value; later calls only read
    that cache.  The levels come from 'sectioning' (a single Str such as
    '-.+.') when it is present and well-formed, otherwise from the MetaString
    fields 'first' and 'last'; whatever is missing or invalid stays at 0.
    A category without a definition gets [0, 0], which is cached too.
    """
    global headerRegex

    if not hasattr(getDefaultLevels, 'value'):
        getDefaultLevels.value = {}

        definitions = []
        if 'pandoc-numbering' in meta and meta['pandoc-numbering']['t'] == 'MetaList':
            definitions = meta['pandoc-numbering']['c']

        # Loop on all listings definition, skipping the malformed ones
        for definition in definitions:
            if definition['t'] != 'MetaMap':
                continue

            fields = definition['c']
            if 'category' not in fields:
                continue

            name = fields['category']
            if name['t'] != 'MetaInlines' or len(name['c']) != 1 or name['c'][0]['t'] != 'Str':
                continue

            levelInf = 0
            levelSup = 0

            hasSectioning = ('sectioning' in fields and
                             fields['sectioning']['t'] == 'MetaInlines' and
                             len(fields['sectioning']['c']) == 1 and
                             fields['sectioning']['c'][0]['t'] == 'Str')

            if hasSectioning:
                match = re.match('^' + headerRegex + '$', fields['sectioning']['c'][0]['c'])
                if match:
                    # Compute the levelInf and levelSup values
                    levelInf = len(match.group('hidden')) // 2
                    levelSup = len(match.group('header')) // 2
            else:
                # 'first' is 1-based: the levels before it are hidden
                if 'first' in fields and fields['first']['t'] == 'MetaString':
                    try:
                        levelInf = max(min(int(fields['first']['c']) - 1, 6), 0)
                    except ValueError:
                        pass

                # 'last' is kept between levelInf and 6
                if 'last' in fields and fields['last']['t'] == 'MetaString':
                    try:
                        levelSup = max(min(int(fields['last']['c']), 6), levelInf)
                    except ValueError:
                        pass

            getDefaultLevels.value[name['c'][0]['c']] = [levelInf, levelSup]

    return getDefaultLevels.value.setdefault(category, [0, 0])
586
587 View Code Duplication
def getClasses(category, meta):
    """Return the list of classes attached to the elements of category.

    On the first call, the 'pandoc-numbering' MetaList of meta is scanned and
    the stringified 'classes' MetaList of each well-formed definition (a
    MetaMap whose 'category' is a single Str) is cached in getClasses.value;
    later calls only read that cache.  A category without classes gets the
    one-element list [category], which is cached too.
    """
    if not hasattr(getClasses, 'value'):
        getClasses.value = {}

        definitions = []
        if 'pandoc-numbering' in meta and meta['pandoc-numbering']['t'] == 'MetaList':
            definitions = meta['pandoc-numbering']['c']

        # Loop on all listings definition, skipping the malformed ones
        for definition in definitions:
            if definition['t'] != 'MetaMap':
                continue

            fields = definition['c']
            if 'category' not in fields:
                continue

            name = fields['category']
            if name['t'] != 'MetaInlines' or len(name['c']) != 1 or name['c'][0]['t'] != 'Str':
                continue

            if 'classes' not in fields or fields['classes']['t'] != 'MetaList':
                continue

            getClasses.value[name['c'][0]['c']] = [stringify(elt) for elt in fields['classes']['c']]

    return getClasses.value.setdefault(category, [category])
617
618
def pandocVersion():
    """Return the version of the pandoc executable as a string (e.g. '1.16').

    'pandoc -v' is run on the first call only; the result is cached in
    pandocVersion.value.

    Raises RuntimeError when the version cannot be found in the output.
    """
    if not hasattr(pandocVersion, 'value'):
        p = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, _ = p.communicate()

        # NOTE(review): Windows builds are believed to print 'pandoc.exe'
        # as the program name; the optional group accepts both spellings.
        match = re.search(br'pandoc(?:\.exe)? (?P<version>.*)', out)
        if match is None:
            raise RuntimeError('Unable to find the pandoc version in the output of "pandoc -v"')

        # '.' also matches '\r': strip it so that a CRLF line ending does
        # not end up in the version string.
        pandocVersion.value = match.group('version').decode('utf-8').strip()
    return pandocVersion.value
624
625
def main():
    """Run the filter: the numbering pass first, then the referencing pass."""
    actions = [numbering, referencing]
    toJSONFilters(actions)
627
628
# Run the filter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
630