Completed
Push — master ( d6b46b...4e469a )
by Christophe
01:10
created

referencingLink()   B

Complexity

Conditions 6

Size

Total Lines 82

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 6
dl 0
loc 82
rs 7.3557
c 1
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

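Commonly applied refactorings include Extract Method: move a coherent part of the long method into a new, well-named method and call it from the original.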

1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Number of numbered paragraphs seen so far, keyed by 'category:leading'
count = {}
# Numbers, description, title, link and toc inlines of every numbered paragraph, keyed by tag
information = {}
# Tags of the numbered paragraphs, grouped by basic category (in document order)
collections = {}
# Current counters of the six header levels
headers = [0, 0, 0, 0, 0, 0]
# Sectioning prefix: a run of '-.' (hidden levels) followed by a run of '+.' (shown levels).
# Raw string so that the backslashes reach the regex engine without invalid-escape warnings.
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Run the given actions over the JSON pandoc document read from stdin.

    The document is decoded as UTF-8, walked once per action (in order),
    completed with the listings and finally dumped as JSON on stdout.

    Each action is a function action(key, value, format, meta) where key is
    the type of the pandoc object (e.g. 'Str', 'Para'), value its contents,
    format the target output format (first command line argument, or the
    empty string when there is none) and meta the metadata of the document.
    Returning None leaves the object unchanged, returning an object replaces
    it and returning a list splices the list in place of the object (so an
    empty list deletes it).
    """
    try:
        reader = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        reader = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(reader.read())

    outputFormat = sys.argv[1] if len(sys.argv) > 1 else ""

    # Chain the actions: each one walks the tree produced by the previous one
    altered = doc
    for action in actions:
        altered = walk(altered, action, outputFormat, doc[0]['unMeta'])

    addListings(altered, outputFormat, altered[0]['unMeta'])
    json.dump(altered, sys.stdout)
54
55
def removeAccents(string):
    """Return string without its combining marks.

    The string is decomposed (NFKD) first, so an accented letter becomes its
    base letter followed by a combining mark, which is then dropped.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    return u"".join(char for char in decomposed if unicodedata.combining(char) == 0)
58
59
def toIdentifier(string):
    """Turn string into an identifier made of letters, digits, '_' and '-'.

    The string is lower-cased and stripped of its accents, every run of other
    characters becomes a single dash, and everything before the first letter
    is dropped.
    """
    normalized = removeAccents(string.lower())

    # Collapse each run of invalid characters into one dash
    dashed = re.sub('[^0-9a-zA-Z_-]+', '-', normalized)

    # Drop every leading character that is not a letter (digits, dashes, underscores)
    return re.sub('^[^a-zA-Z]+', '', dashed)
67
68
def toLatex(x):
    """Walk the tree x and return its textual content as a single string.

    Formatting is left out, inline math is wrapped in dollar signs and the
    content of notes is skipped.  Note values met during the walk are emptied
    in place so that the walk does not descend into them.
    """
    pieces = []

    def collect(key, val, format, meta):
        if key == 'Str' or key == 'MetaString':
            pieces.append(val)
        elif key == 'Code':
            pieces.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package
            pieces.append('$' + val[1] + '$' if format == 'latex' else val[1])
        elif key == 'LineBreak' or key == 'Space':
            pieces.append(" ")
        elif key == 'Note':
            # Do not stringify value from Note node
            del val[:]

    walk(x, collect, 'latex', {})
    return ''.join(pieces)
95
96
def _extractTitle(value):
    """Split an optional '(title)' off the inlines of a numbered paragraph.

    A title is present when the inline before the final space ends with ')'
    and some Str starts with '('.  Returns the pair (title, value) where title
    is the list of title inlines (the surrounding parentheses are stripped in
    place) and value is the paragraph without the title and without the
    inline preceding it.  Without a title, ([], value) is returned.
    """
    title = []
    if value[-3]['t'] == 'Str' and value[-3]['c'][-1:] == ')':
        for (i, item) in enumerate(value):
            # Slice instead of index so that an empty Str cannot raise
            if item['t'] == 'Str' and item['c'][:1] == '(':
                title = value[i:-2]
                title[0]['c'] = title[0]['c'][1:]
                title[-1]['c'] = title[-1]['c'][:-1]
                # Clamp to 0: with i == 0 a bare 'i - 1' would be -1 and keep
                # the title in front of the description
                value = value[:max(i - 1, 0)] + value[-2:]
                break
    return title, value


def numbering(key, value, format, meta):
    """Walk action that counts headers and numbers the tagged paragraphs.

    Headers that do not carry the 'unnumbered' class increment the counter of
    their level and reset the counters of the deeper levels.

    A Para whose last inline is a Str of the form '[-.]*[+.]*#[prefix:][name]'
    preceded by a space is replaced by a numbered Para; its numbers and labels
    are stored in the information and collections globals under its tag.  A
    Para ending with the '##' form gets its '##' turned into a literal '#'.
    Everything else is left unchanged (None is returned).
    """
    if key == 'Header':
        [level, [identifier, classes, attributes], content] = value
        if 'unnumbered' not in classes:
            headers[level - 1] = headers[level - 1] + 1
            for index in range(level, 6):
                headers[index] = 0
        return None

    if key != 'Para':
        return None

    if not (len(value) >= 3 and value[-2] == Space() and value[-1]['t'] == 'Str'):
        return None

    last = value[-1]['c']

    match = re.match(
        '^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$',
        last
    )

    if not match:
        if re.match('^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$', last):
            # Special case where the last element is '...##...'
            value[-1]['c'] = value[-1]['c'].replace('##', '#', 1)
            return Para(value)
        return None

    # It is a Para and the last element is an identifier beginning with '#'

    # Detect the title
    title, value = _extractTitle(value)

    # Compute the description
    description = value[:-2]

    # Compute the basicCategory
    prefix = match.group('prefix')
    if prefix is None:
        basicCategory = toIdentifier(stringify(description))
    else:
        basicCategory = prefix

    # Compute the levelInf and levelSup values (each level is two characters long)
    levelInf = len(match.group('hidden')) // 2
    levelSup = len(match.group('header')) // 2

    # Get the default inf and sup level
    if levelInf == 0 and levelSup == 0:
        [levelInf, levelSup] = getDefaultLevels(basicCategory, meta)

    # Compute the section number
    sectionNumber = '.'.join(map(str, headers[:levelSup]))

    # Compute the leading (composed of the section numbering and a dot)
    if levelSup != 0:
        leading = sectionNumber + '.'
    else:
        leading = ''

    category = basicCategory + ':' + leading

    # Count one more element in this category (starting at 1 for a new category)
    count[category] = count.get(category, 0) + 1

    # Get the number
    number = str(count[category])

    # Determine the final tag
    name = match.group('name')
    if name is None:
        tag = category + number
    else:
        tag = basicCategory + ':' + name

    # Replace the '-.-.+.+...#' by the category count (omitting the hidden part)
    localNumber = '.'.join(map(str, headers[levelInf:levelSup] + [number]))

    # Compute the globalNumber
    if sectionNumber:
        globalNumber = sectionNumber + '.' + number
    else:
        globalNumber = number

    classes = getClasses(basicCategory, meta)

    # Is the automatic formatting required for this category?
    formatted = getFormat(basicCategory, meta)

    if formatted:
        # Prepare the final text
        text = [Strong(description + [Space(), Str(localNumber)])]

        # Add the title to the final text
        if title:
            text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

        # Compute the link
        link = description + [Space(), Str(localNumber)]

        # Compute the toc
        toc = [Str(globalNumber), Space()]
        if title:
            toc = toc + title
        else:
            toc = toc + description
    else:
        # Prepare the final text
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]

        # Compute the link
        link = [Span(['', ['pandoc-numbering-link'] + classes, []], text)]

        # Compute the toc
        toc = [Span(['', ['pandoc-numbering-toc'] + classes, []], text)]

    # Store the numbers and the label for automatic numbering (See referencing function)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # Prepare the contents
    contents = [Span([tag, ['pandoc-numbering-text'] + classes, []], text)]

    # Compute collections
    collections.setdefault(basicCategory, []).append(tag)

    # Special case for LaTeX
    if format == 'latex' and formatted:
        latexCategory = re.sub('[^a-z]+', '', basicCategory)
        if title:
            entry = title
        else:
            entry = description
        latex = '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory + \
            '}{\\protect\\numberline {' + leading + number + '}{\\ignorespaces ' + toLatex(entry) + '}}'
        contents.insert(0, RawInline('tex', latex))

    # Return the contents in a Para element
    return Para(contents)
251
252
# Inlines that replacing() splices in wherever it finds the search marker;
# assigned by referencingLink() before each walk
replace = None
253
# Marker string that replacing() looks for inside Str elements;
# assigned by referencingLink() before each walk
search = None
254
255
def lowering(key, value, format, meta):
    """Walk action replacing every Str by a Str with lower-cased content."""
    if key != 'Str':
        return None
    return Str(value.lower())
258
259
def referencing(key, value, format, meta):
    """Walk action handing Link and Cite elements over to their handlers.

    Returns what the handler returns; any other element is left unchanged.
    """
    if key == 'Link':
        return referencingLink(value, format, meta)
    if key == 'Cite':
        return referencingCite(value, format, meta)
    return None
264
265
def referencingLink(value, format, meta):
    """Expand the numbering markers of a link pointing to a numbered element.

    value is the content of a Link element and is modified in place; nothing
    is returned.  The link is only touched when its target is '#tag' and tag
    is a key of the information global.

    In the title of the link, the markers '#t', '#T', '#d', '#D', '#s', '#g',
    '#c', '#n' and '#' are replaced (in that order) by the lower-cased title,
    the title, the lower-cased description, the description and the section,
    global, count, local and local numbers.  An empty link text is replaced by
    the default label of the element; otherwise the same markers are expanded
    inside the link text.
    """
    global replace, search

    # A Link is [text, target] up to pandoc 1.15 and [attributes, text, target]
    # afterwards: tell them apart from the value itself instead of asking pandoc
    textIndex = 0 if len(value) == 2 else 1
    text = value[textIndex]
    target = value[textIndex + 1]
    reference = target[0]

    if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
        return None

    # Compute the name
    tag = reference[1:]

    if tag not in information:
        return None

    entry = information[tag]

    # Replace all the markers with the corresponding text in the title.
    # The bare '#' comes last so that it does not eat the two-character markers.
    plainTitle = stringify(entry['title'])
    plainDescription = stringify(entry['description'])
    tooltip = target[1]
    for marker, replacement in [
        ('#t', plainTitle.lower()),
        ('#T', plainTitle),
        ('#d', plainDescription.lower()),
        ('#D', plainDescription),
        ('#s', entry['section']),
        ('#g', entry['global']),
        ('#c', entry['count']),
        ('#n', entry['local']),
        ('#', entry['local']),
    ]:
        tooltip = tooltip.replace(marker, replacement)
    target[1] = tooltip

    if text == []:
        # The link text is empty, replace it with the default label
        value[textIndex] = entry['link']
        return None

    # The link text is not empty: expand the markers inside it, one walk per
    # marker.  The replacement inlines are built right before their walk.
    builders = [
        ('#t', lambda: walk(entry['title'], lowering, format, meta)),
        ('#T', lambda: entry['title']),
        ('#d', lambda: walk(entry['description'], lowering, format, meta)),
        ('#D', lambda: entry['description']),
        ('#s', lambda: [Str(entry['section'])]),
        ('#g', lambda: [Str(entry['global'])]),
        ('#c', lambda: [Str(entry['count'])]),
        ('#n', lambda: [Str(entry['local'])]),
        ('#', lambda: [Str(entry['local'])]),
    ]
    for marker, build in builders:
        # replacing() reads both globals during the walk
        replace = build()
        search = marker
        value[textIndex] = walk(value[textIndex], replacing, format, meta)

    return None
347
348
def referencingCite(value, format, meta):
    """Turn a '@category:name' citation into a link to a numbered element.

    value is the content of a Cite element.  A Link showing the local number
    of the element and pointing to '#tag' is returned when the first inline of
    the citation is a Str of the form '@category:name' (or
    '@category:numbers'), the cite shortcut is enabled for the category and
    the tag is a key of the information global.  Otherwise None is returned
    and the citation is left unchanged.
    """
    inlines = value[1]

    # Only a leading Str can hold the shortcut: leave other citations alone
    # rather than failing on them
    if not inlines or inlines[0].get('t') != 'Str':
        return None

    match = re.match(
        r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$',
        inlines[0]['c']
    )
    if match is None or not getCiteShortCut(match.group('category'), meta):
        return None

    # Deal with @prefix:name shortcut
    tag = match.group('tag')
    if tag not in information:
        return None

    label = [Str(information[tag]['local'])]
    target = ['#' + tag, '']

    if pandocVersion() < '1.16':
        # pandoc 1.15
        return Link(label, target)

    # pandoc > 1.15
    return Link(['', [], []], label, target)
362
363
def replacing(key, value, format, meta):
    """Walk action substituting the search marker inside Str elements.

    A Str containing the search global is split around each occurrence and
    the list made of the non-empty fragments, with the inlines of the replace
    global in place of each occurrence, is returned.  Any other element (or a
    Str without the marker) is left unchanged.
    """
    if key != 'Str':
        return None

    fragments = value.split(search)
    if len(fragments) <= 1:
        return None

    first = fragments[0]
    result = [Str(first)] if first else []

    for fragment in fragments[1:]:
        # One occurrence of the marker stood before each remaining fragment
        result.extend(replace)
        if fragment:
            result.append(Str(fragment))

    return result
380
381
def hasMeta(meta):
    """Tell whether meta holds a 'pandoc-numbering' entry of type MetaList."""
    if 'pandoc-numbering' not in meta:
        return False
    return meta['pandoc-numbering']['t'] == 'MetaList'
383
384
def isCorrect(definition):
    """Tell whether definition is a MetaMap naming its category.

    The 'category' entry must be a MetaInlines made of exactly one Str.
    """
    if definition['t'] != 'MetaMap':
        return False
    if 'category' not in definition['c']:
        return False

    category = definition['c']['category']
    if category['t'] != 'MetaInlines':
        return False

    inlines = category['c']
    return len(inlines) == 1 and inlines[0]['t'] == 'Str'
390
391
def hasProperty(definition, name, type):
    """Tell whether definition has an entry called name whose 't' field equals type."""
    properties = definition['c']
    if name not in properties:
        return False
    return properties[name]['t'] == type
393
394
def getProperty(definition, name):
    """Return the content (the 'c' field) of the entry called name in definition."""
    entry = definition['c'][name]
    return entry['c']
396
397
def getFirstValue(definition, name):
    """Return the content of the first element of the entry called name in definition."""
    elements = getProperty(definition, name)
    first = elements[0]
    return first['c']
399
400
def _listingLength(definition, name):
    """Return the MetaString entry called name of definition as a float.

    None is returned when the entry is missing, is not a MetaString or does
    not hold a number.
    """
    if not hasProperty(definition, name, 'MetaString'):
        return None
    try:
        return float(getProperty(definition, name))
    except ValueError:
        return None


def _latexListing(definition, category, meta):
    """Return the raw LaTeX block printing the listing of category."""
    # Get the link color
    if 'toccolor' in meta:
        linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
    else:
        linkcolor = '\\hypersetup{linkcolor=black}'

    # Get the tab (with its default length)
    tab = _listingLength(definition, 'tab')
    if tab is None:
        tab = 1.5

    # Get the space; by default it grows with the deepest section number of the collection
    space = _listingLength(definition, 'space')
    if space is None:
        level = 0
        for tag in collections.get(category, []):
            level = max(level, information[tag]['section'].count('.'))
        space = level + 2.3

    # Add a RawBlock
    latexCategory = re.sub('[^a-z]+', '', category)
    latex = [
        linkcolor,
        '\\makeatletter',
        '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}',
        '\\@starttoc{' + latexCategory + '}',
        '\\makeatother'
    ]
    return [RawBlock('tex', ''.join(latex))]


def _linkedListing(category):
    """Return a bullet list linking to every element of category (nothing when there is none)."""
    if category not in collections:
        # Add nothing
        return []

    elements = []
    for tag in collections[category]:
        # Add an item to the list
        text = information[tag]['toc']

        if pandocVersion() < '1.16':
            # pandoc 1.15
            link = Link(text, ['#' + tag, ''])
        else:
            # pandoc 1.16
            link = Link(['', [], []], text, ['#' + tag, ''])

        elements.append([Plain([link])])

    # Add a bullet list
    return [BulletList(elements)]


def addListings(doc, format, meta):
    """Prepend the listings requested in the metadata to the document.

    For every correct definition of the 'pandoc-numbering' metadata having a
    'listing' entry (MetaInlines), an unnumbered level 1 header titled with
    that entry is added in front of doc[1], followed by a raw LaTeX table of
    contents when format is 'latex' and by a bullet list of links otherwise.
    doc is modified in place; nothing happens without the metadata.
    """
    if not hasMeta(meta):
        return

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not (isCorrect(definition) and hasProperty(definition, 'listing', 'MetaInlines')):
            continue

        # Get the category name
        category = getFirstValue(definition, 'category')

        # Get the title
        title = getProperty(definition, 'listing')

        if format == 'latex':
            # Special case for latex output
            elt = _latexListing(definition, category, meta)
        else:
            elt = _linkedListing(category)

        # Add a new listing
        listings = listings + [Header(1, ['', ['unnumbered'], []], title)] + elt

    # Add listings to the document
    doc[1] = listings + doc[1]
501
502
def getFormat(category, meta):
    """Tell whether the automatic formatting applies to category.

    The answers are kept in getFormat.value, filled on the first call from
    the 'format' entries (MetaBool) of the correct 'pandoc-numbering'
    definitions.  A category without such an entry defaults to True.
    """
    cache = getattr(getFormat, 'value', None)
    if cache is None:
        cache = getFormat.value = {}
        # Loop on all listings definition
        definitions = meta['pandoc-numbering']['c'] if hasMeta(meta) else []
        for definition in definitions:
            if isCorrect(definition) and hasProperty(definition, 'format', 'MetaBool'):
                cache[getFirstValue(definition, 'category')] = getProperty(definition, 'format')

    return cache.setdefault(category, True)
515
516
def getCiteShortCut(category, meta):
    """Tell whether the '@category:name' cite shortcut is enabled for category.

    The answers are kept in getCiteShortCut.value, filled on the first call
    from the 'cite-shortcut' entries (MetaBool) of the correct
    'pandoc-numbering' definitions.  A category without such an entry
    defaults to False.
    """
    cache = getattr(getCiteShortCut, 'value', None)
    if cache is None:
        cache = getCiteShortCut.value = {}
        # Loop on all listings definition
        definitions = meta['pandoc-numbering']['c'] if hasMeta(meta) else []
        for definition in definitions:
            if isCorrect(definition) and hasProperty(definition, 'cite-shortcut', 'MetaBool'):
                cache[getFirstValue(definition, 'category')] = getProperty(definition, 'cite-shortcut')

    return cache.setdefault(category, False)
529
530
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] sectioning levels of category.

    The levels are kept in getDefaultLevels.value, filled on the first call
    from the correct 'pandoc-numbering' definitions: either from a
    'sectioning' entry holding a single Str such as '-.+.', or from the
    'first' and 'last' entries (MetaString).  A category without definition
    defaults to [0, 0].
    """
    cache = getattr(getDefaultLevels, 'value', None)
    if cache is None:
        cache = getDefaultLevels.value = {}
        # Loop on all listings definition
        definitions = meta['pandoc-numbering']['c'] if hasMeta(meta) else []
        for definition in definitions:
            if not isCorrect(definition):
                continue

            levelInf = 0
            levelSup = 0

            singleStr = hasProperty(definition, 'sectioning', 'MetaInlines') and \
                len(getProperty(definition, 'sectioning')) == 1 and \
                getProperty(definition, 'sectioning')[0]['t'] == 'Str'

            if singleStr:
                match = re.match('^' + headerRegex + '$', getFirstValue(definition, 'sectioning'))
                if match:
                    # Compute the levelInf and levelSup values
                    levelInf = len(match.group('hidden')) // 2
                    levelSup = len(match.group('header')) // 2
            else:
                if hasProperty(definition, 'first', 'MetaString'):
                    try:
                        # 'first' is 1-based; keep the result between 0 and 6
                        levelInf = max(min(int(getProperty(definition, 'first')) - 1, 6), 0)
                    except ValueError:
                        pass
                if hasProperty(definition, 'last', 'MetaString'):
                    try:
                        # Keep the result between levelInf and 6
                        levelSup = max(min(int(getProperty(definition, 'last')), 6), levelInf)
                    except ValueError:
                        pass

            cache[getFirstValue(definition, 'category')] = [levelInf, levelSup]

    return cache.setdefault(category, [0, 0])
567
568
def getClasses(category, meta):
    """Return the list of classes attached to the elements of category.

    The lists are kept in getClasses.value, filled on the first call from the
    'classes' entries (MetaList) of the correct 'pandoc-numbering'
    definitions.  A category without such an entry defaults to [category].
    """
    cache = getattr(getClasses, 'value', None)
    if cache is None:
        cache = getClasses.value = {}
        # Loop on all listings definition
        definitions = meta['pandoc-numbering']['c'] if hasMeta(meta) else []
        for definition in definitions:
            if isCorrect(definition) and hasProperty(definition, 'classes', 'MetaList'):
                names = [stringify(elt) for elt in getProperty(definition, 'classes')]
                cache[getFirstValue(definition, 'category')] = names

    return cache.setdefault(category, [category])
584
585
def pandocVersion():
    """Return the version printed by 'pandoc -v' as a string.

    pandoc is only run on the first call; the result is kept in
    pandocVersion.value for the following ones.
    """
    try:
        return pandocVersion.value
    except AttributeError:
        pass

    process = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = process.communicate()

    # Whatever follows 'pandoc ' on its line of the output
    found = re.search(b'pandoc (?P<version>.*)', out)
    pandocVersion.value = found.group('version').decode('utf-8')
    return pandocVersion.value
591
592
def main():
    """Run the filter: number the paragraphs first, then resolve the references."""
    actions = [numbering, referencing]
    toJSONFilters(actions)


if __name__ == '__main__':
    main()
597