Completed
Push — master ( a1e08a...a710b8 )
by Christophe
01:14
created

computeDescription()   A

Complexity

Conditions 1

Size

Total Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
dl 0
loc 2
rs 10
c 1
b 0
f 0
1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Running counter of numbered elements, keyed by 'basicCategory:leading'
# (filled in by computeCategory).
count = {}

# Data recorded for every numbered element, keyed by its tag (filled in by
# numberingEffective and read back when references and listings are built).
information = {}

# Tags of the numbered elements grouped by basic category, in the order in
# which they were numbered (read by addListings).
collections = {}

# Current counter for each of the six header levels (see numberingHeader).
headers = [0, 0, 0, 0, 0, 0]

# Sectioning prefix: a run of hidden levels '-.' followed by a run of '+.'.
# Written as a raw string so that '\.' and '\+' are regex escapes instead of
# invalid Python string escapes; the resulting pattern text is unchanged.
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Run a list of actions as a pandoc JSON filter from stdin to stdout.

    A JSON-formatted pandoc document is read from stdin, the tree is walked
    once per action (each walk working on the result of the previous one),
    the listings are added and the resulting document is written to stdout
    as JSON.

    The argument is a list of functions action(key, value, format, meta),
    where key is the type of the pandoc object (e.g. 'Str', 'Para'),
    value is the contents of the object (e.g. a string for 'Str',
    a list of inline elements for 'Para'), format is the target
    output format (taken from the first command line argument if present,
    otherwise the empty string), and meta is the document's metadata.
    If the function returns None, the object to which it applies
    will remain unchanged.  If it returns an object, the object will
    be replaced.  If it returns a list, the list will be spliced in to
    the list to which the target object belongs.  (So, returning an
    empty list deletes the object.)
    """
    try:
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        input_stream = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(input_stream.read())
    format = sys.argv[1] if len(sys.argv) > 1 else ""

    # Chain the actions: every action walks the output of the previous one,
    # always with the metadata of the document as it was read.
    altered = doc
    for action in actions:
        altered = walk(altered, action, format, doc[0]['unMeta'])

    addListings(altered, format, altered[0]['unMeta'])
    json.dump(altered, sys.stdout)
54
55
def removeAccents(string):
    """Return string without its combining marks (accents).

    The text is decomposed with NFKD normalization, then every character
    that unicodedata reports as combining is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    return u"".join(char for char in decomposed if not unicodedata.combining(char))
58
59
def toIdentifier(string):
    """Turn string into an identifier made of ASCII letters, digits, '_' and '-'.

    The text is lower-cased and stripped of accents, each run of other
    characters becomes a single dash, and everything before the first
    letter is removed.
    """
    normalized = removeAccents(string.lower())

    # Replace each run of invalid characters by one dash
    dashed = re.sub('[^0-9a-zA-Z_-]+', '-', normalized)

    # Drop every leading character that is not a letter (digits, dashes, ...)
    return re.sub('^[^a-zA-Z]+', '', dashed)
67
68
def toLatex(x):
    """Walk the tree x and return its text content as one string.

    Str and MetaString values and the text of Code nodes are copied as is,
    Math is wrapped in '$...$', and Space and LineBreak become a single
    blank.  The contents of Note nodes are not included; note that they are
    emptied in place, so the tree passed in is modified.
    """
    pieces = []

    def collect(key, val, format, meta):
        if key == 'Str' or key == 'MetaString':
            pieces.append(val)
        elif key == 'Code':
            pieces.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package
            formula = val[1]
            pieces.append(('$' + formula + '$') if format == 'latex' else formula)
        elif key in ('LineBreak', 'Space'):
            pieces.append(" ")
        elif key == 'Note':
            # Do not stringify value from Note node
            del val[:]

    # The walk is always done with the 'latex' format
    walk(x, collect, 'latex', {})
    return ''.join(pieces)
95
96
def numbering(key, value, format, meta):
    """Filter action: count headers and number the paragraphs that ask for it.

    Header nodes go to numberingHeader and Para nodes to numberingPara;
    any other node is left unchanged (None is returned).
    """
    if key == 'Header':
        return numberingHeader(value)
    if key == 'Para':
        return numberingPara(value, format, meta)
    return None
101
102
def numberingHeader(value):
    """Update the module-level header counters for one Header node.

    Unless the header carries the 'unnumbered' class, the counter of its
    level is incremented and the counters of all deeper levels are reset
    to zero.  Nothing is returned, so the header itself is left unchanged.
    """
    level, (_identifier, classes, _attributes), _content = value
    if 'unnumbered' in classes:
        return

    headers[level - 1] += 1
    for deeper in range(level, 6):
        headers[deeper] = 0
108
109
def numberingPara(value, format, meta):
    """Number a paragraph whose last word is a '#' numbering marker.

    The paragraph must hold at least three inlines and end with a Space
    followed by a Str.  When that Str is an optional sectioning prefix
    ('-.' / '+.' runs) followed by '#', an optional 'prefix:' and an
    optional name, the paragraph is rewritten by numberingEffective.
    When it uses '##' instead, the marker is un-escaped by
    numberingSharpSharp.  In every other case None is returned and the
    paragraph is left unchanged.
    """
    if len(value) < 3 or value[-2] != Space() or value[-1]['t'] != 'Str':
        return None

    last = value[-1]['c']

    # Raw strings keep '\w' a regex escape rather than an invalid Python
    # string escape; the compiled patterns are unchanged.
    match = re.match(
        '^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$',
        last
    )
    if match:
        # Is it a Para and the last element is an identifier beginning with '#'
        return numberingEffective(match, value, format, meta)

    if re.match('^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$', last):
        # Special case where the last element is '...##...'
        return numberingSharpSharp(value)

    return None
120
121
def numberingEffective(match, value, format, meta):
    """Build the numbered replacement for a paragraph ending with a '#' marker.

    match is the regex match of the marker (groups 'header', 'hidden',
    'prefix' and 'name'), value the inlines of the paragraph, format the
    output format and meta the document metadata.

    The category counter is incremented, the numbers and labels of the
    element are stored in the module-level `information` under its tag,
    the tag is appended to `collections`, and a Para is returned holding a
    Span whose identifier is the tag.  For LaTeX output with automatic
    formatting, a raw '\\addcontentsline' command is put in front of it.
    """
    # computeTitle removes the title from value, so it has to run before
    # computeDescription; computeCategory increments the counter read below.
    title = computeTitle(value)
    description = computeDescription(value)
    basicCategory = computeBasicCategory(match, description)
    [levelInf, levelSup] = computeLevels(match, basicCategory, meta)
    sectionNumber = computeSectionNumber(levelSup)
    leading = computeLeading(levelSup, sectionNumber)
    category = computeCategory(basicCategory, leading)
    number = str(count[category])
    tag = computeTag(match, basicCategory, category, number)

    # Replace the '-.-.+.+...#' by the category count (omitting the hidden part)
    localNumber = '.'.join(map(str, headers[levelInf:levelSup] + [number]))

    # Compute the globalNumber
    if sectionNumber:
        globalNumber = sectionNumber + '.' + number
    else:
        globalNumber = number

    # Is the automatic formatting required for this category?
    if getFormat(basicCategory, meta):
        # Prepare the final text
        text = [Strong(description + [Space(), Str(localNumber)])]

        # Add the title to the final text
        if title:
            text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

        # Compute the link
        link = description + [Space(), Str(localNumber)]

        # Compute the toc
        toc = [Str(globalNumber), Space()]
        if title:
            toc = toc + title
        else:
            toc = toc + description

    else:
        # Prepare the final text: every part is exposed in its own Span
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]

        # Compute the link
        link = [Span(['', ['pandoc-numbering-link'] + getClasses(basicCategory, meta), []], text)]

        # Compute the toc
        toc = [Span(['', ['pandoc-numbering-toc'] + getClasses(basicCategory, meta), []], text)]

    # Store the numbers and the label for automatic numbering (See referencing function)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # Prepare the contents
    contents = [Span([tag, ['pandoc-numbering-text'] + getClasses(basicCategory, meta), []], text)]

    # Compute collections
    if basicCategory not in collections:
        collections[basicCategory] = []

    collections[basicCategory].append(tag)

    # Special case for LaTeX
    if format == 'latex' and getFormat(basicCategory, meta):
        latexCategory = re.sub('[^a-z]+', '', basicCategory)
        if title:
            entry = title
        else:
            entry = description
        # '\\ignorespaces' is spelled with an escaped backslash: '\i' is not a
        # valid string escape (the emitted LaTeX is the same).
        latex = '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory + '}{\\protect\\numberline {' + \
            leading + number + '}{\\ignorespaces ' + toLatex(entry) + '}}'
        contents.insert(0, RawInline('tex', latex))

    # Return the contents in a Para element
    return Para(contents)
211
212
def computeTitle(value):
    """Extract the parenthesised title that precedes the numbering marker.

    A title is present when the third inline from the end is a Str ending
    with ')'.  The title then starts at the first Str beginning with '('
    and runs up to (not including) the last two inlines.  The surrounding
    parentheses are stripped, and the title together with the inline just
    before it is removed from value in place.  Returns the list of title
    inlines, or an empty list when there is no title.
    """
    closing = value[-3]
    if closing['t'] != 'Str' or closing['c'][-1:] != ')':
        return []

    for start, node in enumerate(value):
        if node['t'] != 'Str' or node['c'][0] != '(':
            continue

        title = value[start:-2]
        # Strip the opening and closing parentheses
        title[0]['c'] = title[0]['c'][1:]
        title[-1]['c'] = title[-1]['c'][:-1]
        # Remove the title and the inline preceding it from the paragraph
        del value[start - 1:-2]
        return title

    return []
223
224
def computeDescription(value):
    """Return a copy of value without its last two inlines."""
    description = value[0:-2]
    return description
226
227
def computeBasicCategory(match, description):
    """Return the basic category of a numbered element.

    The explicit 'prefix' group of match is used when it took part in the
    match; otherwise the category is the identifier derived from the
    stringified description.
    """
    prefix = match.group('prefix')
    if prefix is not None:
        return prefix
    return toIdentifier(stringify(description))
232
233
def computeLevels(match, basicCategory, meta):
    """Return [levelInf, levelSup] for a numbering marker.

    Both values are half the length of the 'hidden' and 'header' groups
    of match respectively (each level being two characters long).  When
    both are zero, the default levels of the category are used instead.
    """
    levelInf, levelSup = (len(match.group(name)) // 2 for name in ('hidden', 'header'))

    # No explicit sectioning: get the default inf and sup level
    if not (levelInf or levelSup):
        levelInf, levelSup = getDefaultLevels(basicCategory, meta)

    return [levelInf, levelSup]
243
244
def computeSectionNumber(levelSup):
    """Return the first levelSup header counters joined with dots."""
    return '.'.join(str(counter) for counter in headers[:levelSup])
246
247
def computeLeading(levelSup, sectionNumber):
    """Return the leading part of a number: the section number and a dot.

    When levelSup is zero there is no leading and the empty string is
    returned.
    """
    return '' if levelSup == 0 else sectionNumber + '.'
253
254
255
def computeCategory(basicCategory, leading):
    """Return the full category 'basicCategory:leading' and count one more element.

    The module-level `count` entry of the category is created at zero when
    the category is new, then incremented.
    """
    category = basicCategory + ':' + leading
    count[category] = count.get(category, 0) + 1
    return category
265
266
def computeTag(match, basicCategory, category, number):
    """Return the tag identifying a numbered element.

    With an explicit 'name' group in match the tag is
    'basicCategory:name'; otherwise it is the category followed by the
    number.
    """
    name = match.group('name')
    if name is None:
        return category + number
    return basicCategory + ':' + name
272
273
def numberingSharpSharp(value):
    """Un-escape a '##' marker: replace its first '##' by '#' in place.

    Only the content of the last inline of value is changed; nothing is
    returned.
    """
    marker = value[-1]
    marker['c'] = marker['c'].replace('##', '#', 1)
275
276
# Module-level state shared between referencingLink and the `replacing`
# action: `search` is the marker looked for inside Str nodes and `replace`
# the list of inlines spliced in at each occurrence.
search = None
replace = None
278
279
def lowering(key, value, format, meta):
    """Walk action: replace each Str node by its lower-cased version.

    Other nodes are left unchanged (None is returned).
    """
    if key != 'Str':
        return None
    return Str(value.lower())
282
283
def referencing(key, value, format, meta):
    """Filter action: resolve references to numbered elements.

    Link nodes go to referencingLink and Cite nodes to referencingCite;
    any other node is left unchanged (None is returned).
    """
    if key == 'Link':
        return referencingLink(value, format, meta)
    if key == 'Cite':
        return referencingCite(value, format, meta)
    return None
288
289
def referencingLink(value, format, meta):
    """Fill in a link that points to a numbered element.

    value is the content of a Link node; its layout depends on the pandoc
    version ([text, [reference, title]] before 1.16, with a leading
    attributes element afterwards).  When the reference is '#tag' and the
    tag is known in `information`, the placeholders below are substituted
    in the link title and, if the link text is not empty, in the link
    text; an empty link text is replaced by the default label of the
    element.

        #t / #T   title (lower case / as written)
        #d / #D   description (lower case / as written)
        #s        section number
        #g        global number
        #c        count
        #n, #     local number

    value is modified in place and None is always returned.
    """
    global information, replace, search

    # The position of the link text inside value depends on the pandoc version
    if pandocVersion() < '1.16':
        # pandoc 1.15
        [text, [reference, title]] = value
        i = 0
    else:
        # pandoc > 1.15
        [attributes, text, [reference, title]] = value
        i = 1

    # Raw string: '\w' must reach the regex engine, it is not a string escape
    if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
        return None

    # Compute the name
    tag = reference[1:]
    if tag not in information:
        return None

    entry = information[tag]
    titleText = stringify(entry['title'])
    descriptionText = stringify(entry['description'])

    # Substitutions in the link title.  The order matters: the two-character
    # markers must be handled before the bare '#'.
    target = value[i + 1]
    for marker, substitute in (
        ('#t', titleText.lower()),
        ('#T', titleText),
        ('#d', descriptionText.lower()),
        ('#D', descriptionText),
        ('#s', entry['section']),
        ('#g', entry['global']),
        ('#c', entry['count']),
        ('#n', entry['local']),
        ('#', entry['local']),
    ):
        target[1] = target[1].replace(marker, substitute)

    if text == []:
        # The link text is empty, replace it with the default label
        value[i] = entry['link']
        return None

    # The link text is not empty: same substitutions, same order, but with
    # lists of inlines spliced in by the `replacing` action, which reads the
    # module-level `search` and `replace`.
    for marker, substitute in (
        ('#t', walk(entry['title'], lowering, format, meta)),
        ('#T', entry['title']),
        ('#d', walk(entry['description'], lowering, format, meta)),
        ('#D', entry['description']),
        ('#s', [Str(entry['section'])]),
        ('#g', [Str(entry['global'])]),
        ('#c', [Str(entry['count'])]),
        ('#n', [Str(entry['local'])]),
        ('#', [Str(entry['local'])]),
    ):
        search = marker
        replace = substitute
        value[i] = walk(value[i], replacing, format, meta)

    return None
371
372
def referencingCite(value, format, meta):
    """Turn a '@category:name' citation into a link to a numbered element.

    The content of the first inline of the citation (value[1][0]) must be
    exactly '@category:name' or '@category:number', the cite shortcut must
    be enabled for the category, and the tag must be known in
    `information`.  The returned Link shows the local number of the
    element and points to '#tag'.  In every other case None is returned
    and the citation is left unchanged.
    """
    global information

    # Raw string: '\w', '\d' and '\.' are regex escapes, not string escapes
    match = re.match(
        r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$',
        value[1][0]['c']
    )
    if match is None or not getCiteShortCut(match.group('category'), meta):
        return None

    # Deal with @prefix:name shortcut
    tag = match.group('tag')
    if tag not in information:
        return None

    label = [Str(information[tag]['local'])]
    if pandocVersion() < '1.16':
        # pandoc 1.15
        return Link(label, ['#' + tag, ''])

    # pandoc > 1.15
    return Link(['', [], []], label, ['#' + tag, ''])
386
387
def replacing(key, value, format, meta):
    """Walk action: splice `replace` in place of every `search` in Str nodes.

    The module-level `search` is the text looked for and `replace` the
    list of inlines inserted at each occurrence.  The non-empty pieces of
    text around the occurrences are kept as Str nodes.  Nodes that are not
    Str, or that do not contain `search`, are left unchanged (None is
    returned).
    """
    global replace, search

    if key != 'Str':
        return None

    pieces = value.split(search)
    if len(pieces) <= 1:
        return None

    spliced = [Str(pieces[0])] if pieces[0] != '' else []
    for piece in pieces[1:]:
        spliced.extend(replace)
        if piece != '':
            spliced.append(Str(piece))

    return spliced
404
405
def hasMeta(meta):
    """Tell whether meta holds a 'pandoc-numbering' entry of type MetaList."""
    if 'pandoc-numbering' not in meta:
        return False
    return meta['pandoc-numbering']['t'] == 'MetaList'
407
408
def isCorrect(definition):
    """Tell whether definition is a usable category definition.

    It must be a MetaMap with a 'category' entry of type MetaInlines made
    of exactly one Str.
    """
    if definition['t'] != 'MetaMap':
        return False
    if 'category' not in definition['c']:
        return False

    category = definition['c']['category']
    if category['t'] != 'MetaInlines':
        return False
    if len(category['c']) != 1:
        return False
    return category['c'][0]['t'] == 'Str'
414
415
def hasProperty(definition, name, type):
    """Tell whether definition has an entry called name of the given type."""
    properties = definition['c']
    if name not in properties:
        return False
    return properties[name]['t'] == type
417
418
def getProperty(definition, name):
    """Return the content of the entry called name in definition."""
    entry = definition['c'][name]
    return entry['c']
420
421
def getFirstValue(definition, name):
    """Return the content of the first element of the entry called name."""
    first = getProperty(definition, name)[0]
    return first['c']
423
424
def addListings(doc, format, meta):
    """Prepend the listings asked for in the metadata to the document.

    For every correct definition of the 'pandoc-numbering' metadata list
    that has a 'listing' title, an unnumbered level-1 header with that
    title is generated, followed by:

    - for LaTeX output, a raw block that defines the table-of-contents
      line of the category and starts it (indent taken from 'tab',
      default 1.5em; number width taken from 'space', default derived
      from the deepest section number of the collected elements);
    - otherwise, a bullet list of links to the collected elements of the
      category, or nothing when none were collected.

    doc[1] (the document blocks) is replaced in place; nothing is done
    when the metadata has no 'pandoc-numbering' list.
    """
    global collections, information

    if not hasMeta(meta):
        return

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not (isCorrect(definition) and hasProperty(definition, 'listing', 'MetaInlines')):
            continue

        # Get the category name and the title of the listing
        category = getFirstValue(definition, 'category')
        title = getProperty(definition, 'listing')

        if format == 'latex':
            # Special case for latex output

            # Get the link color
            if 'toccolor' in meta:
                linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
            else:
                linkcolor = '\\hypersetup{linkcolor=black}'

            # Get the tab (None when absent or not a number)
            tab = None
            if hasProperty(definition, 'tab', 'MetaString'):
                try:
                    tab = float(getProperty(definition, 'tab'))
                except ValueError:
                    pass

            # Get the space (None when absent or not a number)
            space = None
            if hasProperty(definition, 'space', 'MetaString'):
                try:
                    space = float(getProperty(definition, 'space'))
                except ValueError:
                    pass

            # Deal with default tab length
            if tab is None:
                tab = 1.5

            # Deal with default space length: grows with the number of dots
            # of the deepest section number found in the collection
            if space is None:
                level = 0
                if category in collections:
                    for tag in collections[category]:
                        level = max(level, information[tag]['section'].count('.'))
                space = level + 2.3

            # Add a RawBlock
            latexCategory = re.sub('[^a-z]+', '', category)
            latex = [
                linkcolor,
                '\\makeatletter',
                '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}',
                '\\@starttoc{' + latexCategory + '}',
                '\\makeatother'
            ]
            elt = [RawBlock('tex', ''.join(latex))]

        elif category in collections:
            # One list item per collected element, each a link to its tag
            elements = []
            for tag in collections[category]:
                text = information[tag]['toc']

                if pandocVersion() < '1.16':
                    # pandoc 1.15
                    link = Link(text, ['#' + tag, ''])
                else:
                    # pandoc 1.16
                    link = Link(['', [], []], text, ['#' + tag, ''])

                elements.append([Plain([link])])

            # Add a bullet list
            elt = [BulletList(elements)]

        else:
            # Add nothing
            elt = []

        # Add a new listing
        listings = listings + [Header(1, ['', ['unnumbered'], []], title)] + elt

    # Add listings to the document
    doc[1] = listings + doc[1]
525
526
def getFormat(category, meta):
    """Tell whether automatic formatting is enabled for category.

    On the first call, the 'format' MetaBool of every correct definition
    of the 'pandoc-numbering' metadata list is stored on the function
    itself (getFormat.value); later calls reuse that table whatever meta
    is.  A category without a setting is recorded as True.
    """
    if not hasattr(getFormat, 'value'):
        settings = getFormat.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'format', 'MetaBool'):
                    settings[getFirstValue(definition, 'category')] = getProperty(definition, 'format')

    return getFormat.value.setdefault(category, True)
539
540
def getCiteShortCut(category, meta):
    """Tell whether the '@category:name' cite shortcut is enabled for category.

    On the first call, the 'cite-shortcut' MetaBool of every correct
    definition of the 'pandoc-numbering' metadata list is stored on the
    function itself (getCiteShortCut.value); later calls reuse that table
    whatever meta is.  A category without a setting is recorded as False.
    """
    if not hasattr(getCiteShortCut, 'value'):
        settings = getCiteShortCut.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'cite-shortcut', 'MetaBool'):
                    settings[getFirstValue(definition, 'category')] = getProperty(definition, 'cite-shortcut')

    return getCiteShortCut.value.setdefault(category, False)
553
554
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] of category.

    On the first call, the levels of every correct definition of the
    'pandoc-numbering' metadata list are stored on the function itself
    (getDefaultLevels.value); later calls reuse that table whatever meta
    is.  For each definition the levels come from:

    - its 'sectioning' entry when that is a MetaInlines made of a single
      Str: a string of '-.' and '+.' runs, as in a numbering marker (the
      levels stay at 0 when the string does not match);
    - otherwise its 'first' and 'last' MetaString entries: 'first' minus
      one, kept within 0..6, gives levelInf, and 'last', kept between
      levelInf and 6, gives levelSup (values that are not integers are
      ignored).

    A category without a definition is recorded as [0, 0].
    """
    if not hasattr(getDefaultLevels, 'value'):
        levels = getDefaultLevels.value = {}
        definitions = meta['pandoc-numbering']['c'] if hasMeta(meta) else []

        # Loop on all listings definition
        for definition in definitions:
            if not isCorrect(definition):
                continue

            levelInf = 0
            levelSup = 0

            usesSectioning = (
                hasProperty(definition, 'sectioning', 'MetaInlines') and
                len(getProperty(definition, 'sectioning')) == 1 and
                getProperty(definition, 'sectioning')[0]['t'] == 'Str'
            )

            if usesSectioning:
                match = re.match('^' + headerRegex + '$', getFirstValue(definition, 'sectioning'))
                if match:
                    # Compute the levelInf and levelSup values
                    levelInf = len(match.group('hidden')) // 2
                    levelSup = len(match.group('header')) // 2
            else:
                if hasProperty(definition, 'first', 'MetaString'):
                    try:
                        levelInf = max(min(int(getProperty(definition, 'first')) - 1, 6), 0)
                    except ValueError:
                        pass
                if hasProperty(definition, 'last', 'MetaString'):
                    try:
                        levelSup = max(min(int(getProperty(definition, 'last')), 6), levelInf)
                    except ValueError:
                        pass

            levels[getFirstValue(definition, 'category')] = [levelInf, levelSup]

    return getDefaultLevels.value.setdefault(category, [0, 0])
591
592
def getClasses(category, meta):
    """Return the list of classes attached to the elements of category.

    On the first call, the stringified items of the 'classes' MetaList of
    every correct definition of the 'pandoc-numbering' metadata list are
    stored on the function itself (getClasses.value); later calls reuse
    that table whatever meta is.  A category without a setting gets the
    single class named after the category.
    """
    if not hasattr(getClasses, 'value'):
        settings = getClasses.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'classes', 'MetaList'):
                    classes = [stringify(elt) for elt in getProperty(definition, 'classes')]
                    settings[getFirstValue(definition, 'category')] = classes

    return getClasses.value.setdefault(category, [category])
608
609
def pandocVersion():
    """Return the version string printed by 'pandoc -v'.

    The pandoc executable is run on the first call only; the text that
    follows 'pandoc ' on the matching output line is decoded as UTF-8 and
    stored on the function itself (pandocVersion.value) for later calls.
    """
    if hasattr(pandocVersion, 'value'):
        return pandocVersion.value

    process = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, _err = process.communicate()
    found = re.search(b'pandoc (?P<version>.*)', out)
    pandocVersion.value = found.group('version').decode('utf-8')
    return pandocVersion.value
615
616
def main():
    """Run the filter: number the elements first, then resolve the references."""
    actions = [numbering, referencing]
    toJSONFilters(actions)


if __name__ == '__main__':
    main()
621