Completed
Push — master ( 5d7ab5...c3d092 )
by Christophe
01:15
created

numberingHeader()   A

Complexity

Conditions 3

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
dl 0
loc 6
rs 9.4285
c 1
b 0
f 0
1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Running counter of numbered paragraphs, keyed by '<category>:<section prefix>'
count = {}
# Numbers, description, title, link and toc entry of every numbered paragraph, keyed by tag
information = {}
# Tags of the numbered paragraphs in document order, keyed by basic category
collections = {}
# Current counter of each of the six header levels
headers = [0, 0, 0, 0, 0, 0]
# Sectioning prefix: a run of '-.' (levels left out of the local number)
# followed by a run of '+.' (levels shown in the local number).
# Raw string: '\.' and '\+' are not valid escapes in a normal string literal.
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Converts a list of actions into a filter that reads a JSON-formatted
    pandoc document from stdin, transforms it by walking the tree
    with the actions, and returns a new JSON-formatted pandoc document
    to stdout.  The argument is a list of functions action(key, value, format, meta),
    where key is the type of the pandoc object (e.g. 'Str', 'Para'),
    value is the contents of the object (e.g. a string for 'Str',
    a list of inline elements for 'Para'), format is the target
    output format (which will be taken for the first command line
    argument if present), and meta is the document's metadata.
    If the function returns None, the object to which it applies
    will remain unchanged.  If it returns an object, the object will
    be replaced.    If it returns a list, the list will be spliced in to
    the list to which the target object belongs.    (So, returning an
    empty list deletes the object.)

    The actions are applied one after the other, each one walking the
    result of the previous one.  Once all of them have run, addListings
    is called on the resulting document before it is written out.
    """
    try:
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        input_stream = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(input_stream.read())

    # The target format is the first command line argument, when present
    if len(sys.argv) > 1:
        outputFormat = sys.argv[1]
    else:
        outputFormat = ""

    # Every action receives the metadata of the document as it was read
    altered = doc
    for action in actions:
        altered = walk(altered, action, outputFormat, doc[0]['unMeta'])

    addListings(altered, outputFormat, altered[0]['unMeta'])
    json.dump(altered, sys.stdout)
54
55
def removeAccents(string):
    """Return string without its combining marks (accents, cedillas, ...).

    The string is decomposed with the NFKD normalization form, then every
    character that unicodedata reports as combining is dropped.
    """
    nfkd_form = unicodedata.normalize('NFKD', string)
    return u"".join(c for c in nfkd_form if not unicodedata.combining(c))
58
59
def toIdentifier(string):
    """Turn string into an identifier usable as a category name.

    The string is lower-cased and stripped of its accents, every run of
    characters other than ASCII letters, digits, '_' and '-' becomes a
    single dash, and the result is trimmed so that it starts with a letter.
    """
    # Replace invalid characters by dash
    identifier = re.sub('[^0-9a-zA-Z_-]+', '-', removeAccents(string.lower()))

    # Remove every leading character that is not a letter (digits, dashes, underscores)
    return re.sub('^[^a-zA-Z]+', '', identifier)
67
68
def toLatex(x):
    """Walks the tree x and returns its concatenated string content.

    Str, MetaString and Code contents are kept as they are, Math content is
    wrapped in '$...$', LineBreak and Space become a single space and the
    content of Note elements is left out.

    The tree x itself is not modified.
    """
    # Function-scope import: only this function needs the copy module
    import copy

    result = []

    def go(key, val, format, meta):
        if key in ['Str', 'MetaString']:
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package:
            # the tree is always walked with the 'latex' format (see below),
            # so the math is always wrapped in '$'
            result.append('$' + val[1] + '$')
        elif key in ['LineBreak', 'Space']:
            result.append(" ")
        elif key == 'Note':
            # Do not stringify value from Note node
            del val[:]

    # go() empties the Note elements in place: walk a deep copy so that the
    # footnotes of the caller's elements are not removed from the document
    walk(copy.deepcopy(x), go, 'latex', {})
    return ''.join(result)
95
96
def numbering(key, value, format, meta):
    """Filter action: dispatch Header and Para elements to their numbering handler.

    Returns the result of numberingHeader for a 'Header', the result of
    numberingPara for a 'Para', and None for every other key.
    """
    if key == 'Header':
        return numberingHeader(value)
    if key == 'Para':
        return numberingPara(value, format, meta)
    return None
101
102
def numberingHeader(value):
    """Update the module-level header counters for a Header element.

    value is the content of the Header: [level, [id, classes, attributes], content].
    Unless the header has the 'unnumbered' class, the counter of its level
    is incremented and the counters of all deeper levels are reset to 0.

    Always returns None.
    """
    level, (_identifier, classes, _attributes), _content = value
    if 'unnumbered' in classes:
        return

    headers[level - 1] += 1
    # A new header restarts the numbering of all its sub-levels
    for index in range(level, len(headers)):
        headers[index] = 0
108
109
def numberingPara(value, format, meta):
    """Number a paragraph whose last inline is a '#' marker.

    A paragraph is numbered when it holds at least three inlines, the last two
    being a Space followed by a Str made of an optional sectioning prefix
    (see headerRegex), a '#', an optional 'prefix:' and an optional name.
    A title can be given between parentheses right before the marker.

    The paragraph is recorded in the module-level count, information and
    collections dictionaries.

    Args:
        value: the list of inline elements of the Para.
        format: the target output format; with 'latex' (and automatic
            formatting enabled for the category) a raw TeX contents-line
            entry is inserted in front of the paragraph.
        meta: the document metadata, passed to the per-category getters.

    Returns:
        A new Para for a numbered paragraph, a Para with '##' turned into '#'
        for an escaped marker, None otherwise.
    """
    # Only a paragraph ending with <Space><Str> can carry a marker.
    # The element type is tested rather than comparing with Space(), whose
    # value may or may not carry a 'c' key.
    if len(value) < 3 or value[-2]['t'] != 'Space' or value[-1]['t'] != 'Str':
        return None

    last = value[-1]['c']

    match = re.match('^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$', last)

    if not match:
        if re.match('^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$', last):
            # Special case where the last element is '...##...'
            value[-1]['c'] = value[-1]['c'].replace('##', '#', 1)
            return Para(value)
        return None

    # Detect the title
    title = []
    if value[-3]['t'] == 'Str' and value[-3]['c'][-1:] == ')':
        for (i, item) in enumerate(value):
            if item['t'] == 'Str' and item['c'].startswith('('):
                title = value[i:-2]
                # Strip the enclosing parentheses
                title[0]['c'] = title[0]['c'][1:]
                title[-1]['c'] = title[-1]['c'][:-1]
                # Drop the title and the space before it; max() keeps the
                # slice empty when the title is the first inline (i == 0)
                value = value[:max(i - 1, 0)] + value[-2:]
                break

    # Compute the description
    description = value[:-2]

    # Compute the basicCategory and the category
    if match.group('prefix') is None:
        basicCategory = toIdentifier(stringify(description))
    else:
        basicCategory = match.group('prefix')

    # Compute the levelInf and levelSup values (each level is two characters long)
    levelInf = len(match.group('hidden')) // 2
    levelSup = len(match.group('header')) // 2

    # Get the default inf and sup level
    if levelInf == 0 and levelSup == 0:
        [levelInf, levelSup] = getDefaultLevels(basicCategory, meta)

    # Compute the section number
    sectionNumber = '.'.join(map(str, headers[:levelSup]))

    # Compute the leading (composed of the section numbering and a dot)
    leading = (sectionNumber + '.') if levelSup != 0 else ''

    category = basicCategory + ':' + leading

    # Count the paragraph in its category (a new category starts at 1)
    count[category] = count.get(category, 0) + 1

    # Get the number
    number = str(count[category])

    # Determine the final tag
    if match.group('name') is None:
        tag = category + number
    else:
        tag = basicCategory + ':' + match.group('name')

    # Replace the '-.-.+.+...#' by the category count (omitting the hidden part)
    localNumber = '.'.join(map(str, headers[levelInf:levelSup] + [number]))

    # Compute the globalNumber
    if sectionNumber:
        globalNumber = sectionNumber + '.' + number
    else:
        globalNumber = number

    # Per-category settings
    autoFormat = getFormat(basicCategory, meta)
    classes = getClasses(basicCategory, meta)

    # Is the automatic formatting required for this category?
    if autoFormat:
        # Prepare the final text
        text = [Strong(description + [Space(), Str(localNumber)])]

        # Add the title to the final text
        if title:
            text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

        # Compute the link
        link = description + [Space(), Str(localNumber)]

        # Compute the toc
        toc = [Str(globalNumber), Space()] + (title if title else description)
    else:
        # Prepare the final text: every part goes in its own Span
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]

        # Compute the link
        link = [Span(['', ['pandoc-numbering-link'] + classes, []], text)]

        # Compute the toc
        toc = [Span(['', ['pandoc-numbering-toc'] + classes, []], text)]

    # Store the numbers and the label for automatic numbering (See referencing function)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # Prepare the contents
    contents = [Span([tag, ['pandoc-numbering-text'] + classes, []], text)]

    # Compute collections
    collections.setdefault(basicCategory, []).append(tag)

    # Special case for LaTeX
    if format == 'latex' and autoFormat:
        latexCategory = re.sub('[^a-z]+', '', basicCategory)
        entry = title if title else description
        latex = '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory + \
            '}{\\protect\\numberline {' + leading + number + '}{\\ignorespaces ' + toLatex(entry) + '}}'
        contents.insert(0, RawInline('tex', latex))

    # Return the contents in a Para element
    return Para(contents)
257
258
# Inlines to insert and text to look for: set by referencingLink, read by replacing
replace = None
search = None
260
261
def lowering(key, value, format, meta):
    """Filter action: replace a Str by its lower-cased version.

    Returns None for every other key.
    """
    if key != 'Str':
        return None
    return Str(value.lower())
264
265
def referencing(key, value, format, meta):
    """Filter action: dispatch Link and Cite elements to their referencing handler.

    Returns the result of referencingLink for a 'Link', the result of
    referencingCite for a 'Cite', and None for every other key.
    """
    if key == 'Link':
        return referencingLink(value, format, meta)
    if key == 'Cite':
        return referencingCite(value, format, meta)
    return None
270
271
def referencingLink(value, format, meta):
    """Fill in a link that points to a numbered paragraph.

    Nothing happens unless the link target is '#<tag>' with <tag> recorded in
    the module-level information dictionary.  Otherwise the placeholders found
    in the link title and in the link text are replaced, in this order:

        '#t'  title of the paragraph, lower-cased
        '#T'  title of the paragraph
        '#d'  description of the paragraph, lower-cased
        '#D'  description of the paragraph
        '#s'  section number
        '#g'  global number
        '#c'  count
        '#n'  local number
        '#'   local number

    An empty link text is replaced by the default link label of the paragraph.

    The link is modified in place (value is updated) and None is returned.
    The module-level replace and search variables are used to pass the
    current substitution to the replacing action.
    """
    global replace, search

    # pandoc 1.15 links are [text, [reference, title]]; later versions put an
    # attributes triple in front.  The layout is read from the value itself,
    # which avoids both a call to pandoc and a comparison of version strings.
    i = 0 if len(value) == 2 else 1
    text = value[i]
    target = value[i + 1]
    reference = target[0]

    if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
        return None

    # Compute the name
    tag = reference[1:]
    if tag not in information:
        return None
    info = information[tag]

    # Replace all '#t', '#T', '#d', '#D', '#s', '#g', '#c', '#n', '#' with the corresponding text in the title
    titleText = stringify(info['title'])
    descriptionText = stringify(info['description'])
    plainSubstitutions = [
        ('#t', titleText.lower()),
        ('#T', titleText),
        ('#d', descriptionText.lower()),
        ('#D', descriptionText),
        ('#s', info['section']),
        ('#g', info['global']),
        ('#c', info['count']),
        ('#n', info['local']),
        ('#', info['local']),
    ]
    for placeholder, substitute in plainSubstitutions:
        target[1] = target[1].replace(placeholder, substitute)

    if text == []:
        # The link text is empty, replace it with the default label
        value[i] = info['link']
        return None

    # The link text is not empty: same substitutions, with inlines instead of plain text.
    # The bare '#' must stay last, it would otherwise eat the other placeholders.
    inlineSubstitutions = [
        ('#t', walk(info['title'], lowering, format, meta)),
        ('#T', info['title']),
        ('#d', walk(info['description'], lowering, format, meta)),
        ('#D', info['description']),
        ('#s', [Str(info['section'])]),
        ('#g', [Str(info['global'])]),
        ('#c', [Str(info['count'])]),
        ('#n', [Str(info['local'])]),
        ('#', [Str(info['local'])]),
    ]
    # The loop variables are the module-level search and replace read by replacing
    for search, replace in inlineSubstitutions:
        value[i] = walk(value[i], replacing, format, meta)

    return None
353
354
def referencingCite(value, format, meta):
    """Turn an '@prefix:name' citation into a link to a numbered paragraph.

    The first inline of the citation text must be a Str of the form
    '@<category>:<name or number>'.  The citation is replaced only when the
    cite shortcut is enabled for the category and the tag is recorded in the
    module-level information dictionary.

    Returns:
        A Link whose text is the local number of the paragraph and whose
        target is '#<tag>', or None when the citation is left unchanged.
    """
    inlines = value[1]

    # Nothing to match when the citation text is empty or does not start with a Str
    if not inlines or inlines[0].get('t') != 'Str':
        return None

    match = re.match(
        r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$',
        inlines[0]['c']
    )
    if match is None or not getCiteShortCut(match.group('category'), meta):
        return None

    # Deal with @prefix:name shortcut
    tag = match.group('tag')
    if tag not in information:
        return None

    label = [Str(information[tag]['local'])]
    target = ['#' + tag, '']

    if pandocVersion() < '1.16':
        # pandoc 1.15
        return Link(label, target)

    # pandoc > 1.15
    return Link(['', [], []], label, target)
368
369
def replacing(key, value, format, meta):
    """Filter action: replace the module-level search text found in a Str.

    Every occurrence of search in the string is replaced by the inlines held
    in the module-level replace list; the pieces of text in between are kept
    as Str elements (empty pieces are dropped).

    Returns:
        The list of inlines replacing the Str, or None when key is not 'Str'
        or when the string does not contain search.
    """
    if key != 'Str':
        return None

    pieces = value.split(search)
    if len(pieces) <= 1:
        # The searched text does not occur in this string
        return None

    ret = []

    if pieces[0] != '':
        ret.append(Str(pieces[0]))

    # One replacement in front of every piece following an occurrence
    for piece in pieces[1:]:
        ret.extend(replace)
        if piece != '':
            ret.append(Str(piece))

    return ret
386
387
def hasMeta(meta):
    """Tell whether meta holds a 'pandoc-numbering' entry of type MetaList."""
    return 'pandoc-numbering' in meta and meta['pandoc-numbering']['t'] == 'MetaList'
389
390
def isCorrect(definition):
    """Tell whether definition is a usable category definition.

    A definition is usable when it is a MetaMap holding a 'category' entry
    of type MetaInlines made of exactly one Str.
    """
    if definition['t'] != 'MetaMap' or 'category' not in definition['c']:
        return False

    category = definition['c']['category']
    return category['t'] == 'MetaInlines' and \
        len(category['c']) == 1 and \
        category['c'][0]['t'] == 'Str'
396
397
def hasProperty(definition, name, type):
    """Tell whether definition holds a property called name whose meta type is type."""
    return name in definition['c'] and definition['c'][name]['t'] == type
399
400
def getProperty(definition, name):
    """Return the content (the 'c' entry) of the property called name in definition."""
    return definition['c'][name]['c']
402
403
def getFirstValue(definition, name):
    """Return the content of the first element of the property called name in definition."""
    return getProperty(definition, name)[0]['c']
405
406
def addListings(doc, format, meta):
    """Prepend to the document the listings requested in the metadata.

    A listing is built for every correct definition of the 'pandoc-numbering'
    metadata list that has a 'listing' property of type MetaInlines.  It is
    made of an unnumbered level 1 header (the 'listing' inlines) followed by
    a raw TeX block for the 'latex' format, or by a bullet list of links to
    the numbered paragraphs of the category for the other formats.

    The document is modified in place (doc[1] is replaced); nothing is
    returned.  Nothing happens when the metadata has no 'pandoc-numbering' list.
    """
    if not hasMeta(meta):
        return

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not (isCorrect(definition) and hasProperty(definition, 'listing', 'MetaInlines')):
            continue

        # Get the category name
        category = getFirstValue(definition, 'category')

        # Get the title
        title = getProperty(definition, 'listing')

        if format == 'latex':
            # Special case for latex output
            elt = _latexListing(definition, category, meta)
        else:
            elt = _linkListing(category)

        # Add a new listing
        listings = listings + [Header(1, ['', ['unnumbered'], []], title)] + elt

    # Add listings to the document
    doc[1] = listings + doc[1]


def _listingLength(definition, name):
    """Return the MetaString property called name as a float, None if it is missing or not a number."""
    if not hasProperty(definition, name, 'MetaString'):
        return None
    try:
        return float(getProperty(definition, name))
    except ValueError:
        return None


def _latexListing(definition, category, meta):
    """Return the list holding the raw TeX block that prints the listing of category."""
    # Get the link color
    if 'toccolor' in meta:
        linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
    else:
        linkcolor = '\\hypersetup{linkcolor=black}'

    # Get the tab, dealing with the default tab length
    tab = _listingLength(definition, 'tab')
    if tab is None:
        tab = 1.5

    # Get the space; the default grows with the deepest section number of the collection
    space = _listingLength(definition, 'space')
    if space is None:
        level = 0
        for tag in collections.get(category, []):
            level = max(level, information[tag]['section'].count('.'))
        space = level + 2.3

    # Add a RawBlock
    latexCategory = re.sub('[^a-z]+', '', category)
    latex = [
        linkcolor,
        '\\makeatletter',
        '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}',
        '\\@starttoc{' + latexCategory + '}',
        '\\makeatother'
    ]
    return [RawBlock('tex', ''.join(latex))]


def _linkListing(category):
    """Return the list holding the bullet list of links to the paragraphs of category (empty if none)."""
    if category not in collections:
        # Add nothing
        return []

    # Prepare the list
    elements = []

    # Loop on the collection
    for tag in collections[category]:
        # Add an item to the list
        text = information[tag]['toc']

        if pandocVersion() < '1.16':
            # pandoc 1.15
            link = Link(text, ['#' + tag, ''])
        else:
            # pandoc 1.16
            link = Link(['', [], []], text, ['#' + tag, ''])

        elements.append([Plain([link])])

    # Add a bullet list
    return [BulletList(elements)]
507
508
def getFormat(category, meta):
    """Tell whether the automatic formatting is enabled for category.

    The 'format' MetaBool properties of the correct definitions found in the
    'pandoc-numbering' metadata list are read on the first call only and
    cached in getFormat.value.  A category without such a property gets True.
    """
    if not hasattr(getFormat, 'value'):
        getFormat.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'format', 'MetaBool'):
                    getFormat.value[getFirstValue(definition, 'category')] = getProperty(definition, 'format')

    # Automatic formatting is the default
    return getFormat.value.setdefault(category, True)
521
522
def getCiteShortCut(category, meta):
    """Tell whether the '@prefix:name' cite shortcut is enabled for category.

    The 'cite-shortcut' MetaBool properties of the correct definitions found
    in the 'pandoc-numbering' metadata list are read on the first call only
    and cached in getCiteShortCut.value.  A category without such a property
    gets False.
    """
    if not hasattr(getCiteShortCut, 'value'):
        getCiteShortCut.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'cite-shortcut', 'MetaBool'):
                    getCiteShortCut.value[getFirstValue(definition, 'category')] = \
                        getProperty(definition, 'cite-shortcut')

    # The shortcut is disabled by default
    return getCiteShortCut.value.setdefault(category, False)
535
536
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] sectioning levels of category.

    The levels of the correct definitions found in the 'pandoc-numbering'
    metadata list are computed on the first call only and cached in
    getDefaultLevels.value.  For each definition they come from:

    - the 'sectioning' property when it is a MetaInlines made of a single
      Str (a text matching headerRegex such as '-.+.'; both levels are 0
      when the text does not match);
    - otherwise the 'first' and 'last' MetaString properties (integers;
      a missing or non-integer property leaves the level at 0).

    A category without definition gets [0, 0].
    """
    if not hasattr(getDefaultLevels, 'value'):
        getDefaultLevels.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if not isCorrect(definition):
                    continue

                levelInf = 0
                levelSup = 0

                if hasProperty(definition, 'sectioning', 'MetaInlines') and \
                   len(getProperty(definition, 'sectioning')) == 1 and \
                   getProperty(definition, 'sectioning')[0]['t'] == 'Str':

                    match = re.match('^' + headerRegex + '$', getFirstValue(definition, 'sectioning'))
                    if match:
                        # Compute the levelInf and levelSup values (each level is two characters long)
                        levelInf = len(match.group('hidden')) // 2
                        levelSup = len(match.group('header')) // 2
                else:
                    if hasProperty(definition, 'first', 'MetaString'):
                        try:
                            # 'first' is 1-based; keep the level between 0 and 6
                            levelInf = max(min(int(getProperty(definition, 'first')) - 1, 6), 0)
                        except ValueError:
                            # Not an integer: keep the default
                            pass
                    if hasProperty(definition, 'last', 'MetaString'):
                        try:
                            # Keep the level between levelInf and 6
                            levelSup = max(min(int(getProperty(definition, 'last')), 6), levelInf)
                        except ValueError:
                            # Not an integer: keep the default
                            pass

                getDefaultLevels.value[getFirstValue(definition, 'category')] = [levelInf, levelSup]

    # No sectioning by default
    return getDefaultLevels.value.setdefault(category, [0, 0])
573
574
def getClasses(category, meta):
    """Return the list of classes to put on the elements generated for category.

    The 'classes' MetaList properties of the correct definitions found in the
    'pandoc-numbering' metadata list are read on the first call only and
    cached in getClasses.value.  A category without such a property gets
    its own name as only class.
    """
    if not hasattr(getClasses, 'value'):
        getClasses.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'classes', 'MetaList'):
                    classes = [stringify(elt) for elt in getProperty(definition, 'classes')]
                    getClasses.value[getFirstValue(definition, 'category')] = classes

    # The category name is the default class
    return getClasses.value.setdefault(category, [category])
590
591
def pandocVersion():
    """Return the version reported by 'pandoc -v', as a string.

    pandoc is run on the first call only; the result is cached in
    pandocVersion.value.

    Raises:
        RuntimeError: when the output of 'pandoc -v' does not contain
            'pandoc <version>'.
    """
    if not hasattr(pandocVersion, 'value'):
        process = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, _ = process.communicate()

        match = re.search(b'pandoc (?P<version>.*)', out)
        if match is None:
            raise RuntimeError('unable to find the pandoc version in the output of "pandoc -v"')

        # strip(): '.' also matches the '\r' of a CRLF line ending
        pandocVersion.value = match.group('version').decode('utf-8').strip()

    return pandocVersion.value
597
598
def main():
    """Run the filter: number the document, then resolve the references."""
    toJSONFilters([numbering, referencing])
600
601
# Run the filter only when the file is executed as a script
if __name__ == '__main__':
    main()
603