Completed
Push — master ( 850425...cd4f5d )
by Christophe
01:10
created

applyJSONFilters()   B

Complexity

Conditions 6

Size

Total Lines 41

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
c 0
b 0
f 0
dl 0
loc 41
rs 7.5384
1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from __future__ import print_function
8
9
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
10
from functools import reduce
11
import json
12
import io
13
import sys
14
import codecs
15
import re
16
import unicodedata
17
import subprocess
18
19
def warning(*objs):
    """Print the given objects on standard error, prefixed with 'WARNING: '."""
    message = ("WARNING: ",) + objs
    print(*message, file=sys.stderr)
21
22
# Running counter per category; keys are built by computeCategory as
# basicCategory + ':' + leading.
count = {}

# Data recorded for each numbered element, keyed by its tag
# (see numberingEffective for the stored fields).
information = {}

# For each basic category, the list of tags in order of appearance.
collections = {}

# Current counter for each of the six header levels (see numberingHeader).
headers = [0, 0, 0, 0, 0, 0]

# Sectioning prefix: any number of '-.' (the 'hidden' group) followed by any
# number of '+.'; the 'header' group spans both parts.
# A raw string is used so that '\.' and '\+' are not treated as (invalid)
# string escape sequences.
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
27
28
def toJSONFilters(actions):
    """Run a JSON-to-JSON pandoc filter from stdin to stdout.

    The JSON-formatted pandoc document is read from stdin (decoded as
    UTF-8), handed to `applyJSONFilters` together with `actions`, and the
    resulting JSON string is written to stdout.

    `actions` is a list of functions of the form
    `action(key, value, format, meta)`, as described under `walk`.

    The `format` passed on is the first command-line argument when there is
    one (pandoc sets it by default when calling filters), otherwise the
    empty string.
    """
    try:
        reader = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: https://stackoverflow.com/questions/2467928/python-unicodeencode
        reader = codecs.getreader("utf-8")(sys.stdin)

    source = reader.read()
    format = sys.argv[1] if len(sys.argv) > 1 else ""

    result = applyJSONFilters(actions, source, format)
    sys.stdout.write(result)
60
61
def applyJSONFilters(actions, source, format=""):
    """Walk through a JSON document and apply the filters.

    This:

    * reads a JSON-formatted pandoc document from a source string
    * transforms it by walking the tree and performing the actions
    * calls `addListings` on the transformed document
    * returns the new JSON-formatted pandoc document as a string

    The `actions` argument is a list of functions (see `walk`
    for a full description).

    The argument `source` is a string-encoded JSON object.

    The argument `format` is a string describing the output format.

    Returns the new JSON-formatted pandoc document.
    """
    doc = json.loads(source)

    # New API: a dict with a 'meta' key; old API: a list whose first item
    # carries the metadata under 'unMeta'.
    if 'meta' in doc:
        meta = doc['meta']
    elif doc[0]:  # old API
        meta = doc[0]['unMeta']
    else:
        meta = {}

    altered = doc
    for action in actions:
        altered = walk(altered, action, format, meta)

    # Read the metadata again, this time from the transformed document.
    # The old-API test must look at `altered`, not at `meta`: `meta` is a
    # dict at this point, so indexing it with 0 raised KeyError.
    if 'meta' in altered:
        meta = altered['meta']
    elif altered[0]:  # old API
        meta = altered[0]['unMeta']
    else:
        meta = {}

    addListings(altered, format, meta)

    return json.dumps(altered)
102
103
def removeAccents(string):
    """Return `string` NFKD-normalized with all combining characters dropped."""
    decomposed = unicodedata.normalize('NFKD', string)
    kept = filter(lambda char: not unicodedata.combining(char), decomposed)
    return u"".join(kept)
106
107
def toIdentifier(string):
    """Turn `string` into an identifier.

    The string is lower-cased and stripped of accents, each run of
    characters other than ASCII letters, digits, '_' and '-' becomes a
    single dash, and everything before the first ASCII letter is removed.
    """
    normalized = removeAccents(string.lower())

    # Replace invalid characters by a dash
    dashed = re.sub('[^0-9a-zA-Z_-]+', '-', normalized)

    # Remove every leading character that is not a letter (digits, dashes, ...)
    return re.sub('^[^a-zA-Z]+', '', dashed)
115
116
def toLatex(x):
    """Walk the tree `x` and return its concatenated string content.

    Text, code and math contents are kept (math is wrapped in '$' because
    the walk is run with the 'latex' format), line breaks and spaces become
    a single space, and the content of notes is discarded.
    """
    pieces = []

    def collect(key, val, format, meta):
        if key == 'Str' or key == 'MetaString':
            pieces.append(val)
        elif key == 'Code':
            pieces.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package
            pieces.append('$' + val[1] + '$' if format == 'latex' else val[1])
        elif key in ('LineBreak', 'Space'):
            pieces.append(" ")
        elif key == 'Note':
            # Do not stringify value from Note node: empty it in place
            del val[:]

    walk(x, collect, 'latex', {})
    return ''.join(pieces)
143
144
def numbering(key, value, format, meta):
    """Walk action: dispatch headers and paragraphs to their numbering handler.

    Returns whatever the handler returns, or None for any other element.
    """
    if key == 'Header':
        return numberingHeader(value)
    if key == 'Para':
        return numberingPara(value, format, meta)
    return None
149
150
def numberingHeader(value):
    """Update the global `headers` counters for a header element.

    Unless the header has the 'unnumbered' class, the counter of its level
    is incremented and the counters of all deeper levels are reset to 0.
    """
    level, attr, _content = value
    _identifier, classes, _attributes = attr
    if 'unnumbered' in classes:
        return
    headers[level - 1] += 1
    for deeper in range(level, 6):
        headers[deeper] = 0
156
157
def numberingPara(value, format, meta):
    """Handle a paragraph whose last two inlines are a Space and a Str.

    When the final Str is a numbering marker (optional sectioning prefix,
    '#', optional 'prefix:' and optional name) the paragraph is replaced by
    the result of `numberingEffective`. When it uses the escaped form '##',
    `numberingSharpSharp` rewrites it in place. Otherwise nothing is done
    and None is returned.
    """
    if len(value) >= 3 and value[-2]['t'] == 'Space' and value[-1]['t'] == 'Str':
        last = value[-1]['c']
        # Raw strings keep '\w' from being read as an (invalid) string escape
        match = re.match(
            r'^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$',
            last
        )
        if match:
            # Is it a Para and the last element is an identifier beginning with '#'
            return numberingEffective(match, value, format, meta)
        elif re.match(
            r'^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$',
            last
        ):
            # Special case where the last element is '...##...'
            return numberingSharpSharp(value)
167
168
def numberingEffective(match, value, format, meta):
    """Build the numbered paragraph that replaces a marked paragraph.

    Computes the title, description, category and the various numbers,
    records them in `information` under the element's tag, appends the tag
    to `collections`, and returns a Para holding a Span with the final text
    (preceded by a raw LaTeX inline when the format is 'latex' and automatic
    formatting is enabled for the category).
    """
    # The order of these calls matters: computeTitle removes the title from
    # `value` in place before the description is taken, and computeCategory
    # increments the counter that `number` reads.
    title = computeTitle(value)
    description = computeDescription(value)
    basicCategory = computeBasicCategory(match, description)
    levelInf, levelSup = computeLevels(match, basicCategory, meta)
    sectionNumber = computeSectionNumber(levelSup)
    leading = computeLeading(levelSup, sectionNumber)
    category = computeCategory(basicCategory, leading)
    number = str(count[category])
    tag = computeTag(match, basicCategory, category, number)
    localNumber = computeLocalNumber(levelInf, levelSup, number)
    globalNumber = computeGlobalNumber(sectionNumber, number)
    text, link, toc = computeTextLinkToc(
        meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber
    )

    # Store the numbers and the label for automatic numbering (See referencing function)
    record = {}
    record['section'] = sectionNumber
    record['local'] = localNumber
    record['global'] = globalNumber
    record['count'] = number
    record['description'] = description
    record['title'] = title
    record['link'] = link
    record['toc'] = toc
    information[tag] = record

    # Prepare the contents
    spanClasses = ['pandoc-numbering-text'] + getClasses(basicCategory, meta)
    contents = [Span([tag, spanClasses, []], text)]

    # Register the tag in its collection
    collections.setdefault(basicCategory, []).append(tag)

    # Special case for LaTeX
    if format == 'latex' and getFormat(basicCategory, meta):
        addLaTeX(contents, basicCategory, title, description, leading, number)

    # Return the contents in a Para element
    return Para(contents)
209
210
def computeTitle(value):
    """Extract a parenthesized title from the inlines of a marked paragraph.

    `value` is the list of inlines; its last two items are the Space and the
    marker. When the item just before them is a Str ending with ')', the
    title runs from the first Str starting with '(' up to that item. The
    surrounding parentheses are stripped, the title items (and the item just
    before them) are removed from `value` in place, and the title is
    returned. Returns an empty list when there is no title.
    """
    title = []
    if value[-3]['t'] == 'Str' and value[-3]['c'][-1:] == ')':
        for (i, item) in enumerate(value):
            # startswith() instead of [0]: an empty Str must not raise IndexError
            if item['t'] == 'Str' and item['c'].startswith('('):
                title = value[i:-2]
                title[0]['c'] = title[0]['c'][1:]
                title[-1]['c'] = title[-1]['c'][:-1]
                # NOTE(review): when i == 0 this slice is empty, so nothing
                # is removed from `value` — confirm this is intended.
                del value[i-1:-2]
                break
    return title
221
222
def computeDescription(value):
    """Return a copy of `value` without its last two items."""
    description = value[:-2]
    return description
224
225
def computeBasicCategory(match, description):
    """Return the basic category of a numbered element.

    This is the 'prefix' group of `match` when it took part in the match,
    otherwise an identifier derived from the stringified `description`.
    """
    prefix = match.group('prefix')
    if prefix is None:
        return toIdentifier(stringify(description))
    return prefix
230
231
def computeLevels(match, basicCategory, meta):
    """Return [levelInf, levelSup] for a numbered element.

    Both levels are half the length of the 'hidden' and 'header' groups of
    `match`. When both are 0, the default levels of the category are used
    instead.
    """
    levelInf = len(match.group('hidden')) // 2
    levelSup = len(match.group('header')) // 2

    # No explicit sectioning: fall back to the defaults of the category
    if not (levelInf or levelSup):
        levelInf, levelSup = getDefaultLevels(basicCategory, meta)

    return [levelInf, levelSup]
241
242
def computeSectionNumber(levelSup):
    """Return the first `levelSup` header counters joined with dots."""
    return '.'.join(str(counter) for counter in headers[:levelSup])
244
245
def computeLeading(levelSup, sectionNumber):
    """Return the section number followed by a dot, or '' when `levelSup` is 0."""
    return sectionNumber + '.' if levelSup != 0 else ''
251
252
def computeCategory(basicCategory, leading):
    """Return the full category key and increment its counter in `count`.

    The key is `basicCategory`, a colon and `leading`; a category seen for
    the first time starts counting at 1.
    """
    category = basicCategory + ':' + leading
    count[category] = count.get(category, 0) + 1
    return category
262
263
def computeTag(match, basicCategory, category, number):
    """Return the tag identifying a numbered element.

    When the 'name' group of `match` took part in the match the tag is
    `basicCategory`, a colon and that name; otherwise it is `category`
    followed by `number`.
    """
    name = match.group('name')
    if name is None:
        return category + number
    return basicCategory + ':' + name
269
270
def computeLocalNumber(levelInf, levelSup, number):
    """Return the header counters from `levelInf` up to `levelSup`, then
    `number`, joined with dots (the hidden levels are left out)."""
    parts = headers[levelInf:levelSup] + [number]
    return '.'.join(str(part) for part in parts)
273
274
def computeGlobalNumber(sectionNumber, number):
    """Return `number` prefixed by `sectionNumber` and a dot when the section
    number is not empty, otherwise `number` alone."""
    if not sectionNumber:
        return number
    return sectionNumber + '.' + number
280
281
def computeTextLinkToc(meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber):
    """Return [text, link, toc] for a numbered element.

    With automatic formatting enabled for the category, `text` is the
    description and local number in bold (followed by the title in
    emphasized parentheses when there is one), `link` is the description and
    local number, and `toc` is the global number followed by the title, or
    by the description when there is no title.

    Otherwise `text` is a list of Spans classed 'description', 'title',
    'local', 'global' and 'section', and `link` and `toc` each wrap that
    list in a Span carrying the classes of the category.
    """
    if not getFormat(basicCategory, meta):
        # No automatic formatting: expose every piece in its own Span
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]
        link = [Span(['', ['pandoc-numbering-link'] + getClasses(basicCategory, meta), []], text)]
        toc = [Span(['', ['pandoc-numbering-toc'] + getClasses(basicCategory, meta), []], text)]
        return [text, link, toc]

    # Final text, with the title appended when there is one
    text = [Strong(description + [Space(), Str(localNumber)])]
    if title:
        text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

    # Default link label
    link = description + [Space(), Str(localNumber)]

    # Listing entry
    toc = [Str(globalNumber), Space()] + (title if title else description)

    return [text, link, toc]
317
318
def addLaTeX(contents, basicCategory, title, description, leading, number):
    """Insert a raw LaTeX contents-line entry at the front of `contents`.

    The LaTeX category name is `basicCategory` with everything but
    lower-case ASCII letters removed. The entry text is the title when
    there is one, otherwise the description, converted by `toLatex`.
    """
    latexCategory = re.sub('[^a-z]+', '', basicCategory)
    entry = title if title else description
    # '\\ignorespaces' is written with an escaped backslash: '\i' is not a
    # valid string escape sequence.
    latex = (
        '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory +
        '}{\\protect\\numberline {' + leading + number +
        '}{\\ignorespaces ' + toLatex(entry) + '}}'
    )
    contents.insert(0, RawInline('tex', latex))
327
328
def numberingSharpSharp(value):
    """Replace the first '##' of the last inline's content by '#', in place."""
    last = value[-1]
    last['c'] = last['c'].replace('##', '#', 1)
330
331
# Shared state between referencingLink and replacing: the placeholder being
# searched for and the inlines that replace it.
replace = search = None
333
334
def lowering(key, value, format, meta):
    """Walk action: return a lower-cased copy of a Str, None for anything else."""
    if key != 'Str':
        return None
    return Str(value.lower())
337
338
def referencing(key, value, format, meta):
    """Walk action: dispatch links and citations to their referencing handler.

    Returns whatever the handler returns, or None for any other element.
    """
    if key == 'Link':
        return referencingLink(value, format, meta)
    if key == 'Cite':
        return referencingCite(value, format, meta)
    return None
343
344
def referencingLink(value, format, meta):
    """Resolve the placeholders of a link pointing at a numbered element.

    `value` is the content of a Link; it is modified in place and nothing
    is returned. The link is handled only when its reference is '#'
    followed by a tag recorded in `information`.

    In the link title, and in the link text when it is not empty, the
    placeholders are replaced as follows:

    * '#t' / '#T': the title (lower-cased / as is)
    * '#d' / '#D': the description (lower-cased / as is)
    * '#s', '#g', '#c': the section, global and count numbers
    * '#n' and a bare '#': the local number

    An empty link text is replaced by the default label of the element.
    """
    global replace, search

    if pandocVersion() < '1.16':
        # pandoc 1.15: [text, [reference, title]]
        i = 0
        [text, [reference, title]] = value
    else:
        # pandoc > 1.15: [attributes, text, [reference, title]]
        i = 1
        [attributes, text, [reference, title]] = value

    # Raw string keeps '\w' from being read as an (invalid) string escape
    if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
        return

    # Compute the name
    tag = reference[1:]
    if tag not in information:
        return
    info = information[tag]

    # Replacements for the link title (a plain string). The order matters:
    # the bare '#' must be handled after every '#x' placeholder.
    plain = [
        ('#t', stringify(info['title']).lower()),
        ('#T', stringify(info['title'])),
        ('#d', stringify(info['description']).lower()),
        ('#D', stringify(info['description'])),
        ('#s', info['section']),
        ('#g', info['global']),
        ('#c', info['count']),
        ('#n', info['local']),
        ('#', info['local']),
    ]
    target = value[i + 1]
    for placeholder, replacement in plain:
        target[1] = target[1].replace(placeholder, replacement)

    if text == []:
        # The link text is empty, replace it with the default label
        value[i] = info['link']
        return

    # The link text is not empty: same placeholders, same order, but the
    # replacements are lists of inlines inserted by the `replacing` action.
    inline = [
        ('#t', walk(info['title'], lowering, format, meta)),
        ('#T', info['title']),
        ('#d', walk(info['description'], lowering, format, meta)),
        ('#D', info['description']),
        ('#s', [Str(info['section'])]),
        ('#g', [Str(info['global'])]),
        ('#c', [Str(info['count'])]),
        ('#n', [Str(info['local'])]),
        ('#', [Str(info['local'])]),
    ]
    for placeholder, inlines in inline:
        # `replacing` reads these two module-level names
        search = placeholder
        replace = inlines
        value[i] = walk(value[i], replacing, format, meta)
426
427
def referencingCite(value, format, meta):
    """Turn an '@prefix:name' citation into a link to a numbered element.

    The first inline of the citation must be a Str of the form
    '@category:name' (or '@category:' followed by a dotted number), the
    cite shortcut must be enabled for that category, and the tag must be
    recorded in `information`. In that case a Link to '#tag' labelled with
    the local number is returned; otherwise None is returned.
    """
    inlines = value[1]
    # Nothing to do when there is no leading Str to inspect
    if not inlines or inlines[0].get('t') != 'Str':
        return None

    # Raw string keeps '\w', '\d' and '\.' from being read as string escapes
    match = re.match(
        r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$',
        inlines[0]['c']
    )
    if match is None or not getCiteShortCut(match.group('category'), meta):
        return None

    # Deal with @prefix:name shortcut
    tag = match.group('tag')
    if tag not in information:
        return None

    if pandocVersion() < '1.16':
        # pandoc 1.15
        return Link([Str(information[tag]['local'])], ['#' + tag, ''])
    # pandoc > 1.15
    return Link(['', [], []], [Str(information[tag]['local'])], ['#' + tag, ''])
440
441
def replacing(key, value, format, meta):
    """Walk action: replace each occurrence of the global `search` inside a
    Str by the inlines of the global `replace`.

    Returns the list of resulting inlines (the non-empty text pieces as Str
    elements, interleaved with the replacement), or None when the element is
    not a Str or does not contain `search`.
    """
    if key != 'Str':
        return None

    pieces = value.split(search)
    if len(pieces) <= 1:
        return None

    result = [Str(pieces[0])] if pieces[0] != '' else []
    for piece in pieces[1:]:
        result.extend(replace)
        if piece != '':
            result.append(Str(piece))
    return result
457
458
def hasMeta(meta):
    """Tell whether `meta` has a 'pandoc-numbering' entry of type MetaList."""
    if 'pandoc-numbering' not in meta:
        return False
    return meta['pandoc-numbering']['t'] == 'MetaList'
460
461
def isCorrect(definition):
    """Tell whether `definition` is a MetaMap whose 'category' entry is a
    MetaInlines made of exactly one Str."""
    if definition['t'] != 'MetaMap':
        return False
    if 'category' not in definition['c']:
        return False
    category = definition['c']['category']
    if category['t'] != 'MetaInlines':
        return False
    if len(category['c']) != 1:
        return False
    return category['c'][0]['t'] == 'Str'
467
468
def hasProperty(definition, name, type):
    """Tell whether `definition` has an entry `name` whose 't' field equals `type`."""
    properties = definition['c']
    if name not in properties:
        return False
    return properties[name]['t'] == type
470
471
def getProperty(definition, name):
    """Return the content ('c' field) of the entry `name` of `definition`."""
    entry = definition['c'][name]
    return entry['c']
473
474
def getFirstValue(definition, name):
    """Return the content of the first item of the entry `name` of `definition`."""
    first = getProperty(definition, name)[0]
    return first['c']
476
477
def addListings(doc, format, meta):
    """Insert the listings requested in the metadata at the top of `doc`.

    For each correct 'pandoc-numbering' definition with a 'listing' entry of
    type MetaInlines, an unnumbered level-1 header carrying the listing
    title is added, followed by the listing itself (built for LaTeX or for
    the other formats). Nothing is done when the metadata has no
    'pandoc-numbering' list.
    """
    if not hasMeta(meta):
        return

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not (isCorrect(definition) and hasProperty(definition, 'listing', 'MetaInlines')):
            continue

        # Category name and listing title
        category = getFirstValue(definition, 'category')
        title = getProperty(definition, 'listing')

        listings.append(Header(1, ['', ['unnumbered'], []], title))

        extend = extendListingsLaTeX if format == 'latex' else extendListingsOther
        extend(listings, meta, definition, category)

    # Add listings to the document ('blocks' key, or second item in the old API)
    blocks = doc['blocks'] if 'blocks' in doc else doc[1]
    blocks[0:0] = listings
503
504
def extendListingsLaTeX(listings, meta, definition, category):
    """Append to `listings` a raw LaTeX block that typesets the listing of
    `category`.

    The block sets the link color, defines the '\\l@<category>' command
    with the tab and space lengths of the definition, and starts the
    corresponding table of contents.
    """
    space = getSpace(definition, category)
    tab = getTab(definition, category)

    latexCategory = re.sub('[^a-z]+', '', category)
    entryCommand = '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}'
    parts = (
        getLinkColor(meta),
        '\\makeatletter',
        entryCommand,
        '\\@starttoc{' + latexCategory + '}',
        '\\makeatother',
    )

    # Add a RawBlock
    listings.append(RawBlock('tex', ''.join(parts)))
517
518
def getLinkColor(meta):
    """Return the LaTeX '\\hypersetup' command setting the link color.

    The color is the stringified 'toccolor' metadata when present, 'black'
    otherwise.
    """
    color = stringify(meta['toccolor']['c']) if 'toccolor' in meta else 'black'
    return '\\hypersetup{linkcolor=' + color + '}'
524
525
def getTab(definition, category):
    """Return the tab length of a listing definition as a float.

    The 'tab' entry is used when it is a MetaString that converts to a
    float; otherwise the default of 1.5 is returned. `category` is not used.
    """
    tab = None
    if hasProperty(definition, 'tab', 'MetaString'):
        try:
            tab = float(getProperty(definition, 'tab'))
        except ValueError:
            # Not a number: fall back to the default below
            tab = None

    # Deal with default tab length
    return 1.5 if tab is None else tab
540
541
def getSpace(definition, category):
    """Return the space length of a listing definition as a float.

    The 'space' entry is used when it is a MetaString that converts to a
    float. Otherwise the default is 2.3 plus the largest number of dots
    found in the section numbers of the elements collected for `category`
    (0 when there are none).
    """
    space = None
    if hasProperty(definition, 'space', 'MetaString'):
        try:
            space = float(getProperty(definition, 'space'))
        except ValueError:
            # Not a number: fall back to the default below
            space = None

    if space is not None:
        return space

    # Deal with default space length
    level = 0
    for tag in collections.get(category, []):
        level = max(level, information[tag]['section'].count('.'))
    return level + 2.3
561
562
def extendListingsOther(listings, meta, definition, category):
    """Append to `listings` a bullet list linking to every element collected
    for `category`.

    Each item is a Plain block holding a Link to '#tag' labelled with the
    'toc' text of the element. Nothing is appended when the category has no
    collection.
    """
    if category not in collections:
        return

    # One list item per collected element
    elements = []
    for tag in collections[category]:
        text = information[tag]['toc']
        target = ['#' + tag, '']

        if pandocVersion() < '1.16':
            # pandoc 1.15
            link = Link(text, target)
        else:
            # pandoc 1.16
            link = Link(['', [], []], text, target)

        elements.append([Plain([link])])

    # Add a bullet list
    listings.append(BulletList(elements))
584
585
def getValue(category, meta, fct, default, analyzeDefinition):
    """Return the value cached on `fct` for `category`.

    On the first call for a given `fct`, the cache `fct.value` is created
    and `analyzeDefinition` is run on every correct 'pandoc-numbering'
    definition of `meta` (it is expected to fill the cache). A category
    missing from the cache is then stored with `default`.
    """
    if not hasattr(fct, 'value'):
        fct.value = {}
        # Loop on all listings definition
        definitions = meta['pandoc-numbering']['c'] if hasMeta(meta) else []
        for definition in definitions:
            if isCorrect(definition):
                analyzeDefinition(definition)

    return fct.value.setdefault(category, default)
598
599
def getFormat(category, meta):
    """Return the 'format' setting of `category` (True by default).

    The setting is read from the 'format' entry (a MetaBool) of the
    definitions and cached through `getValue`.
    """
    def record(definition):
        if not hasProperty(definition, 'format', 'MetaBool'):
            return
        name = getFirstValue(definition, 'category')
        getFormat.value[name] = getProperty(definition, 'format')

    return getValue(category, meta, getFormat, True, record)
605
606
def getCiteShortCut(category, meta):
    """Return the 'cite-shortcut' setting of `category` (False by default).

    The setting is read from the 'cite-shortcut' entry (a MetaBool) of the
    definitions and cached through `getValue`.
    """
    def record(definition):
        if not hasProperty(definition, 'cite-shortcut', 'MetaBool'):
            return
        name = getFirstValue(definition, 'category')
        getCiteShortCut.value[name] = getProperty(definition, 'cite-shortcut')

    return getValue(category, meta, getCiteShortCut, False, record)
612
613
def getLevelsFromYaml(definition):
    """Return [levelInf, levelSup] from the 'first' and 'last' entries of a
    definition.

    'first' (minus one) is clamped to the range 0..6; 'last' is capped at 6
    and is at least levelInf. An entry that is missing, is not a MetaString
    or is not an integer leaves the corresponding level at 0.
    """
    levelInf = levelSup = 0

    if hasProperty(definition, 'first', 'MetaString'):
        try:
            first = int(getProperty(definition, 'first'))
            levelInf = max(min(first - 1, 6), 0)
        except ValueError:
            # Not an integer: keep the default
            pass

    if hasProperty(definition, 'last', 'MetaString'):
        try:
            last = int(getProperty(definition, 'last'))
            levelSup = max(min(last, 6), levelInf)
        except ValueError:
            # Not an integer: keep the default
            pass

    return [levelInf, levelSup]
627
628
def getLevelsFromRegex(definition):
    """Return [levelInf, levelSup] from the 'sectioning' entry of a definition.

    The levels are half the lengths of the 'hidden' and 'header' groups
    when the entry matches `headerRegex` entirely, [0, 0] otherwise.
    """
    sectioning = getFirstValue(definition, 'sectioning')
    match = re.match('^' + headerRegex + '$', sectioning)
    if not match:
        return [0, 0]
    # Compute the levelInf and levelSup values
    return [len(match.group(name)) // 2 for name in ('hidden', 'header')]
635
636
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] of `category` ([0, 0] when
    nothing is defined).

    For each definition, the levels come from the 'sectioning' entry when
    it is a MetaInlines made of a single Str, and from the 'first' / 'last'
    entries otherwise. Results are cached through `getValue`.
    """
    def record(definition):
        usesSectioning = (
            hasProperty(definition, 'sectioning', 'MetaInlines') and
            len(getProperty(definition, 'sectioning')) == 1 and
            getProperty(definition, 'sectioning')[0]['t'] == 'Str'
        )
        if usesSectioning:
            levels = getLevelsFromRegex(definition)
        else:
            levels = getLevelsFromYaml(definition)
        getDefaultLevels.value[getFirstValue(definition, 'category')] = levels

    return getValue(category, meta, getDefaultLevels, [0, 0], record)
647
648
def getClasses(category, meta):
    """Return the list of classes of `category` (the category name itself by
    default).

    The classes are the stringified items of the 'classes' entry (a
    MetaList) of the definitions, cached through `getValue`.
    """
    def record(definition):
        if not hasProperty(definition, 'classes', 'MetaList'):
            return
        names = [stringify(elt) for elt in getProperty(definition, 'classes')]
        getClasses.value[getFirstValue(definition, 'category')] = names

    return getValue(category, meta, getClasses, [category], record)
657
658
def pandocVersion():
    """Return the version reported by 'pandoc -v' as a string.

    The command is run only on the first call; the result is cached on the
    function itself.
    """
    if hasattr(pandocVersion, 'value'):
        return pandocVersion.value

    process = subprocess.Popen(
        ['pandoc', '-v'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    output, _errors = process.communicate()
    found = re.search(b'pandoc (?P<version>.*)', output)
    pandocVersion.value = found.group('version').decode('utf-8')
    return pandocVersion.value
664
665
def main():
    """Run the filter: number the elements, then resolve the references."""
    actions = [numbering, referencing]
    toJSONFilters(actions)


if __name__ == '__main__':
    main()
670