Completed
Push — master ( dc2c20...73c814 )
by Christophe
01:24
created

addListings()   B

Complexity

Conditions 6

Size

Total Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 6
dl 0
loc 23
rs 7.6949
c 2
b 0
f 0
1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Number of elements seen so far for each 'basicCategory:leading' key (see computeCategory)
count = {}
# Numbering data recorded for each tag (see numberingEffective)
information = {}
# Tags grouped by basic category, in order of appearance
collections = {}
# Current counters of the six heading levels
headers = [0, 0, 0, 0, 0, 0]
# Sectioning prefix: hidden levels ('-.' repeated) followed by visible levels ('+.' repeated)
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Converts a list of actions into a filter that reads a JSON-formatted
    pandoc document from stdin, transforms it by walking the tree
    with the actions, and writes a new JSON-formatted pandoc document
    to stdout.  The argument is a list of functions action(key, value, format, meta),
    where key is the type of the pandoc object (e.g. 'Str', 'Para'),
    value is the contents of the object (e.g. a string for 'Str',
    a list of inline elements for 'Para'), format is the target
    output format (taken from the first command line argument
    if present, else the empty string), and meta is the document's metadata.
    If the function returns None, the object to which it applies
    will remain unchanged.  If it returns an object, the object will
    be replaced.  If it returns a list, the list will be spliced in to
    the list to which the target object belongs.  (So, returning an
    empty list deletes the object.)

    Once all actions have been applied, the listings are added at the
    beginning of the document (see addListings).
    """
    try:
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        input_stream = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(input_stream.read())

    # Named outputFormat so that the builtin format() is not shadowed
    outputFormat = sys.argv[1] if len(sys.argv) > 1 else ""

    # Each action walks the result of the previous one; all of them receive
    # the metadata of the document as it was read
    altered = doc
    for action in actions:
        altered = walk(altered, action, outputFormat, doc[0]['unMeta'])

    addListings(altered, outputFormat, altered[0]['unMeta'])
    json.dump(altered, sys.stdout)
54
55
def removeAccents(string):
    """Returns string without its combining characters (accents, ...).

    The string is first decomposed with the NFKD normalization, then
    every combining character is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    return u"".join(char for char in decomposed if not unicodedata.combining(char))
58
59
def toIdentifier(string):
    """Converts string into an identifier.

    The string is lower-cased and its accents are removed; each run of
    characters other than ASCII letters, digits, '_' and '-' becomes a
    single dash; finally every leading character that is not an ASCII
    letter is removed.
    """
    # Replace invalid characters by a dash
    identifier = re.sub(r'[^0-9a-zA-Z_-]+', '-', removeAccents(string.lower()))

    # Remove everything before the first letter (digits, dashes and underscores)
    return re.sub(r'^[^a-zA-Z]+', '', identifier)
67
68
def toLatex(x):
    """Walks the tree x and returns concatenated string content,
    leaving out all formatting.

    Math content is wrapped in '$' and the content of Note elements is
    left out. The tree x itself is not modified.
    """
    result = []

    def go(key, val, format, meta):
        if key in ['Str', 'MetaString']:
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package
            if format == 'latex':
                result.append('$' + val[1] + '$')
            else:
                result.append(val[1])
        elif key in ['LineBreak', 'Space']:
            result.append(" ")
        elif key == 'Note':
            # Do not stringify value from Note node: returning an empty list
            # stops the walk from descending into the note. Only the copy
            # built by walk (discarded below) loses the note; emptying the
            # note in place would also erase it from the document.
            return []

    # The walk is always done with the 'latex' format
    walk(x, go, 'latex', {})
    return ''.join(result)
95
96
def numbering(key, value, format, meta):
    """Filter action dispatching Header elements to numberingHeader and
    Para elements to numberingPara; other elements are left unchanged.
    """
    if key == 'Header':
        return numberingHeader(value)
    if key == 'Para':
        return numberingPara(value, format, meta)
    return None
101
102
def numberingHeader(value):
    """Updates the global header counters for a Header element.

    value is [level, [identifier, classes, attributes], content]. Nothing
    happens when the header has the 'unnumbered' class. The element itself
    is never modified (None is returned).
    """
    level = value[0]
    classes = value[1][1]
    if 'unnumbered' in classes:
        return None

    headers[level - 1] += 1

    # A new section restarts the counters of all deeper levels
    for index in range(level, len(headers)):
        headers[index] = 0
    return None
108
109
def numberingPara(value, format, meta):
    """Handles a Para element whose last two elements are a Space and a Str.

    When the last Str is a numbering marker (optional sectioning prefix,
    '#', optional 'prefix:' and optional name) the numbered paragraph built
    by numberingEffective is returned. When it is the escaped form ('##'),
    one '#' is removed in place. In all other cases None is returned and
    the paragraph is left unchanged.
    """
    if len(value) < 3 or value[-2] != Space() or value[-1]['t'] != 'Str':
        return None

    last = value[-1]['c']
    match = re.match(r'^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$', last)
    if match:
        # Is it a Para and the last element is an identifier beginning with '#'
        return numberingEffective(match, value, format, meta)

    if re.match(r'^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$', last):
        # Special case where the last element is '...##...'
        return numberingSharpSharp(value)

    return None
119
120
def numberingEffective(match, value, format, meta):
    """Builds the numbered paragraph replacing a Para ending with a marker.

    match is the match object of the marker and value the content of the
    Para. The numbering data are stored in the global information dict
    under the computed tag, and the tag is appended to the collection of
    its category. Returns a Para containing a Span whose identifier is the
    tag, preceded in LaTeX (when automatic formatting is on) by a raw
    \\addcontentsline command.
    """
    # computeTitle must run first: it removes the title from value, so that
    # computeDescription only sees the description
    title = computeTitle(value)
    description = computeDescription(value)
    basicCategory = computeBasicCategory(match, description)
    [levelInf, levelSup] = computeLevels(match, basicCategory, meta)
    sectionNumber = computeSectionNumber(levelSup)
    leading = computeLeading(levelSup, sectionNumber)
    # computeCategory increments the counter read on the next line
    category = computeCategory(basicCategory, leading)
    number = str(count[category])
    tag = computeTag(match, basicCategory, category, number)
    localNumber = computeLocalNumber(levelInf, levelSup, number)
    globalNumber = computeGlobalNumber(sectionNumber, number)
    [text, link, toc] = computeTextLinkToc(meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber)

    # Store the numbers and the label for automatic numbering (See referencing function)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # Prepare the contents
    contents = [Span([tag, ['pandoc-numbering-text'] + getClasses(basicCategory, meta), []], text)]

    # Compute collections
    collections.setdefault(basicCategory, []).append(tag)

    # Special case for LaTeX
    if format == 'latex' and getFormat(basicCategory, meta):
        addLaTeX(contents, basicCategory, title, description, leading, number)

    # Return the contents in a Para element
    return Para(contents)
161
162
def computeTitle(value):
    """Extracts the optional '(title)' placed just before the marker.

    value is the content of the Para; its last two elements are a Space
    and the marker. A title is present when the element before them is a
    Str ending with ')'; it then starts at the first Str beginning with
    '('. The parentheses are stripped in place, and the title together
    with the element preceding it is removed from value.

    Returns the list of the title elements, or [] when there is no title.
    """
    title = []
    closing = value[-3]
    if closing['t'] == 'Str' and closing['c'][-1:] == ')':
        for (i, item) in enumerate(value):
            # Slicing (instead of indexing) copes with an empty Str
            if item['t'] == 'Str' and item['c'][:1] == '(':
                title = value[i:-2]
                title[0]['c'] = title[0]['c'][1:]
                title[-1]['c'] = title[-1]['c'][:-1]
                # Also drop the element (normally a Space) before the title
                del value[i-1:-2]
                break
    return title
173
174
def computeDescription(value):
    """Returns the description: everything but the last two elements
    (the Space and the marker) of value.
    """
    return value[:-2]
176
177
def computeBasicCategory(match, description):
    """Returns the category of the element: the prefix given in the marker
    or, without prefix, the identifier derived from the description.
    """
    prefix = match.group('prefix')
    if prefix is None:
        return toIdentifier(stringify(description))
    return prefix
182
183
def computeLevels(match, basicCategory, meta):
    """Returns [levelInf, levelSup] for the marker.

    levelInf is the number of hidden levels ('-.') and levelSup the total
    number of levels ('-.' and '+.') of the sectioning prefix. When the
    marker has no prefix, the default levels of the category are used.
    """
    # Each level takes two characters in the prefix
    levelInf = len(match.group('hidden')) // 2
    levelSup = len(match.group('header')) // 2

    # Get the default inf and sup level
    if levelInf == 0 and levelSup == 0:
        return getDefaultLevels(basicCategory, meta)

    return [levelInf, levelSup]
193
194
def computeSectionNumber(levelSup):
    """Returns the dot-separated current counters of the first levelSup
    heading levels ('' when levelSup is 0).
    """
    return '.'.join(str(counter) for counter in headers[:levelSup])
196
197
def computeLeading(levelSup, sectionNumber):
    """Returns the leading: the section number followed by a dot, or ''
    when no heading level is used (levelSup is 0).
    """
    if levelSup == 0:
        return ''
    return sectionNumber + '.'
203
204
def computeCategory(basicCategory, leading):
    """Returns the full category 'basicCategory:leading' and increments
    its counter in the global count dict (starting at 1 for a new category).
    """
    category = basicCategory + ':' + leading
    count[category] = count.get(category, 0) + 1
    return category
214
215
def computeTag(match, basicCategory, category, number):
    """Returns the tag identifying the element: 'basicCategory:name' when
    the marker carries a name, else the full category followed by number.
    """
    name = match.group('name')
    if name is None:
        return category + number
    return basicCategory + ':' + name
221
222
def computeLocalNumber(levelInf, levelSup, number):
    """Returns the number displayed for the element: the heading counters
    of levels levelInf to levelSup (hidden levels omitted) followed by
    number, joined with dots.
    """
    # Replace the '-.-.+.+...#' by the category count (omitting the hidden part)
    visible = headers[levelInf:levelSup] + [number]
    return '.'.join(str(part) for part in visible)
225
226
def computeGlobalNumber(sectionNumber, number):
    """Returns number prefixed by the section number and a dot, or number
    alone when the section number is empty.
    """
    if not sectionNumber:
        return number
    return sectionNumber + '.' + number
232
233
def computeTextLinkToc(meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber):
    """Returns [text, link, toc]: the inline elements used for the element
    itself, for the links pointing to it and for its entry in the listing.

    With automatic formatting (see getFormat) the text is the description
    and the local number in bold, followed by the title in parentheses
    and emphasis; the link is the description and the local number; the
    toc is the global number followed by the title (or by the description
    when there is no title).

    Without automatic formatting the text is a list of Span elements of
    classes 'description', 'title', 'local', 'global' and 'section'; link
    and toc wrap that same list in a Span of class 'pandoc-numbering-link'
    and 'pandoc-numbering-toc' respectively, plus the category classes.
    """
    # Is the automatic formatting required for this category?
    if getFormat(basicCategory, meta):
        # Prepare the final text
        text = [Strong(description + [Space(), Str(localNumber)])]

        # Add the title to the final text
        if title:
            text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

        # Compute the link
        link = description + [Space(), Str(localNumber)]

        # Compute the toc
        toc = [Str(globalNumber), Space()] + (title if title else description)

        return [text, link, toc]

    # Prepare the final text
    text = [
        Span(['', ['description'], []], description),
        Span(['', ['title'], []], title),
        Span(['', ['local'], []], [Str(localNumber)]),
        Span(['', ['global'], []], [Str(globalNumber)]),
        Span(['', ['section'], []], [Str(sectionNumber)]),
    ]

    classes = getClasses(basicCategory, meta)

    # Compute the link
    link = [Span(['', ['pandoc-numbering-link'] + classes, []], text)]

    # Compute the toc
    toc = [Span(['', ['pandoc-numbering-toc'] + classes, []], text)]

    return [text, link, toc]
269
270
def addLaTeX(contents, basicCategory, title, description, leading, number):
    """Inserts at the beginning of contents a raw LaTeX command adding the
    element to the contents file of its category.

    The LaTeX name of the category is basicCategory reduced to its
    lower-case letters. The entry is numbered leading + number and its
    text is the title or, without title, the description.
    """
    latexCategory = re.sub(r'[^a-z]+', '', basicCategory)
    entry = title if title else description
    latex = (
        '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory + '}'
        '{\\protect\\numberline {' + leading + number + '}'
        '{\\ignorespaces ' + toLatex(entry) + '}}'
    )
    contents.insert(0, RawInline('tex', latex))
279
280
def numberingSharpSharp(value):
    """Unescapes the marker of a Para: the first '##' of its last element
    is replaced in place by a single '#'. Returns None.
    """
    last = value[-1]
    last['c'] = last['c'].replace('##', '#', 1)
282
283
# Arguments of the replacing action, set by referencingLink before each walk:
# the inline elements to insert ...
replace = None
# ... and the text they take the place of
search = None
285
286
def lowering(key, value, format, meta):
    """Filter action replacing each Str element by its lower-case version."""
    if key != 'Str':
        return None
    return Str(value.lower())
289
290
def referencing(key, value, format, meta):
    """Filter action dispatching Link elements to referencingLink and Cite
    elements to referencingCite; other elements are left unchanged.
    """
    if key == 'Link':
        return referencingLink(value, format, meta)
    if key == 'Cite':
        return referencingCite(value, format, meta)
    return None
295
296
def referencingLink(value, format, meta):
    """Fills in a Link pointing to a numbered element ('#tag').

    In the title and in the text of the link, '#t'/'#T' stand for the
    title (lower case / as is), '#d'/'#D' for the description, '#s' for
    the section number, '#g' for the global number, '#c' for the count and
    '#n' or a lone '#' for the local number. An empty link text is
    replaced by the default label of the element.

    value is modified in place; None is always returned. Links whose
    target is not a known tag are left unchanged.
    """
    global replace, search

    # The content of a Link is [text, [reference, title]] up to pandoc 1.15
    # and [attributes, text, [reference, title]] afterwards. Looking at the
    # shape is exact, which a comparison of version strings is not
    # ('1.9' < '1.16' is False).
    i = 0 if len(value) == 2 else 1
    text = value[i]
    target = value[i + 1]
    reference = target[0]

    if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
        return None

    # Compute the name
    tag = reference[1:]
    if tag not in information:
        return None
    info = information[tag]

    # Replace all '#t', '#T', '#d', '#D', '#s', '#g', '#c', '#n', '#' with the
    # corresponding text in the title. The order matters: '#' must come last.
    titleText = stringify(info['title'])
    descriptionText = stringify(info['description'])
    for (marker, replacement) in [
        ('#t', titleText.lower()),
        ('#T', titleText),
        ('#d', descriptionText.lower()),
        ('#D', descriptionText),
        ('#s', info['section']),
        ('#g', info['global']),
        ('#c', info['count']),
        ('#n', info['local']),
        ('#', info['local']),
    ]:
        target[1] = target[1].replace(marker, replacement)

    if text == []:
        # The link text is empty, replace it with the default label
        value[i] = info['link']
        return None

    # The link text is not empty: same replacements, in the same order, but
    # with inline elements
    for (marker, inlines) in [
        ('#t', walk(info['title'], lowering, format, meta)),
        ('#T', info['title']),
        ('#d', walk(info['description'], lowering, format, meta)),
        ('#D', info['description']),
        ('#s', [Str(info['section'])]),
        ('#g', [Str(info['global'])]),
        ('#c', [Str(info['count'])]),
        ('#n', [Str(info['local'])]),
        ('#', [Str(info['local'])]),
    ]:
        # The replacing action reads its arguments from these two globals
        search = marker
        replace = inlines
        value[i] = walk(value[i], replacing, format, meta)

    return None
378
379
def referencingCite(value, format, meta):
    """Turns the citation shortcut '@prefix:name' into a link.

    The shortcut is honoured when the first inline of the Cite is a Str
    matching '@category:name' (or '@category:number'), the category has
    the cite shortcut enabled (see getCiteShortCut) and the tag is known.
    Returns a Link to '#tag' whose text is the local number, else None.
    """
    inlines = value[1]
    if not inlines or inlines[0]['t'] != 'Str':
        return None

    match = re.match(r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$', inlines[0]['c'])
    if match is None or not getCiteShortCut(match.group('category'), meta):
        return None

    # Deal with @prefix:name shortcut
    tag = match.group('tag')
    if tag not in information:
        return None

    label = [Str(information[tag]['local'])]
    target = ['#' + tag, '']

    # Compare the version numerically: as strings, '1.9' < '1.16' is False
    version = [int(part) for part in re.findall(r'\d+', pandocVersion())[:2]]
    if version < [1, 16]:
        # pandoc 1.15
        return Link(label, target)

    # pandoc > 1.15
    return Link(['', [], []], label, target)
392
393
def replacing(key, value, format, meta):
    """Filter action replacing, in each Str element, every occurrence of
    the global search text by the global replace inline elements.

    Returns the list of elements taking the place of the Str, or None when
    the element is not a Str or does not contain the search text.
    """
    if key != 'Str':
        return None

    pieces = value.split(search)
    if len(pieces) <= 1:
        return None

    result = []

    # Text before the first occurrence
    if pieces[0] != '':
        result.append(Str(pieces[0]))

    # Each following piece is preceded by one occurrence of the search text
    for piece in pieces[1:]:
        result.extend(replace)
        if piece != '':
            result.append(Str(piece))

    return result
409
410
def hasMeta(meta):
    """Tells whether the metadata hold a 'pandoc-numbering' entry that is a
    MetaList.
    """
    if 'pandoc-numbering' not in meta:
        return False
    return meta['pandoc-numbering']['t'] == 'MetaList'
412
413
def isCorrect(definition):
    """Tells whether definition is a MetaMap whose 'category' entry is a
    MetaInlines made of exactly one Str.
    """
    if definition['t'] != 'MetaMap' or 'category' not in definition['c']:
        return False

    category = definition['c']['category']
    return (
        category['t'] == 'MetaInlines' and
        len(category['c']) == 1 and
        category['c'][0]['t'] == 'Str'
    )
419
420
def hasProperty(definition, name, type):
    """Tells whether definition has a property called name whose metadata
    type ('MetaString', 'MetaInlines', ...) is type.
    """
    properties = definition['c']
    return name in properties and properties[name]['t'] == type
422
423
def getProperty(definition, name):
    """Returns the content of the property called name of definition.

    Raises KeyError when the property does not exist.
    """
    return definition['c'][name]['c']
425
426
def getFirstValue(definition, name):
    """Returns the content of the first element of the property called
    name of definition (e.g. the text of the Str of a category).
    """
    return getProperty(definition, name)[0]['c']
428
429
def addListings(doc, format, meta):
    """Adds the listings at the beginning of the document body.

    Each correct definition of the 'pandoc-numbering' metadata having a
    'listing' property (a MetaInlines) yields an unnumbered level 1 header
    made of that property, followed by the listing of the category: a raw
    LaTeX block when format is 'latex', a bullet list of links otherwise.
    doc is modified in place.
    """
    if not hasMeta(meta):
        return

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not (isCorrect(definition) and hasProperty(definition, 'listing', 'MetaInlines')):
            continue

        # Get the category name
        category = getFirstValue(definition, 'category')

        # Get the title
        title = getProperty(definition, 'listing')

        listings.append(Header(1, ['', ['unnumbered'], []], title))

        if format == 'latex':
            extendListingsLaTeX(listings, meta, definition, category)
        else:
            extendListingsOther(listings, meta, definition, category)

    # Add listings to the document (doc[1] is the list of blocks)
    doc[1][0:0] = listings
452
453
def extendListingsLaTeX(listings, meta, definition, category):
    """Appends to listings the raw LaTeX block printing the listing of
    category.

    The link color comes from the 'toccolor' metadata (black by default).
    The 'tab' and 'space' properties of definition (numbers given as
    MetaString) set the indentation and the width reserved for the
    numbers, in em. Defaults: 1.5 for tab; for space, 2.3 plus the largest
    number of dots found in the section numbers of the category.
    """
    def lengthProperty(name):
        # Number held by the property, or None when absent or not a number
        if not hasProperty(definition, name, 'MetaString'):
            return None
        try:
            return float(getProperty(definition, name))
        except ValueError:
            return None

    # Get the link color
    if 'toccolor' in meta:
        linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
    else:
        linkcolor = '\\hypersetup{linkcolor=black}'

    # Get the tab, dealing with default tab length
    tab = lengthProperty('tab')
    if tab is None:
        tab = 1.5

    # Get the space, dealing with default space length
    space = lengthProperty('space')
    if space is None:
        level = 0
        # Loop on the collection
        for tag in collections.get(category, []):
            level = max(level, information[tag]['section'].count('.'))
        space = level + 2.3

    # Add a RawBlock
    latexCategory = re.sub(r'[^a-z]+', '', category)
    latex = [
        linkcolor,
        '\\makeatletter',
        '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}',
        '\\@starttoc{' + latexCategory + '}',
        '\\makeatother'
    ]
    listings.append(RawBlock('tex', ''.join(latex)))
501
502
def extendListingsOther(listings, meta, definition, category):
    """Appends to listings a bullet list holding, for each element of
    category, a link to the element whose text is its toc entry.

    Nothing is appended when the category has no element.
    """
    if category not in collections:
        return

    # Compare the version numerically: as strings, '1.9' < '1.16' is False
    version = [int(part) for part in re.findall(r'\d+', pandocVersion())[:2]]
    legacy = version < [1, 16]

    # Prepare the list
    elements = []

    # Loop on the collection
    for tag in collections[category]:
        # Add an item to the list
        text = information[tag]['toc']

        if legacy:
            # pandoc 1.15
            link = Link(text, ['#' + tag, ''])
        else:
            # pandoc 1.16
            link = Link(['', [], []], text, ['#' + tag, ''])

        elements.append([Plain([link])])

    # Add a bullet list
    listings.append(BulletList(elements))
524
525
def getValue(category, meta, fct, default, analyzeDefinition):
    """Returns the setting of category cached on the function fct.

    On the first call for a given fct, the cache fct.value is created and
    analyzeDefinition is called on every correct definition of the
    'pandoc-numbering' metadata; it is expected to fill fct.value.
    A category missing from the cache is stored with default.
    """
    if not hasattr(fct, 'value'):
        fct.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition):
                    analyzeDefinition(definition)

    if category not in fct.value:
        fct.value[category] = default

    return fct.value[category]
538
539
def getFormat(category, meta):
    """Tells whether the automatic formatting applies to category: the
    'format' boolean property of its definition, True by default.
    """
    def analyzeDefinition(definition):
        if hasProperty(definition, 'format', 'MetaBool'):
            getFormat.value[getFirstValue(definition, 'category')] = getProperty(definition, 'format')

    return getValue(category, meta, getFormat, True, analyzeDefinition)
545
546
def getCiteShortCut(category, meta):
    """Tells whether the '@prefix:name' shortcut applies to category: the
    'cite-shortcut' boolean property of its definition, False by default.
    """
    def analyzeDefinition(definition):
        if hasProperty(definition, 'cite-shortcut', 'MetaBool'):
            getCiteShortCut.value[getFirstValue(definition, 'category')] = getProperty(definition, 'cite-shortcut')

    return getValue(category, meta, getCiteShortCut, False, analyzeDefinition)
552
553
def getLevelsFromYaml(definition):
    """Returns [levelInf, levelSup] from the 'first' and 'last' properties
    of definition (integers given as MetaString).

    levelInf is 'first' minus one, kept between 0 and 6; levelSup is
    'last', at most 6 and at least levelInf. A property that is missing
    or not an integer leaves the corresponding level at 0.
    """
    levelInf = 0
    levelSup = 0

    if hasProperty(definition, 'first', 'MetaString'):
        try:
            first = int(getProperty(definition, 'first'))
        except ValueError:
            pass
        else:
            levelInf = max(min(first - 1, 6), 0)

    if hasProperty(definition, 'last', 'MetaString'):
        try:
            last = int(getProperty(definition, 'last'))
        except ValueError:
            pass
        else:
            levelSup = max(min(last, 6), levelInf)

    return [levelInf, levelSup]
567
568
def getLevelsFromRegex(definition):
    """Returns [levelInf, levelSup] from the 'sectioning' property of
    definition, written like the prefix of a marker ('-.' for a hidden
    level, '+.' for a visible one); [0, 0] when it is not such a prefix.
    """
    match = re.match('^' + headerRegex + '$', getFirstValue(definition, 'sectioning'))
    if not match:
        return [0, 0]

    # Compute the levelInf and levelSup values (two characters per level)
    return [len(match.group('hidden')) // 2, len(match.group('header')) // 2]
575
576
def getDefaultLevels(category, meta):
    """Returns the default [levelInf, levelSup] of category, [0, 0] when
    its definition gives none.

    The levels come from the 'sectioning' property when it is a
    MetaInlines made of a single Str, else from the 'first' and 'last'
    properties.
    """
    def analyzeDefinition(definition):
        hasSectioning = (
            hasProperty(definition, 'sectioning', 'MetaInlines') and
            len(getProperty(definition, 'sectioning')) == 1 and
            getProperty(definition, 'sectioning')[0]['t'] == 'Str'
        )
        if hasSectioning:
            levels = getLevelsFromRegex(definition)
        else:
            levels = getLevelsFromYaml(definition)
        getDefaultLevels.value[getFirstValue(definition, 'category')] = levels

    return getValue(category, meta, getDefaultLevels, [0, 0], analyzeDefinition)
587
588
def getClasses(category, meta):
    """Returns the list of classes of category: the stringified elements
    of the 'classes' property (a MetaList) of its definition, or
    [category] by default.
    """
    def analyzeDefinition(definition):
        if hasProperty(definition, 'classes', 'MetaList'):
            classes = [stringify(elt) for elt in getProperty(definition, 'classes')]
            getClasses.value[getFirstValue(definition, 'category')] = classes

    return getValue(category, meta, getClasses, [category], analyzeDefinition)
597
598
def pandocVersion():
    """Returns the version of pandoc as a string (e.g. '1.16.0.2').

    The version is read from the output of 'pandoc -v' on the first call
    and cached on the function for the following ones.

    Raises RuntimeError when the output does not contain 'pandoc <version>'.
    """
    if not hasattr(pandocVersion, 'value'):
        p = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        match = re.search(b'pandoc (?P<version>.*)', out)
        if match is None:
            raise RuntimeError('Unable to read the pandoc version from the output of "pandoc -v"')
        # strip() removes the carriage return left by Windows line endings
        pandocVersion.value = match.group('version').decode('utf-8').strip()
    return pandocVersion.value
604
605
def main():
    """Runs the filter: the numbering action over the whole document,
    then the referencing action.
    """
    toJSONFilters([numbering, referencing])
607
608
# Run the filter only when the file is executed as a script
if __name__ == '__main__':
    main()
610