Completed
Push — master ( 798d64...dc2c20 )
by Christophe
01:15
created

getDefaultLevels()   B

Complexity

Conditions 5

Size

Total Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 5
c 3
b 0
f 0
dl 0
loc 11
rs 8.5454
1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Per-category counters, keyed by '<basicCategory>:<leading>' (see computeCategory).
count = {}
# Numbering data stored for every tag by numberingEffective.
information = {}
# Tags grouped by basic category, in order of appearance.
collections = {}
# Current counter of each of the six header levels (updated by numberingHeader).
headers = [0, 0, 0, 0, 0, 0]
# Sectioning prefix: a run of '-.' (named group 'hidden') followed by a run of '+.'.
# Written as a raw string so that '\.' and '\+' reach the regex engine as
# regex escapes instead of being invalid string-literal escapes.
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Run a list of actions as a pandoc JSON filter (stdin -> stdout).

    The JSON document is read from stdin, every action is applied in turn
    by walking the tree, the listings are added, and the resulting JSON
    document is written to stdout.

    Each action is a function action(key, value, format, meta) where key is
    the type of the pandoc object (e.g. 'Str', 'Para'), value is its
    contents, format is the target output format (the first command line
    argument if present, '' otherwise) and meta is the document's metadata.
    An action returning None leaves the object unchanged, returning an
    object replaces it, and returning a list splices that list into the
    parent list (so an empty list deletes the object).
    """
    try:
        source = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        source = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(source.read())
    format = sys.argv[1] if len(sys.argv) > 1 else ""

    # Apply the actions one after the other, each on the previous result
    altered = doc
    for action in actions:
        altered = walk(altered, action, format, doc[0]['unMeta'])

    addListings(altered, format, altered[0]['unMeta'])
    json.dump(altered, sys.stdout)
54
55
def removeAccents(string):
    """Return string in NFKD form with every combining character dropped."""
    decomposed = unicodedata.normalize('NFKD', string)
    kept = (char for char in decomposed if unicodedata.combining(char) == 0)
    return u"".join(kept)
58
59
def toIdentifier(string):
    """Turn string into an identifier.

    The string is lower-cased and stripped of accents, every run of
    characters outside [0-9a-zA-Z_-] becomes a single dash, and everything
    before the first ASCII letter is removed.
    """
    normalized = removeAccents(string.lower())

    # Replace invalid characters by a dash
    dashed = re.sub('[^0-9a-zA-Z_-]+', '-', normalized)

    # Drop the leading characters that are not letters (digits, dashes, ...)
    return re.sub('^[^a-zA-Z]+', '', dashed)
67
68
def toLatex(x):
    """Walk the tree x and return its concatenated text content.

    Str, MetaString and Code contribute their text, Math is wrapped in
    '$...$', LineBreak and Space become a single space. The content of
    Note nodes is emptied in place so that it is not collected.
    (Modified from the stringify function in the pandocfilters package.)
    """
    pieces = []

    def go(key, val, format, meta):
        if key == 'Str' or key == 'MetaString':
            pieces.append(val)
        elif key == 'Code':
            pieces.append(val[1])
        elif key == 'Math':
            formula = val[1]
            if format == 'latex':
                formula = '$' + formula + '$'
            pieces.append(formula)
        elif key in ('LineBreak', 'Space'):
            pieces.append(" ")
        elif key == 'Note':
            # Do not stringify value from Note node
            del val[:]

    walk(x, go, 'latex', {})
    return ''.join(pieces)
95
96
def numbering(key, value, format, meta):
    """Filter action: dispatch Header and Para nodes to their numbering handler.

    Any other node type is left unchanged (None is returned).
    """
    if key == 'Para':
        return numberingPara(value, format, meta)
    if key == 'Header':
        return numberingHeader(value)
    return None
101
102
def numberingHeader(value):
    """Update the global header counters for a Header node.

    Headers carrying the 'unnumbered' class are ignored. Otherwise the
    counter of the header's level is incremented and the counters of all
    deeper levels are reset to zero. Always returns None.
    """
    level, (_identifier, classes, _attributes), _content = value
    if 'unnumbered' in classes:
        return

    headers[level - 1] += 1
    for deeper in range(level, 6):
        headers[deeper] = 0
108
109
def numberingPara(value, format, meta):
    """Handle a Para whose last word is a numbering marker.

    The paragraph must have at least three inlines and end with a Space
    followed by a Str. If that Str is '<sectioning>#[prefix:][name]' the
    paragraph is numbered (see numberingEffective); if it uses '##' instead
    of '#', one '#' is removed (see numberingSharpSharp). In every other
    case None is returned and the paragraph is left unchanged.

    The patterns are raw strings so that '\\w' is a regex escape and not an
    invalid string-literal escape.
    """
    if len(value) < 3 or value[-2] != Space() or value[-1]['t'] != 'Str':
        return None

    last = value[-1]['c']

    match = re.match(
        '^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$',
        last
    )
    if match:
        # Is it a Para and the last element is an identifier beginning with '#'
        return numberingEffective(match, value, format, meta)

    if re.match(
        '^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$',
        last
    ):
        # Special case where the last element is '...##...'
        return numberingSharpSharp(value)

    return None
119
120
def numberingEffective(match, value, format, meta):
    """Number a paragraph and return the replacement Para.

    match is the regex match obtained on the last Str of value. The numbers
    and labels are stored in information[tag], the tag is added to the
    collection of its basic category, and for LaTeX output with automatic
    formatting a raw TeX table-of-contents entry is prepended.
    """
    # The order of these calls matters: computeTitle removes the title from
    # value before the description is read, and computeCategory increments
    # the counter that is read right after it.
    title = computeTitle(value)
    description = computeDescription(value)
    basicCategory = computeBasicCategory(match, description)
    levelInf, levelSup = computeLevels(match, basicCategory, meta)
    sectionNumber = computeSectionNumber(levelSup)
    leading = computeLeading(levelSup, sectionNumber)
    category = computeCategory(basicCategory, leading)
    number = str(count[category])
    tag = computeTag(match, basicCategory, category, number)
    localNumber = computeLocalNumber(levelInf, levelSup, number)
    globalNumber = computeGlobalNumber(sectionNumber, number)
    text, link, toc = computeTextLinkToc(
        meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber
    )

    # Store the numbers and the label for automatic numbering (See referencing function)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # Prepare the contents
    spanClasses = ['pandoc-numbering-text'] + getClasses(basicCategory, meta)
    contents = [Span([tag, spanClasses, []], text)]

    # Compute collections
    collections.setdefault(basicCategory, []).append(tag)

    # Special case for LaTeX
    if format == 'latex' and getFormat(basicCategory, meta):
        addLaTeX(contents, basicCategory, title, description, leading, number)

    # Return the contents in a Para element
    return Para(contents)
161
162
def computeTitle(value):
    """Extract the optional '(title)' that precedes the numbering marker.

    value is the list of inlines of the paragraph; its last two items are
    the Space and the marker Str. When the item before them is a Str ending
    with ')', the title runs from the first Str starting with '(' up to that
    item. The parentheses are stripped (the inline dicts are modified in
    place) and the title, together with the inline just before it, is
    removed from value.

    Returns the list of title inlines, or [] when there is no title.
    Empty Str items are skipped instead of raising IndexError.
    """
    closing = value[-3]
    if closing['t'] != 'Str' or not closing['c'].endswith(')'):
        return []

    for i, item in enumerate(value):
        # startswith is safe on an empty string, unlike item['c'][0]
        if item['t'] == 'Str' and item['c'].startswith('('):
            title = value[i:-2]
            title[0]['c'] = title[0]['c'][1:]
            title[-1]['c'] = title[-1]['c'][:-1]
            # Also drop the inline (normally a Space) in front of the title
            del value[i - 1:-2]
            return title

    return []
173
174
def computeDescription(value):
    """Return a copy of value without its last two items (Space and marker)."""
    description = list(value)
    del description[-2:]
    return description
176
177
def computeBasicCategory(match, description):
    """Return the explicit prefix of the marker, or an identifier built
    from the stringified description when no prefix was given."""
    prefix = match.group('prefix')
    if prefix is not None:
        return prefix
    return toIdentifier(stringify(description))
182
183
def computeLevels(match, basicCategory, meta):
    """Return [levelInf, levelSup] for a marker.

    Each level is half the length of the 'hidden' / 'header' group of the
    match (every level is written with two characters). When both are zero,
    the default levels of the category are used instead.
    """
    levelInf = len(match.group('hidden')) // 2
    levelSup = len(match.group('header')) // 2

    if levelInf != 0 or levelSup != 0:
        return [levelInf, levelSup]

    # Get the default inf and sup level
    levelInf, levelSup = getDefaultLevels(basicCategory, meta)
    return [levelInf, levelSup]
193
194
def computeSectionNumber(levelSup):
    """Return the first levelSup header counters joined with dots."""
    return '.'.join(str(counter) for counter in headers[:levelSup])
196
197
def computeLeading(levelSup, sectionNumber):
    """Return the leading (the section numbering followed by a dot), or ''
    when levelSup is 0."""
    return sectionNumber + '.' if levelSup != 0 else ''
203
204
def computeCategory(basicCategory, leading):
    """Return '<basicCategory>:<leading>' and increment its global counter.

    A category seen for the first time starts at 0 before the increment.
    """
    category = basicCategory + ':' + leading
    count[category] = count.get(category, 0) + 1
    return category
214
215
def computeTag(match, basicCategory, category, number):
    """Determine the final tag.

    With an explicit name in the marker the tag is '<basicCategory>:<name>',
    otherwise it is the category followed by the number.
    """
    name = match.group('name')
    if name is None:
        return category + number
    return basicCategory + ':' + name
221
222
def computeLocalNumber(levelInf, levelSup, number):
    """Return the header counters of levels levelInf..levelSup (the hidden
    part is omitted) followed by number, joined with dots."""
    parts = [str(counter) for counter in headers[levelInf:levelSup]]
    parts.append(str(number))
    return '.'.join(parts)
225
226
def computeGlobalNumber(sectionNumber, number):
    """Return '<sectionNumber>.<number>', or number alone when the section
    number is empty."""
    if not sectionNumber:
        return number
    return sectionNumber + '.' + number
232
233
def computeTextLinkToc(meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber):
    """Build the text, the default link label and the listing entry.

    With automatic formatting enabled for the category, the text is the
    description and local number in bold (plus the emphasized title in
    parentheses), the link is the description and local number, and the toc
    entry is the global number followed by the title or the description.
    Otherwise the raw parts are exposed in classed Span elements so that
    they can be styled by the user.

    Returns [text, link, toc].
    """
    if not getFormat(basicCategory, meta):
        # No automatic formatting: expose every part in its own Span
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]
        link = [Span(['', ['pandoc-numbering-link'] + getClasses(basicCategory, meta), []], text)]
        toc = [Span(['', ['pandoc-numbering-toc'] + getClasses(basicCategory, meta), []], text)]
        return [text, link, toc]

    # Prepare the final text, with the title appended when there is one
    text = [Strong(description + [Space(), Str(localNumber)])]
    if title:
        text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

    # Compute the link
    link = description + [Space(), Str(localNumber)]

    # Compute the toc
    toc = [Str(globalNumber), Space()] + (title if title else description)

    return [text, link, toc]
269
270
def addLaTeX(contents, basicCategory, title, description, leading, number):
    """Prepend to contents a raw TeX inline registering the element in the
    LaTeX listing of its category.

    The listing name is basicCategory reduced to its lowercase letters; the
    entry text is the title when there is one, the description otherwise.
    contents is modified in place.
    """
    latexCategory = re.sub('[^a-z]+', '', basicCategory)
    entry = title if title else description

    # Every backslash is escaped explicitly: '\i' is not a valid string escape
    latex = ''.join([
        '\\phantomsection\\addcontentsline{', latexCategory, '}{', latexCategory, '}',
        '{\\protect\\numberline {', leading, number, '}',
        '{\\ignorespaces ', toLatex(entry), '}}',
    ])
    contents.insert(0, RawInline('tex', latex))
279
280
def numberingSharpSharp(value):
    """Replace, in place, the first '##' of the last inline by a single '#'."""
    last = value[-1]
    last['c'] = last['c'].replace('##', '#', 1)
282
283
# Shared state between referencingLink and the 'replacing' action:
# 'search' is the marker to look for in Str nodes and 'replace' the list of
# inlines substituted for every occurrence.
search = None
replace = None
285
286
def lowering(key, value, format, meta):
    """Filter action: replace every Str by its lower-cased version."""
    if key != 'Str':
        return None
    return Str(value.lower())
289
290
def referencing(key, value, format, meta):
    """Filter action: dispatch Link and Cite nodes to their referencing handler.

    Any other node type is left unchanged (None is returned).
    """
    if key == 'Cite':
        return referencingCite(value, format, meta)
    if key == 'Link':
        return referencingLink(value, format, meta)
    return None
295
296
def referencingLink(value, format, meta):
    """Resolve a link whose target is '#<tag>' for a numbered element.

    value is the content of the Link node and is modified in place; None is
    always returned. For a known tag, the markers '#t', '#T', '#d', '#D',
    '#s', '#g', '#c', '#n' and '#' are replaced in the link title by the
    lower-cased title, title, lower-cased description, description, section
    number, global number, count, local number and local number. The same
    replacements are applied to the link text, unless it is empty, in which
    case it becomes the default label stored for the tag.

    The module globals 'search' and 'replace' are used to pass the current
    marker and its replacement to the 'replacing' action.
    """
    global replace, search

    # Layout of a Link node depends on the pandoc version
    if pandocVersion() < '1.16':
        # pandoc 1.15
        [text, [reference, title]] = value
        i = 0
    else:
        # pandoc > 1.15
        [attributes, text, [reference, title]] = value
        i = 1

    # Raw string: '\w' must be a regex escape, not a string-literal escape
    if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
        return None

    # Compute the name
    tag = reference[1:]
    if tag not in information:
        return None
    info = information[tag]

    # Replace the markers in the link title; the order matters because the
    # bare '#' must be handled last
    target = value[i + 1]
    titleSubstitutions = [
        ('#t', stringify(info['title']).lower()),
        ('#T', stringify(info['title'])),
        ('#d', stringify(info['description']).lower()),
        ('#D', stringify(info['description'])),
        ('#s', info['section']),
        ('#g', info['global']),
        ('#c', info['count']),
        ('#n', info['local']),
        ('#', info['local']),
    ]
    for marker, replacement in titleSubstitutions:
        target[1] = target[1].replace(marker, replacement)

    if text == []:
        # The link text is empty, replace it with the default label
        value[i] = info['link']
        return None

    # The link text is not empty: replace the markers in its inlines.
    # Replacements are built lazily, right before the walk that uses them.
    textSubstitutions = [
        ('#t', lambda: walk(info['title'], lowering, format, meta)),
        ('#T', lambda: info['title']),
        ('#d', lambda: walk(info['description'], lowering, format, meta)),
        ('#D', lambda: info['description']),
        ('#s', lambda: [Str(info['section'])]),
        ('#g', lambda: [Str(info['global'])]),
        ('#c', lambda: [Str(info['count'])]),
        ('#n', lambda: [Str(info['local'])]),
        ('#', lambda: [Str(info['local'])]),
    ]
    for marker, build in textSubstitutions:
        replace = build()
        search = marker
        value[i] = walk(value[i], replacing, format, meta)

    return None
378
379
def referencingCite(value, format, meta):
    """Turn a '@prefix:name' citation into a link to the numbered element.

    The shortcut applies only when the text of the first inline of the
    citation has the form '@<category>:<name or number>', the cite shortcut
    is enabled for that category, and the tag is known. The returned Link
    shows the local number and targets '#<tag>'. None is returned (node
    unchanged) in every other case.
    """
    # Raw string: '\w', '\d' and '\.' must be regex escapes
    match = re.match(
        r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$',
        value[1][0]['c']
    )
    if match is None or not getCiteShortCut(match.group('category'), meta):
        return None

    # Deal with @prefix:name shortcut
    tag = match.group('tag')
    if tag not in information:
        return None

    label = [Str(information[tag]['local'])]
    target = ['#' + tag, '']
    if pandocVersion() < '1.16':
        # pandoc 1.15
        return Link(label, target)
    # pandoc > 1.15
    return Link(['', [], []], label, target)
392
393
def replacing(key, value, format, meta):
    """Filter action: substitute the global 'replace' inlines for every
    occurrence of the global 'search' marker inside Str nodes.

    Returns the list of inlines replacing the Str, or None when the node is
    not a Str or does not contain the marker.
    """
    if key != 'Str':
        return None

    pieces = value.split(search)
    if len(pieces) <= 1:
        return None

    result = []
    if pieces[0] != '':
        result.append(Str(pieces[0]))

    # Every piece after the first one was preceded by the marker
    for piece in pieces[1:]:
        result.extend(replace)
        if piece != '':
            result.append(Str(piece))

    return result
409
410
def hasMeta(meta):
    """Tell whether meta holds a 'pandoc-numbering' entry of type MetaList."""
    if 'pandoc-numbering' not in meta:
        return False
    return meta['pandoc-numbering']['t'] == 'MetaList'
412
413
def isCorrect(definition):
    """Tell whether definition is a MetaMap whose 'category' entry is a
    MetaInlines made of exactly one Str."""
    if definition['t'] != 'MetaMap':
        return False
    if 'category' not in definition['c']:
        return False

    category = definition['c']['category']
    if category['t'] != 'MetaInlines':
        return False

    inlines = category['c']
    return len(inlines) == 1 and inlines[0]['t'] == 'Str'
419
420
def hasProperty(definition, name, type):
    """Tell whether definition has a property called name of the given
    metadata type (e.g. 'MetaString', 'MetaBool')."""
    properties = definition['c']
    if name not in properties:
        return False
    return properties[name]['t'] == type
422
423
def getProperty(definition, name):
    """Return the content of the property called name of definition."""
    entry = definition['c'][name]
    return entry['c']
425
426
def getFirstValue(definition, name):
    """Return the content of the first element of the property called name."""
    first = getProperty(definition, name)[0]
    return first['c']
428
429
def addListings(doc, format, meta):
    """Insert the listings requested in the metadata at the top of doc.

    For every correct definition of the 'pandoc-numbering' metadata list
    that has a 'listing' title, an unnumbered level-1 Header is added,
    followed by:
    - for LaTeX output, a raw TeX block that declares and starts the table
      of contents of the category ('tab' and 'space' lengths in em can be
      given as MetaString properties);
    - for other outputs, a bullet list of links to the elements of the
      category (nothing if the category has no element).

    doc[1] is modified in place; nothing is done without the metadata list.
    """
    if not hasMeta(meta):
        return

    def readLength(definition, name):
        # Float value of a MetaString property, None if absent or invalid
        if not hasProperty(definition, name, 'MetaString'):
            return None
        try:
            return float(getProperty(definition, name))
        except ValueError:
            return None

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not (isCorrect(definition) and hasProperty(definition, 'listing', 'MetaInlines')):
            continue

        # Get the category name and the title
        category = getFirstValue(definition, 'category')
        title = getProperty(definition, 'listing')

        if format == 'latex':
            # Special case for latex output

            # Get the link color
            if 'toccolor' in meta:
                linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
            else:
                linkcolor = '\\hypersetup{linkcolor=black}'

            tab = readLength(definition, 'tab')
            space = readLength(definition, 'space')

            # Deal with default tab length
            if tab is None:
                tab = 1.5

            # Deal with default space length: it grows with the deepest
            # section number found in the collection
            if space is None:
                level = 0
                if category in collections:
                    for tag in collections[category]:
                        level = max(level, information[tag]['section'].count('.'))
                space = level + 2.3

            # Add a RawBlock
            latexCategory = re.sub('[^a-z]+', '', category)
            latex = ''.join([
                linkcolor,
                '\\makeatletter',
                '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}',
                '\\@starttoc{' + latexCategory + '}',
                '\\makeatother'
            ])
            elt = [RawBlock('tex', latex)]
        elif category in collections:
            # Prepare the list, one item per element of the collection
            elements = []
            for tag in collections[category]:
                text = information[tag]['toc']

                if pandocVersion() < '1.16':
                    # pandoc 1.15
                    link = Link(text, ['#' + tag, ''])
                else:
                    # pandoc 1.16
                    link = Link(['', [], []], text, ['#' + tag, ''])

                elements.append([Plain([link])])

            # Add a bullet list
            elt = [BulletList(elements)]
        else:
            # Add nothing
            elt = []

        # Add a new listing
        listings.append(Header(1, ['', ['unnumbered'], []], title))
        listings.extend(elt)

    # Add listings to the document
    doc[1] = listings + doc[1]
527
528
def getValue(category, meta, fct, default, analyzeDefinition):
    """Return the value cached on fct for category.

    The cache is the dict fct.value. It is built on the first call by
    running analyzeDefinition on every correct definition of the
    'pandoc-numbering' metadata list. A category missing from the cache is
    stored with default before being returned.
    """
    if not hasattr(fct, 'value'):
        fct.value = {}
        definitions = meta['pandoc-numbering']['c'] if hasMeta(meta) else []
        # Loop on all listings definition
        for definition in definitions:
            if isCorrect(definition):
                analyzeDefinition(definition)

    return fct.value.setdefault(category, default)
541
542
def getFormat(category, meta):
    """Tell whether automatic formatting is enabled for category.

    The value comes from the MetaBool property 'format' of the category
    definition and defaults to True.
    """
    def collect(definition):
        if not hasProperty(definition, 'format', 'MetaBool'):
            return
        name = getFirstValue(definition, 'category')
        getFormat.value[name] = getProperty(definition, 'format')

    return getValue(category, meta, getFormat, True, collect)
548
549
def getCiteShortCut(category, meta):
    """Tell whether the '@prefix:name' shortcut is enabled for category.

    The value comes from the MetaBool property 'cite-shortcut' of the
    category definition and defaults to False.
    """
    def collect(definition):
        if not hasProperty(definition, 'cite-shortcut', 'MetaBool'):
            return
        name = getFirstValue(definition, 'category')
        getCiteShortCut.value[name] = getProperty(definition, 'cite-shortcut')

    return getValue(category, meta, getCiteShortCut, False, collect)
555
556
def getLevelsFromYaml(definition):
    """Return [levelInf, levelSup] from the 'first' and 'last' properties.

    Both properties are optional MetaString integers; a missing or
    non-numeric value leaves the level at its default. levelInf is
    'first' - 1 clamped to 0..6 (default 0); levelSup is 'last' capped at 6
    and never below levelInf (default 0).
    """
    levelInf = levelSup = 0

    if hasProperty(definition, 'first', 'MetaString'):
        try:
            first = int(getProperty(definition, 'first'))
        except ValueError:
            pass
        else:
            levelInf = max(min(first - 1, 6), 0)

    if hasProperty(definition, 'last', 'MetaString'):
        try:
            last = int(getProperty(definition, 'last'))
        except ValueError:
            pass
        else:
            levelSup = max(min(last, 6), levelInf)

    return [levelInf, levelSup]
570
571
def getLevelsFromRegex(definition):
    """Return [levelInf, levelSup] from the 'sectioning' property.

    The first value of the property must match the sectioning pattern
    (headerRegex); each level is half the length of the 'hidden' / 'header'
    group. [0, 0] is returned when it does not match.
    """
    match = re.match('^' + headerRegex + '$', getFirstValue(definition, 'sectioning'))
    if not match:
        return [0, 0]

    # Compute the levelInf and levelSup values
    return [len(match.group(name)) // 2 for name in ('hidden', 'header')]
578
579
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] of category.

    The levels come from the 'sectioning' property of the category
    definition when it is a MetaInlines made of a single Str, and from the
    'first' / 'last' properties otherwise. The default is [0, 0].
    """
    def collect(definition):
        usesSectioning = (
            hasProperty(definition, 'sectioning', 'MetaInlines')
            and len(getProperty(definition, 'sectioning')) == 1
            and getProperty(definition, 'sectioning')[0]['t'] == 'Str'
        )
        if usesSectioning:
            levels = getLevelsFromRegex(definition)
        else:
            levels = getLevelsFromYaml(definition)
        getDefaultLevels.value[getFirstValue(definition, 'category')] = levels

    return getValue(category, meta, getDefaultLevels, [0, 0], collect)
590
591
def getClasses(category, meta):
    """Return the list of classes attached to the elements of category.

    The classes come from the MetaList property 'classes' of the category
    definition (each item is stringified); the default is [category].
    """
    def collect(definition):
        if not hasProperty(definition, 'classes', 'MetaList'):
            return
        classes = [stringify(elt) for elt in getProperty(definition, 'classes')]
        getClasses.value[getFirstValue(definition, 'category')] = classes

    return getValue(category, meta, getClasses, [category], collect)
600
601
def pandocVersion():
    """Return the version string printed by 'pandoc -v'.

    The pandoc executable is run only once; the result is cached in
    pandocVersion.value.
    NOTE(review): callers compare the returned string with '<', i.e.
    lexicographically, not component by component.
    """
    try:
        return pandocVersion.value
    except AttributeError:
        # Not computed yet
        pass

    process = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = process.communicate()
    found = re.search(b'pandoc (?P<version>.*)', out)
    pandocVersion.value = found.group('version').decode('utf-8')
    return pandocVersion.value
607
608
def main():
    """Run the filter: numbering first, then referencing."""
    actions = [numbering, referencing]
    toJSONFilters(actions)


# Run the filter only when the file is executed as a script
if __name__ == '__main__':
    main()
613