Completed
Push — master ( 8b7533...e90def )
by Christophe
01:16
created

getClasses()   A

Complexity

Conditions 4

Size

Total Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
dl 0
loc 9
rs 9.2
c 0
b 0
f 0
1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Number of elements seen so far, keyed by 'basicCategory:leading'
count = {}
# Numbers, title, description, link and toc entry of every numbered element, keyed by tag
information = {}
# Tags of the numbered elements, grouped by basic category
collections = {}
# Current counter of each of the six header levels
headers = [0, 0, 0, 0, 0, 0]
# Sectioning prefix: a run of hidden levels ('-.') followed by a run of visible levels ('+.').
# Raw string so that the backslashes are regex escapes, not (invalid) string escapes.
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Read a JSON pandoc document from stdin, apply the actions, write it to stdout.

    Each element of actions is a function action(key, value, format, meta)
    that is handed to pandocfilters' walk: key is the type of the pandoc
    object (e.g. 'Str', 'Para'), value is its contents, format is the
    target output format (first command line argument, or '' when there
    is none) and meta is the document's metadata.

    The actions are applied one after the other, each one on the result
    of the previous one.  The listings are then added by addListings
    before the document is dumped as JSON on stdout.
    """
    try:
        reader = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        reader = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(reader.read())

    # The output format is the first command line argument when present
    target = sys.argv[1] if len(sys.argv) > 1 else ""

    # Chain the actions: each one walks the tree produced by the previous one
    altered = doc
    for action in actions:
        altered = walk(altered, action, target, doc[0]['unMeta'])

    addListings(altered, target, altered[0]['unMeta'])
    json.dump(altered, sys.stdout)
54
55
def removeAccents(string):
    """Return string with its combining marks (accents) removed.

    The string is decomposed with the NFKD normalization and every
    combining character of the decomposition is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    return u"".join(char for char in decomposed if not unicodedata.combining(char))
58
59
def toIdentifier(string):
    """Turn string into an identifier made of letters, digits, '_' and '-'.

    The string is lowercased and stripped of its accents, every run of
    other characters is replaced by a single dash, and everything before
    the first letter is removed.
    """
    lowered = removeAccents(string.lower())

    # Replace invalid characters by a dash
    dashed = re.sub('[^0-9a-zA-Z_-]+', '-', lowered)

    # Remove everything that precedes the first letter
    return re.sub('^[^a-zA-Z]+', '', dashed)
67
68
def toLatex(x):
    """Walk the tree x and return its concatenated textual content.

    Only the text of Str, MetaString, Code and Math nodes is kept; Space
    and LineBreak nodes become a single space.  Math content is wrapped
    in '$' because the tree is always walked with the 'latex' format.
    The content of Note nodes is emptied in place so that it is not
    collected.
    """
    collected = []

    def go(key, val, format, meta):
        if key in ('Str', 'MetaString'):
            collected.append(val)
        elif key == 'Code':
            collected.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package
            collected.append('$' + val[1] + '$' if format == 'latex' else val[1])
        elif key in ('LineBreak', 'Space'):
            collected.append(" ")
        elif key == 'Note':
            # Do not stringify value from Note node
            del val[:]

    walk(x, go, 'latex', {})
    return ''.join(collected)
95
96
def numbering(key, value, format, meta):
    """Filter action dispatching Header and Para nodes to their numbering handler.

    Returns whatever the handler returns, and None for any other node type.
    """
    if key == 'Header':
        return numberingHeader(value)
    if key == 'Para':
        return numberingPara(value, format, meta)
    return None
101
102
def numberingHeader(value):
    """Update the header counters for a Header node; returns None.

    Headers carrying the 'unnumbered' class are ignored.  Otherwise the
    counter of the header's level is incremented and the counters of all
    deeper levels are reset to 0.
    """
    level, (_identifier, classes, _keyvals), _content = value

    if 'unnumbered' in classes:
        return

    headers[level - 1] += 1

    # A new header restarts the numbering of every deeper level
    for deeper in range(level, 6):
        headers[deeper] = 0
108
109
def numberingPara(value, format, meta):
    """Number a paragraph whose last inline is a '#' marker.

    The paragraph is only considered when it has at least three inlines,
    the last but one being a Space and the last one a Str.  When the last
    Str is an optional sectioning prefix followed by '#', an optional
    'prefix:' and an optional name, the result of numberingEffective is
    returned.  When it uses '##' instead, numberingSharpSharp rewrites it
    in place.  In every other case None is returned.
    """
    if len(value) < 3 or value[-2] != Space() or value[-1]['t'] != 'Str':
        return None

    last = value[-1]['c']

    # Raw strings keep the backslashes as regex escapes rather than invalid string escapes
    match = re.match('^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$', last)
    if match:
        # Is it a Para and the last element is an identifier beginning with '#'
        return numberingEffective(match, value, format, meta)

    if re.match('^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$', last):
        # Special case where the last element is '...##...'
        return numberingSharpSharp(value)

    return None
120
121
def numberingEffective(match, value, format, meta):
    """Build the numbered replacement of a paragraph ending with a '#' marker.

    match is the regex match on the marker and value the inlines of the
    paragraph.  The numbers, title, description, link and toc entry of
    the element are recorded in information under its tag, and the tag
    is appended to the collection of its basic category.  Returns a Para
    holding a Span identified by the tag (preceded, for the latex format
    with automatic formatting, by a raw TeX table-of-contents entry).
    """
    # computeTitle removes the title from value, so it has to run before computeDescription
    title = computeTitle(value)
    description = computeDescription(value)
    basicCategory = computeBasicCategory(match, description)
    levelInf, levelSup = computeLevels(match, basicCategory, meta)
    sectionNumber = computeSectionNumber(levelSup)
    leading = computeLeading(levelSup, sectionNumber)

    # computeCategory increments the counter that is read just after
    category = computeCategory(basicCategory, leading)
    number = str(count[category])

    tag = computeTag(match, basicCategory, category, number)
    localNumber = computeLocalNumber(levelInf, levelSup, number)
    globalNumber = computeGlobalNumber(sectionNumber, number)
    text, link, toc = computeTextLinkToc(
        meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber
    )

    # Store the numbers and the label for automatic numbering (See referencing function)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # Prepare the contents
    spanClasses = ['pandoc-numbering-text'] + getClasses(basicCategory, meta)
    contents = [Span([tag, spanClasses, []], text)]

    # Compute collections
    collections.setdefault(basicCategory, []).append(tag)

    # Special case for LaTeX
    if format == 'latex' and getFormat(basicCategory, meta):
        addLaTeX(contents, basicCategory, title, description, leading, number)

    # Return the contents in a Para element
    return Para(contents)
162
163
def computeTitle(value):
    """Extract the optional parenthesised title of a numbered paragraph.

    value is the list of inlines of the paragraph; its last two items are
    the Space and the marker Str.  When the item just before them is a
    Str ending with ')', the title starts at the first Str beginning with
    '(' and runs up to that item.

    The title inlines are returned without their surrounding parentheses
    and are removed from value in place, together with the separator that
    precedes them.  An empty list is returned (and value left untouched)
    when there is no title.
    """
    title = []
    closing = value[-3]
    if closing['t'] == 'Str' and closing['c'][-1:] == ')':
        for (i, item) in enumerate(value):
            # Slicing instead of indexing: an empty Str must not raise
            if item['t'] == 'Str' and item['c'][:1] == '(':
                title = value[i:-2]
                title[0]['c'] = title[0]['c'][1:]
                title[-1]['c'] = title[-1]['c'][:-1]
                # Also drop the separator before the title.  The start is
                # clamped to 0: with a title opening the paragraph, i - 1
                # would be -1 and the slice would be empty, leaving the
                # title in the description.
                del value[max(i - 1, 0):-2]
                break
    return title
174
175
def computeDescription(value):
    """Return the inlines of value without its last two items (the Space and the marker)."""
    withoutMarker = value[:-2]
    return withoutMarker
177
178
def computeBasicCategory(match, description):
    """Return the basic category of a numbered element.

    It is the 'prefix' group of match when there is one, otherwise the
    identifier derived from the stringified description.
    """
    prefix = match.group('prefix')
    if prefix is not None:
        return prefix
    return toIdentifier(stringify(description))
183
184
def computeLevels(match, basicCategory, meta):
    """Return [levelInf, levelSup] for a numbered element.

    The levels are read from the 'hidden' and 'header' groups of match,
    each level marker being two characters long.  When both are 0 the
    default levels of the category are used instead.
    """
    levelInf = len(match.group('hidden')) // 2
    levelSup = len(match.group('header')) // 2

    if levelInf != 0 or levelSup != 0:
        return [levelInf, levelSup]

    # No explicit sectioning: get the default inf and sup level
    [levelInf, levelSup] = getDefaultLevels(basicCategory, meta)
    return [levelInf, levelSup]
194
195
def computeSectionNumber(levelSup):
    """Return the current counters of the first levelSup header levels joined by dots."""
    return '.'.join(str(counter) for counter in headers[:levelSup])
197
198
def computeLeading(levelSup, sectionNumber):
    """Return the leading part of a number: the section number followed by a dot.

    It is the empty string when levelSup is 0.
    """
    return sectionNumber + '.' if levelSup != 0 else ''
204
205
def computeCategory(basicCategory, leading):
    """Return the full category 'basicCategory:leading' and count one more element in it.

    The counter of a category seen for the first time starts at 1.
    """
    category = basicCategory + ':' + leading

    # A new category starts from 0 before being incremented
    count[category] = count.get(category, 0) + 1

    return category
215
216
def computeTag(match, basicCategory, category, number):
    """Return the final tag of a numbered element.

    It is 'basicCategory:name' when match has a 'name' group, otherwise
    the full category followed by the number.
    """
    name = match.group('name')
    if name is not None:
        return basicCategory + ':' + name
    return category + number
222
223
def computeLocalNumber(levelInf, levelSup, number):
    """Return the local number: header counters from levelInf up to levelSup, then number.

    The hidden levels (before levelInf) are omitted; the parts are joined by dots.
    """
    parts = [str(counter) for counter in headers[levelInf:levelSup]]
    parts.append(str(number))
    return '.'.join(parts)
226
227
def computeGlobalNumber(sectionNumber, number):
    """Return 'sectionNumber.number', or number alone when sectionNumber is empty."""
    return sectionNumber + '.' + number if sectionNumber else number
233
234
def computeTextLinkToc(meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber):
    """Return [text, link, toc], the three renderings of a numbered element.

    With automatic formatting enabled for the category, text is the
    description and the local number in Strong (followed by the
    emphasized, parenthesised title if any), link is the description
    followed by the local number and toc is the global number followed
    by the title, or by the description when there is no title.

    Otherwise text is a list of Spans classed 'description', 'title',
    'local', 'global' and 'section', and link and toc wrap it in a Span
    classed 'pandoc-numbering-link' / 'pandoc-numbering-toc' plus the
    classes of the category.
    """
    if not getFormat(basicCategory, meta):
        # No automatic formatting: expose every piece in its own Span
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]
        link = [Span(['', ['pandoc-numbering-link'] + getClasses(basicCategory, meta), []], text)]
        toc = [Span(['', ['pandoc-numbering-toc'] + getClasses(basicCategory, meta), []], text)]
        return [text, link, toc]

    # Prepare the final text, with the title when there is one
    text = [Strong(description + [Space(), Str(localNumber)])]
    if title:
        text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

    # Compute the link (built separately so that it shares no list with the text)
    link = description + [Space(), Str(localNumber)]

    # Compute the toc
    toc = [Str(globalNumber), Space()] + (title if title else description)

    return [text, link, toc]
270
271
def addLaTeX(contents, basicCategory, title, description, leading, number):
    """Insert at the start of contents the raw TeX adding the element to its listing.

    The TeX is a phantomsection followed by an addcontentsline entry for
    the file named after the category (reduced to its lowercase
    letters).  The entry shows leading + number and the title, or the
    description when there is no title.
    """
    latexCategory = re.sub('[^a-z]+', '', basicCategory)
    entry = title if title else description

    # Every backslash is doubled: '\i' would be an invalid string escape
    latex = '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory + \
        '}{\\protect\\numberline {' + leading + number + '}{\\ignorespaces ' + toLatex(entry) + '}}'

    contents.insert(0, RawInline('tex', latex))
280
281
def numberingSharpSharp(value):
    """Replace in place the first '##' of the last inline of value by '#'; returns None."""
    marker = value[-1]
    marker['c'] = marker['c'].replace('##', '#', 1)
283
284
# State shared between referencingLink, which sets it, and replacing, which reads it:
# the placeholder looked for and the inlines it is replaced by
search = None
replace = None
286
287
def lowering(key, value, format, meta):
    """Filter action replacing every Str by its lowercase version; other nodes are kept."""
    if key != 'Str':
        return None
    return Str(value.lower())
290
291
def referencing(key, value, format, meta):
    """Filter action dispatching Link and Cite nodes to their referencing handler.

    Returns whatever the handler returns, and None for any other node type.
    """
    if key == 'Link':
        return referencingLink(value, format, meta)
    if key == 'Cite':
        return referencingCite(value, format, meta)
    return None
296
297
def referencingLink(value, format, meta):
    """Fill in place a Link whose target is the '#tag' of a numbered element.

    Links whose target is not '#' followed by a known tag are left
    untouched.  Otherwise the placeholders '#t', '#T', '#d', '#D', '#s',
    '#g', '#c', '#n' and '#' are replaced, in that order, in the title
    attribute of the link and in its text: lowercase title, title,
    lowercase description, description, section number, global number,
    count, local number and local number again.  An empty link text is
    replaced by the default link of the element.

    The Link is modified through value; None is always returned.
    """
    global replace, search

    if pandocVersion() < '1.16':
        # pandoc 1.15: [text, target]
        [text, [reference, title]] = value
        i = 0
    else:
        # pandoc > 1.15: [attributes, text, target]
        [attributes, text, [reference, title]] = value
        i = 1

    # Raw string: '\w' is a regex escape, not a string escape
    if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
        return

    # Compute the name
    tag = reference[1:]
    if tag not in information:
        return

    entry = information[tag]
    target = value[i + 1]

    # Replace the placeholders in the title attribute (plain strings).
    # The bare '#' comes last so that it does not eat the other placeholders.
    titleText = stringify(entry['title'])
    descriptionText = stringify(entry['description'])
    tooltip = target[1]
    for placeholder, replacement in (
        ('#t', titleText.lower()),
        ('#T', titleText),
        ('#d', descriptionText.lower()),
        ('#D', descriptionText),
        ('#s', entry['section']),
        ('#g', entry['global']),
        ('#c', entry['count']),
        ('#n', entry['local']),
        ('#', entry['local']),
    ):
        tooltip = tooltip.replace(placeholder, replacement)
    target[1] = tooltip

    if text == []:
        # The link text is empty, replace it with the default label
        value[i] = entry['link']
        return

    # The link text is not empty: replace the placeholders by inlines.
    # Each replacement is built just before its own pass, in the same order as above.
    substitutions = (
        ('#t', lambda: walk(entry['title'], lowering, format, meta)),
        ('#T', lambda: entry['title']),
        ('#d', lambda: walk(entry['description'], lowering, format, meta)),
        ('#D', lambda: entry['description']),
        ('#s', lambda: [Str(entry['section'])]),
        ('#g', lambda: [Str(entry['global'])]),
        ('#c', lambda: [Str(entry['count'])]),
        ('#n', lambda: [Str(entry['local'])]),
        ('#', lambda: [Str(entry['local'])]),
    )
    for placeholder, build in substitutions:
        # replacing reads these two module-level variables
        replace = build()
        search = placeholder
        value[i] = walk(value[i], replacing, format, meta)
379
380
def referencingCite(value, format, meta):
    """Replace a '@category:name' citation by a link to the numbered element.

    The first inline of the citation text has to be '@' followed by a
    tag made of a category, ':' and either a name or a dotted number;
    the cite shortcut has to be enabled for the category and the tag has
    to be known.  The Link returned shows the local number of the
    element and points to '#tag'.  None is returned in every other case.
    """
    # Raw string: '\w', '\d' and '\.' are regex escapes, not string escapes
    match = re.match(
        r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$',
        value[1][0]['c']
    )
    if match is None or not getCiteShortCut(match.group('category'), meta):
        return None

    # Deal with @prefix:name shortcut
    tag = match.group('tag')
    if tag not in information:
        return None

    if pandocVersion() < '1.16':
        # pandoc 1.15
        return Link([Str(information[tag]['local'])], ['#' + tag, ''])

    # pandoc > 1.15
    return Link(['', [], []], [Str(information[tag]['local'])], ['#' + tag, ''])
394
395
def replacing(key, value, format, meta):
    """Filter action replacing the module-level search text in Str nodes.

    A Str containing search is split around it and returned as a list of
    inlines in which each occurrence is replaced by the inlines of the
    module-level replace; empty pieces are dropped.  None is returned
    for the other nodes and for a Str that does not contain search.
    """
    if key != 'Str':
        return None

    pieces = value.split(search)
    if len(pieces) <= 1:
        return None

    head, tail = pieces[0], pieces[1:]
    result = [Str(head)] if head != '' else []

    # Every piece after the first one is preceded by one occurrence of search
    for piece in tail:
        result.extend(replace)
        if piece != '':
            result.append(Str(piece))

    return result
412
413
def hasMeta(meta):
    """Tell whether meta has a 'pandoc-numbering' entry of type MetaList."""
    if 'pandoc-numbering' not in meta:
        return False
    return meta['pandoc-numbering']['t'] == 'MetaList'
415
416
def isCorrect(definition):
    """Tell whether definition is a MetaMap whose 'category' is a single Str.

    The 'category' entry has to be a MetaInlines holding exactly one
    inline, of type Str.
    """
    if definition['t'] != 'MetaMap':
        return False

    fields = definition['c']
    if 'category' not in fields:
        return False

    category = fields['category']
    if category['t'] != 'MetaInlines':
        return False

    inlines = category['c']
    return len(inlines) == 1 and inlines[0]['t'] == 'Str'
422
423
def hasProperty(definition, name, type):
    """Tell whether definition has a property called name whose meta type is type."""
    properties = definition['c']
    if name not in properties:
        return False
    return properties[name]['t'] == type
425
426
def getProperty(definition, name):
    """Return the content of the property called name of definition."""
    entry = definition['c'][name]
    return entry['c']
428
429
def getFirstValue(definition, name):
    """Return the content of the first item of the property called name of definition."""
    first = getProperty(definition, name)[0]
    return first['c']
431
432
def addListings(doc, format, meta):
    """Prepend to the body of doc the listings declared in the metadata.

    Nothing is done unless the metadata has a 'pandoc-numbering' list.
    Every correct definition of that list having a 'listing' title
    produces an unnumbered level 1 header followed by:

    - for the latex format, a raw TeX block defining and starting the
      table of contents of the category ('tab' and 'space' are read from
      the definition; tab defaults to 1.5 and space to 2.3 plus the
      largest number of dots found in the section numbers of the
      collection);
    - for the other formats, a bullet list of links to the elements of
      the category, or nothing when the category has no element.

    doc is modified in place; None is returned.
    """
    if not hasMeta(meta):
        return

    def lengthOf(definition, name):
        # Float value of a MetaString property, None when absent or not a number
        if hasProperty(definition, name, 'MetaString'):
            try:
                return float(getProperty(definition, name))
            except ValueError:
                pass
        return None

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not isCorrect(definition) or not hasProperty(definition, 'listing', 'MetaInlines'):
            continue

        # Get the category name and the title of the listing
        category = getFirstValue(definition, 'category')
        title = getProperty(definition, 'listing')

        if format == 'latex':
            # Special case for latex output

            # Get the link color
            if 'toccolor' in meta:
                linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
            else:
                linkcolor = '\\hypersetup{linkcolor=black}'

            # Get the tab and the space
            tab = lengthOf(definition, 'tab')
            space = lengthOf(definition, 'space')

            # Deal with default tab length
            if tab is None:
                tab = 1.5

            # Deal with default space length: it grows with the depth of the section numbers
            if space is None:
                depth = 0
                for tag in collections.get(category, []):
                    depth = max(depth, information[tag]['section'].count('.'))
                space = depth + 2.3

            # Add a RawBlock
            latexCategory = re.sub('[^a-z]+', '', category)
            pieces = [
                linkcolor,
                '\\makeatletter',
                '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}',
                '\\@starttoc{' + latexCategory + '}',
                '\\makeatother'
            ]
            elt = [RawBlock('tex', ''.join(pieces))]
        elif category in collections:
            # One list item, holding a link, per element of the collection
            elements = []
            for tag in collections[category]:
                text = information[tag]['toc']

                if pandocVersion() < '1.16':
                    # pandoc 1.15
                    link = Link(text, ['#' + tag, ''])
                else:
                    # pandoc 1.16
                    link = Link(['', [], []], text, ['#' + tag, ''])

                elements.append([Plain([link])])

            # Add a bullet list
            elt = [BulletList(elements)]
        else:
            # Add nothing
            elt = []

        # Add a new listing
        listings = listings + [Header(1, ['', ['unnumbered'], []], title)] + elt

    # Add listings to the document
    doc[1] = listings + doc[1]
533
534
def getValue(category, meta, fct, default, analyzeDefinition):
    """Return the value cached on fct for category, default when there is none.

    On the first call for a given fct, the cache fct.value is created
    and analyzeDefinition is called on every correct definition of the
    'pandoc-numbering' metadata so that it can fill the cache.  A
    category missing from the cache is stored with default.
    """
    if not hasattr(fct, 'value'):
        fct.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if not isCorrect(definition):
                    continue
                analyzeDefinition(definition)

    return fct.value.setdefault(category, default)
547
548
def getFormat(category, meta):
    """Return the 'format' MetaBool declared for category in the metadata, True by default."""
    def analyzeDefinition(definition):
        if not hasProperty(definition, 'format', 'MetaBool'):
            return
        declared = getProperty(definition, 'format')
        getFormat.value[getFirstValue(definition, 'category')] = declared

    return getValue(category, meta, getFormat, True, analyzeDefinition)
554
555
def getCiteShortCut(category, meta):
    """Return the 'cite-shortcut' MetaBool declared for category in the metadata, False by default."""
    def analyzeDefinition(definition):
        if not hasProperty(definition, 'cite-shortcut', 'MetaBool'):
            return
        declared = getProperty(definition, 'cite-shortcut')
        getCiteShortCut.value[getFirstValue(definition, 'category')] = declared

    return getValue(category, meta, getCiteShortCut, False, analyzeDefinition)
561
562
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] of category, [0, 0] when undeclared.

    A definition gives its levels either with a 'sectioning' property
    made of a single Str following the headerRegex syntax, or with the
    'first' and 'last' MetaString properties.  first - 1 is clamped
    between 0 and 6 and gives levelInf; last is clamped between levelInf
    and 6 and gives levelSup.  Values that are not integers are ignored.
    """
    def analyzeDefinition(definition):
        levelInf = 0
        levelSup = 0

        usesSectioning = (
            hasProperty(definition, 'sectioning', 'MetaInlines')
            and len(getProperty(definition, 'sectioning')) == 1
            and getProperty(definition, 'sectioning')[0]['t'] == 'Str'
        )

        if usesSectioning:
            match = re.match('^' + headerRegex + '$', getFirstValue(definition, 'sectioning'))
            if match:
                # Each level marker ('-.' or '+.') is two characters long
                levelInf = len(match.group('hidden')) // 2
                levelSup = len(match.group('header')) // 2
        else:
            if hasProperty(definition, 'first', 'MetaString'):
                try:
                    levelInf = max(min(int(getProperty(definition, 'first')) - 1, 6), 0)
                except ValueError:
                    # Not an integer: keep the default
                    pass
            if hasProperty(definition, 'last', 'MetaString'):
                try:
                    levelSup = max(min(int(getProperty(definition, 'last')), 6), levelInf)
                except ValueError:
                    # Not an integer: keep the default
                    pass

        getDefaultLevels.value[getFirstValue(definition, 'category')] = [levelInf, levelSup]

    return getValue(category, meta, getDefaultLevels, [0, 0], analyzeDefinition)
591
592
def getClasses(category, meta):
    """Return the classes declared for category in the metadata, [category] by default.

    The classes are the stringified items of the 'classes' MetaList of
    the definition.
    """
    def analyzeDefinition(definition):
        if hasProperty(definition, 'classes', 'MetaList'):
            names = [stringify(item) for item in getProperty(definition, 'classes')]
            getClasses.value[getFirstValue(definition, 'category')] = names

    return getValue(category, meta, getClasses, [category], analyzeDefinition)
601
602
def pandocVersion():
    """Return the version string printed by 'pandoc -v'.

    The pandoc executable is run on the first call only; the version is
    then kept on the function itself and returned as is afterwards.
    """
    try:
        return pandocVersion.value
    except AttributeError:
        # First call: the version has not been looked up yet
        pass

    process = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output = process.communicate()[0]

    # The version is what follows 'pandoc ' up to the end of the line
    found = re.search(b'pandoc (?P<version>.*)', output)
    pandocVersion.value = found.group('version').decode('utf-8')
    return pandocVersion.value
608
609
def main():
    """Run the filter: the numbering pass first, then the referencing pass."""
    passes = [numbering, referencing]
    toJSONFilters(passes)
611
612
# Run the filter only when the file is executed as a script
if __name__ == "__main__":
    main()
614