Completed
Push — master ( c3d092...a1e08a )
by Christophe
01:16
created

numberingSharpSharp()   A

Complexity

Conditions 1

Size

Total Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
#!/usr/bin/env python
2
3
"""
Pandoc filter to number all kinds of things.
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Number of items already numbered, keyed by "<category>:<section prefix>".
count = {}
# Data recorded for every numbered item, keyed by its tag (filled by
# numberingEffective, read by the referencing functions and addListings).
information = {}
# Tags of the numbered items, grouped by basic category, in document order.
collections = {}
# Current counter of each of the six header levels.
headers = [0, 0, 0, 0, 0, 0]
# Sectioning prefix: a run of hidden levels ('-.') followed by a run of
# visible levels ('+.').  Raw string so that '\.' and '\+' reach the regex
# engine untouched instead of being (invalid) string escapes.
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Run a list of actions as a pandoc JSON filter (stdin -> stdout).

    Reads a JSON-formatted pandoc document from stdin, walks the tree once
    per action (in order), adds the listings and writes the resulting
    JSON-formatted document to stdout.

    The argument is a list of functions action(key, value, format, meta),
    where key is the type of the pandoc object (e.g. 'Str', 'Para'),
    value is the contents of the object (e.g. a string for 'Str',
    a list of inline elements for 'Para'), format is the target
    output format (taken from the first command line argument if
    present, '' otherwise), and meta is the document's metadata.

    If the function returns None, the object to which it applies
    will remain unchanged.  If it returns an object, the object will
    be replaced.  If it returns a list, the list will be spliced in to
    the list to which the target object belongs.  (So, returning an
    empty list deletes the object.)
    """
    try:
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        input_stream = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(input_stream.read())

    # Named 'target' rather than 'format' to avoid shadowing the builtin.
    target = sys.argv[1] if len(sys.argv) > 1 else ""

    # Every action receives the metadata of the document as it was read.
    meta = doc[0]['unMeta']
    altered = doc
    for action in actions:
        altered = walk(altered, action, target, meta)

    addListings(altered, target, altered[0]['unMeta'])
    json.dump(altered, sys.stdout)
54
55
def removeAccents(string):
    """Return *string* without its combining marks (accents).

    The text is decomposed with the NFKD normalization and every
    combining character is then dropped.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    return u"".join(char for char in decomposed if not unicodedata.combining(char))
58
59
def toIdentifier(string):
    """Turn *string* into an identifier usable as a category name.

    The text is lower-cased and stripped of its accents, every run of
    characters outside ``[0-9a-zA-Z_-]`` becomes a single dash, and all
    leading characters that are not ASCII letters are removed.
    """
    # Replace each run of invalid characters by one dash
    identifier = re.sub(r'[^0-9a-zA-Z_-]+', '-', removeAccents(string.lower()))

    # Remove everything before the first letter (digits, dashes, underscores)
    return re.sub(r'^[^a-zA-Z]+', '', identifier)
67
68
def toLatex(x):
    """Walk the tree x and return its concatenated string content.

    Formatting is left out, except that math is wrapped in '$...$'.
    Line breaks and spaces become a single blank; the content of notes
    is skipped.  The tree x itself is left untouched.
    """
    result = []

    def go(key, val, format, meta):
        if key in ('Str', 'MetaString'):
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package
            if format == 'latex':
                result.append('$' + val[1] + '$')
            else:
                result.append(val[1])
        elif key in ('LineBreak', 'Space'):
            result.append(" ")
        elif key == 'Note':
            # Returning a list makes walk() splice it in place of the note
            # in its *copy* of the tree without descending into the note,
            # so the note is skipped and the caller's document keeps it
            # (deleting val in place would empty the footnote for real).
            return []

    walk(x, go, 'latex', {})
    return ''.join(result)
95
96
def numbering(key, value, format, meta):
    """Filter action: dispatch headers and paragraphs to the numbering code.

    Headers update the section counters; paragraphs may be turned into a
    numbered item.  Every other element is left unchanged (None).
    """
    if key == 'Header':
        return numberingHeader(value)
    if key == 'Para':
        return numberingPara(value, format, meta)
    return None
101
102
def numberingHeader(value):
    """Update the global section counters for a Header element.

    value is the header content: [level, [identifier, classes,
    attributes], inlines].  Unless the header carries the 'unnumbered'
    class, the counter of its level is incremented and the counters of
    all deeper levels are reset to 0.  Returns None (header unchanged).
    """
    level = value[0]
    classes = value[1][1]
    if 'unnumbered' in classes:
        return

    headers[level - 1] += 1
    for deeper in range(level, len(headers)):
        headers[deeper] = 0
108
109
def numberingPara(value, format, meta):
    """Number a paragraph whose last word is a '#' marker.

    The paragraph is considered only when it has at least three inlines
    and ends with a Space followed by a Str.  When that last Str is
    '<sectioning>#[prefix:][name]' the paragraph is replaced by the
    result of numberingEffective; when it is the escaped form
    '<sectioning>##...' one '#' is removed in place.  Otherwise None is
    returned and the paragraph is left unchanged.
    """
    if len(value) < 3 or value[-2] != Space() or value[-1]['t'] != 'Str':
        return None

    last = value[-1]['c']

    # Raw strings: '\w' must reach the regex engine, not the string parser.
    match = re.match(
        '^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$',
        last
    )
    if match:
        # It is a Para whose last element is an identifier beginning with '#'
        return numberingEffective(match, value, format, meta)

    if re.match('^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$', last):
        # Special case where the last element is '...##...'
        return numberingSharpSharp(value)

    return None
120
121
def numberingEffective(match, value, format, meta):
    """Build the numbered paragraph for a Para ending with a '#' marker.

    match is the regex match of the marker (groups 'hidden', 'header',
    'prefix' and 'name'); value is the list of inlines of the paragraph,
    whose last two elements are a Space and the marker itself.

    The function extracts the optional '(title)', computes the category,
    increments its counter, records everything in the global
    ``information`` and ``collections`` dictionaries and returns the Para
    that replaces the original paragraph.
    """
    # Detect the title: '(...)' ending right before the marker
    title = []
    if value[-3]['t'] == 'Str' and value[-3]['c'][-1:] == ')':
        for (i, item) in enumerate(value):
            # [:1] instead of [0] so that an empty Str cannot raise
            if item['t'] == 'Str' and item['c'][:1] == '(':
                title = value[i:-2]
                # Strip the surrounding parentheses (done in place)
                title[0]['c'] = title[0]['c'][1:]
                title[-1]['c'] = title[-1]['c'][:-1]
                # Drop the title and the Space before it.  When the title
                # starts the paragraph (i == 0) there is nothing before it:
                # a plain 'i - 1' would be -1 and keep the title in.
                value = value[:max(i - 1, 0)] + value[-2:]
                break

    # Compute the description
    description = value[:-2]

    # Compute the basicCategory and the category
    if match.group('prefix') is None:
        basicCategory = toIdentifier(stringify(description))
    else:
        basicCategory = match.group('prefix')

    # Compute the levelInf and levelSup values (each level is 2 chars long)
    levelInf = len(match.group('hidden')) // 2
    levelSup = len(match.group('header')) // 2

    # Get the default inf and sup level
    if levelInf == 0 and levelSup == 0:
        [levelInf, levelSup] = getDefaultLevels(basicCategory, meta)

    # Compute the section number
    sectionNumber = '.'.join(map(str, headers[:levelSup]))

    # Compute the leading (composed of the section numbering and a dot)
    if levelSup != 0:
        leading = sectionNumber + '.'
    else:
        leading = ''

    category = basicCategory + ':' + leading

    # Increment the counter of the category (starting it if it is new)
    count[category] = count.get(category, 0) + 1

    # Get the number
    number = str(count[category])

    # Determine the final tag
    if match.group('name') is None:
        tag = category + number
    else:
        tag = basicCategory + ':' + match.group('name')

    # Replace the '-.-.+.+...#' by the category count (omitting the hidden part)
    localNumber = '.'.join(map(str, headers[levelInf:levelSup] + [number]))

    # Compute the globalNumber
    if sectionNumber:
        globalNumber = sectionNumber + '.' + number
    else:
        globalNumber = number

    automatic = getFormat(basicCategory, meta)
    classes = getClasses(basicCategory, meta)

    # Is the automatic formatting required for this category?
    if automatic:
        # Prepare the final text
        text = [Strong(description + [Space(), Str(localNumber)])]

        # Add the title to the final text
        if title:
            text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

        # Compute the link
        link = description + [Space(), Str(localNumber)]

        # Compute the toc
        toc = [Str(globalNumber), Space()]
        if title:
            toc = toc + title
        else:
            toc = toc + description

    else:
        # Prepare the final text: one span per piece, styling left to the user
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]

        # Compute the link
        link = [Span(['', ['pandoc-numbering-link'] + classes, []], text)]

        # Compute the toc
        toc = [Span(['', ['pandoc-numbering-toc'] + classes, []], text)]

    # Store the numbers and the label for automatic numbering (See referencing function)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # Prepare the contents
    contents = [Span([tag, ['pandoc-numbering-text'] + classes, []], text)]

    # Compute collections
    if basicCategory not in collections:
        collections[basicCategory] = []

    collections[basicCategory].append(tag)

    # Special case for LaTeX: register the item in the listing of its category
    if format == 'latex' and automatic:
        latexCategory = re.sub('[^a-z]+', '', basicCategory)
        if title:
            entry = title
        else:
            entry = description
        latex = '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory + \
            '}{\\protect\\numberline {' + leading + number + '}{\\ignorespaces ' + toLatex(entry) + '}}'
        contents.insert(0, RawInline('tex', latex))

    # Return the contents in a Para element
    return Para(contents)
257
258
def numberingSharpSharp(value):
    """Unescape a '##' marker: turn the first '##' of the last inline into '#'.

    value is the list of inlines of the paragraph; its last element is
    modified in place.  Returns None, so the paragraph itself is kept.
    """
    last = value[-1]
    last['c'] = last['c'].replace('##', '#', 1)
260
261
# State shared between referencingLink() and replacing(): the list of
# inlines to insert and the placeholder text it stands for.
replace = None
search = None
263
264
def lowering(key, value, format, meta):
    """Filter action: replace every Str by its lower-cased version."""
    if key == 'Str':
        return Str(value.lower())
    return None
267
268
def referencing(key, value, format, meta):
    """Filter action: dispatch links and citations to the referencing code.

    Every other element is left unchanged (None).
    """
    if key == 'Link':
        return referencingLink(value, format, meta)
    if key == 'Cite':
        return referencingCite(value, format, meta)
    return None
273
274
def referencingLink(value, format, meta):
    """Fill in a link that points to a numbered item ('#tag').

    value is the content of the Link and is modified in place; the
    function always returns None.  In the link title and in the link
    text the placeholders are replaced as follows:

    * '#t' / '#T': title of the item (lower case / as written);
    * '#d' / '#D': description of the item (lower case / as written);
    * '#s', '#g', '#c': section, global and count numbers;
    * '#n' and a bare '#': local number.

    An empty link text is replaced by the default label of the item.
    """
    global replace, search

    # pandoc < 1.16 : [text, [url, title]]
    # pandoc >= 1.16: [attributes, text, [url, title]]
    # The layout is read from the value itself: comparing version *strings*
    # is unreliable ('1.9' < '1.16' is False) and needs a pandoc subprocess.
    textIndex = len(value) - 2
    text = value[textIndex]
    target = value[textIndex + 1]
    reference = target[0]

    if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
        return None

    # Compute the name
    tag = reference[1:]
    if tag not in information:
        return None
    entry = information[tag]

    # Replace the placeholders in the title of the link.  Order matters:
    # the bare '#' must come last or it would eat the other placeholders.
    titleSubstitutions = (
        ('#t', stringify(entry['title']).lower()),
        ('#T', stringify(entry['title'])),
        ('#d', stringify(entry['description']).lower()),
        ('#D', stringify(entry['description'])),
        ('#s', entry['section']),
        ('#g', entry['global']),
        ('#c', entry['count']),
        ('#n', entry['local']),
        ('#', entry['local']),
    )
    for placeholder, substitute in titleSubstitutions:
        target[1] = target[1].replace(placeholder, substitute)

    if text == []:
        # The link text is empty, replace it with the default label
        value[textIndex] = entry['link']
        return None

    # The link text is not empty: same placeholders, same order, but the
    # substitutes are lists of inlines spliced in by replacing().
    textSubstitutions = (
        ('#t', walk(entry['title'], lowering, format, meta)),
        ('#T', entry['title']),
        ('#d', walk(entry['description'], lowering, format, meta)),
        ('#D', entry['description']),
        ('#s', [Str(entry['section'])]),
        ('#g', [Str(entry['global'])]),
        ('#c', [Str(entry['count'])]),
        ('#n', [Str(entry['local'])]),
        ('#', [Str(entry['local'])]),
    )
    for placeholder, substitute in textSubstitutions:
        # replacing() reads these two module-level variables
        search = placeholder
        replace = substitute
        value[textIndex] = walk(value[textIndex], replacing, format, meta)

    return None
356
357
def referencingCite(value, format, meta):
    """Turn a '@prefix:name' citation into a link to the numbered item.

    value is the content of the Cite element; its second item is the
    list of inlines the citation was written with.  The citation is
    replaced only when the shortcut is enabled for the category (see
    getCiteShortCut) and the tag is known; the link text is the local
    number of the item.  Returns None otherwise (citation unchanged).
    """
    inlines = value[1]
    # Only a citation that starts with plain text can be a shortcut
    if not inlines or inlines[0].get('t') != 'Str':
        return None

    match = re.match(
        r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$',
        inlines[0]['c']
    )
    if match is None or not getCiteShortCut(match.group('category'), meta):
        return None

    # Deal with @prefix:name shortcut
    tag = match.group('tag')
    if tag not in information:
        return None

    label = [Str(information[tag]['local'])]
    destination = ['#' + tag, '']

    # Compare the version numerically: as strings, '1.9' < '1.16' is False.
    version = tuple(int(part) for part in re.findall(r'\d+', pandocVersion())[:2])
    if version < (1, 16):
        # pandoc 1.15
        return Link(label, destination)

    # pandoc > 1.15: links carry attributes
    return Link(['', [], []], label, destination)
371
372
def replacing(key, value, format, meta):
    """Filter action: replace the placeholder ``search`` inside Str elements.

    Every occurrence of the module-level ``search`` text found in a Str
    is replaced by the inlines of the module-level ``replace`` list; the
    surrounding text is kept as Str elements.  Returns the resulting
    list of inlines, or None when the element is not a Str or does not
    contain the placeholder.
    """
    if key != 'Str':
        return None

    pieces = value.split(search)
    if len(pieces) < 2:
        # Placeholder not found
        return None

    result = []
    if pieces[0] != '':
        result.append(Str(pieces[0]))

    # One replacement between each pair of consecutive pieces
    for piece in pieces[1:]:
        result.extend(replace)
        if piece != '':
            result.append(Str(piece))

    return result
389
390
def hasMeta(meta):
    """Tell whether meta holds a 'pandoc-numbering' entry that is a MetaList."""
    if 'pandoc-numbering' not in meta:
        return False
    return meta['pandoc-numbering']['t'] == 'MetaList'
392
393
def isCorrect(definition):
    """Tell whether definition is a usable 'pandoc-numbering' entry.

    It must be a MetaMap whose 'category' is a MetaInlines made of
    exactly one Str.
    """
    if definition['t'] != 'MetaMap' or 'category' not in definition['c']:
        return False

    category = definition['c']['category']
    return (
        category['t'] == 'MetaInlines'
        and len(category['c']) == 1
        and category['c'][0]['t'] == 'Str'
    )
399
400
def hasProperty(definition, name, type):
    """Tell whether definition has a property *name* whose meta type is *type*."""
    properties = definition['c']
    if name not in properties:
        return False
    return properties[name]['t'] == type
402
403
def getProperty(definition, name):
    """Return the content ('c') of the property *name* of definition."""
    entry = definition['c'][name]
    return entry['c']
405
406
def getFirstValue(definition, name):
    """Return the content of the first element of the property *name*."""
    first = getProperty(definition, name)[0]
    return first['c']
408
409
def addListings(doc, format, meta):
    """Prepend the listings requested in the metadata to the document.

    For every correct 'pandoc-numbering' definition that has a 'listing'
    title, an unnumbered level-1 header is added, followed by:

    * for LaTeX output, a raw block that defines and starts a dedicated
      table of contents (optional 'tab' and 'space' lengths, in em);
    * for the other formats, a bullet list of links to the numbered
      items of the category (nothing if the category has no item).

    doc is modified in place (doc[1] is its list of blocks); nothing is
    returned.  Nothing happens when the metadata has no 'pandoc-numbering'
    list.
    """
    if not hasMeta(meta):
        return

    def readLength(definition, name):
        """Return the MetaString property *name* as a float, or None."""
        if not hasProperty(definition, name, 'MetaString'):
            return None
        try:
            return float(getProperty(definition, name))
        except ValueError:
            return None

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not (isCorrect(definition) and hasProperty(definition, 'listing', 'MetaInlines')):
            continue

        # Get the category name
        category = getFirstValue(definition, 'category')

        # Get the title
        title = getProperty(definition, 'listing')

        if format == 'latex':
            # Special case for latex output

            # Get the link color
            if 'toccolor' in meta:
                linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
            else:
                linkcolor = '\\hypersetup{linkcolor=black}'

            # Get the tab, with its default length
            tab = readLength(definition, 'tab')
            if tab is None:
                tab = 1.5

            # Get the space; by default it grows with the deepest section
            # number used by the items of the category
            space = readLength(definition, 'space')
            if space is None:
                level = 0
                for tag in collections.get(category, []):
                    level = max(level, information[tag]['section'].count('.'))
                space = level + 2.3

            # Add a RawBlock
            latexCategory = re.sub('[^a-z]+', '', category)
            latex = [
                linkcolor,
                '\\makeatletter',
                '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}',
                '\\@starttoc{' + latexCategory + '}',
                '\\makeatother'
            ]
            elt = [RawBlock('tex', ''.join(latex))]

        elif category in collections:
            # Compare the version numerically: as strings, '1.9' < '1.16'
            # is False.  Done here so pandoc is only queried when needed.
            version = tuple(int(part) for part in re.findall(r'\d+', pandocVersion())[:2])

            # Prepare the list
            elements = []

            # Loop on the collection
            for tag in collections[category]:
                # Add an item to the list
                text = information[tag]['toc']

                if version < (1, 16):
                    # pandoc 1.15
                    link = Link(text, ['#' + tag, ''])
                else:
                    # pandoc 1.16: links carry attributes
                    link = Link(['', [], []], text, ['#' + tag, ''])

                elements.append([Plain([link])])

            # Add a bullet list
            elt = [BulletList(elements)]

        else:
            # Add nothing
            elt = []

        # Add a new listing
        listings = listings + [Header(1, ['', ['unnumbered'], []], title)] + elt

    # Add listings to the document
    doc[1] = listings + doc[1]
510
511
def getFormat(category, meta):
    """Tell whether the automatic formatting is enabled for category.

    The answers come from the 'format' booleans of the 'pandoc-numbering'
    definitions; a category without one defaults to True.  The metadata
    is read on the first call only and cached in ``getFormat.value``.
    """
    if not hasattr(getFormat, 'value'):
        getFormat.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'format', 'MetaBool'):
                    getFormat.value[getFirstValue(definition, 'category')] = getProperty(definition, 'format')

    # Unknown categories are recorded with the default
    return getFormat.value.setdefault(category, True)
524
525
def getCiteShortCut(category, meta):
    """Tell whether the '@prefix:name' shortcut is enabled for category.

    The answers come from the 'cite-shortcut' booleans of the
    'pandoc-numbering' definitions; a category without one defaults to
    False.  The metadata is read on the first call only and cached in
    ``getCiteShortCut.value``.
    """
    if not hasattr(getCiteShortCut, 'value'):
        getCiteShortCut.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'cite-shortcut', 'MetaBool'):
                    getCiteShortCut.value[getFirstValue(definition, 'category')] = \
                        getProperty(definition, 'cite-shortcut')

    # Unknown categories are recorded with the default
    return getCiteShortCut.value.setdefault(category, False)
538
539
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] sectioning levels of category.

    For each correct 'pandoc-numbering' definition the levels are taken
    from its 'sectioning' property (a single Str such as '-.+.') or,
    when there is no such property, from its 'first' and 'last'
    properties.  A category without definition gets [0, 0].  The
    metadata is read on the first call only and cached in
    ``getDefaultLevels.value``.
    """
    if not hasattr(getDefaultLevels, 'value'):
        getDefaultLevels.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if not isCorrect(definition):
                    continue

                levelInf = 0
                levelSup = 0

                if (hasProperty(definition, 'sectioning', 'MetaInlines')
                        and len(getProperty(definition, 'sectioning')) == 1
                        and getProperty(definition, 'sectioning')[0]['t'] == 'Str'):

                    match = re.match('^' + headerRegex + '$', getFirstValue(definition, 'sectioning'))
                    if match:
                        # Compute the levelInf and levelSup values
                        # (each level is written with 2 characters)
                        levelInf = len(match.group('hidden')) // 2
                        levelSup = len(match.group('header')) // 2
                else:
                    if hasProperty(definition, 'first', 'MetaString'):
                        try:
                            # 'first' is 1-based; keep the result in 0..6
                            levelInf = max(min(int(getProperty(definition, 'first')) - 1, 6), 0)
                        except ValueError:
                            # Not a number: keep the default
                            pass
                    if hasProperty(definition, 'last', 'MetaString'):
                        try:
                            # Never below levelInf, never above 6
                            levelSup = max(min(int(getProperty(definition, 'last')), 6), levelInf)
                        except ValueError:
                            # Not a number: keep the default
                            pass

                getDefaultLevels.value[getFirstValue(definition, 'category')] = [levelInf, levelSup]

    # Unknown categories are recorded with the default
    return getDefaultLevels.value.setdefault(category, [0, 0])
576
577
def getClasses(category, meta):
    """Return the list of classes to put on the elements of category.

    The classes come from the 'classes' lists of the 'pandoc-numbering'
    definitions; a category without one gets [category].  The metadata
    is read on the first call only and cached in ``getClasses.value``.
    """
    if not hasattr(getClasses, 'value'):
        getClasses.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'classes', 'MetaList'):
                    classes = [stringify(elt) for elt in getProperty(definition, 'classes')]
                    getClasses.value[getFirstValue(definition, 'category')] = classes

    # Unknown categories are recorded with the default
    return getClasses.value.setdefault(category, [category])
593
594
def pandocVersion():
    """Return the version of the installed pandoc as a string (e.g. '1.16.0.2').

    The version is read from the output of ``pandoc -v`` on the first
    call and cached in ``pandocVersion.value``.

    Raises RuntimeError when the output does not contain a
    'pandoc <version>' line.
    """
    if not hasattr(pandocVersion, 'value'):
        process = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, _err = process.communicate()
        found = re.search(b'pandoc (?P<version>.*)', out)
        if found is None:
            raise RuntimeError('unable to read the pandoc version from the output of "pandoc -v"')
        # strip(): '.*' also captures a trailing '\r' or blanks
        pandocVersion.value = found.group('version').decode('utf-8').strip()
    return pandocVersion.value
600
601
def main():
    """Run the filter: number the elements, then resolve the references."""
    toJSONFilters([numbering, referencing])
603
604
# Run the filter only when the file is executed as a script.
if __name__ == '__main__':
    main()
606