Completed
Push — master ( db4538...cf398c )
by Christophe
01:11
created

getFormat()   B

Complexity

Conditions 7

Size

Total Lines 13

Duplication

Lines 13
Ratio 100 %

Importance

Changes 0
Metric Value
cc 7
dl 13
loc 13
rs 7.3333
c 0
b 0
f 0
1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Number of elements met so far, keyed by 'basicCategory:leading' (see numbering)
count = {}
# Data recorded for each numbered element, keyed by its tag (see numbering)
information = {}
# Tags of the numbered elements, grouped by basic category (see numbering)
collections = {}
# Current counter of each of the six header levels
headers = [0, 0, 0, 0, 0, 0]
# Sectioning prefix: a run of '-.' (the 'hidden' group) followed by a run of '+.'
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Converts a list of actions into a filter that reads a JSON-formatted
    pandoc document from stdin, transforms it by walking the tree
    with the actions, and returns a new JSON-formatted pandoc document
    to stdout.  The argument is a list of functions action(key, value, format, meta),
    where key is the type of the pandoc object (e.g. 'Str', 'Para'),
    value is the contents of the object (e.g. a string for 'Str',
    a list of inline elements for 'Para'), format is the target
    output format (which will be taken for the first command line
    argument if present, and is '' otherwise), and meta is the document's
    metadata.
    If the function returns None, the object to which it applies
    will remain unchanged.  If it returns an object, the object will
    be replaced.    If it returns a list, the list will be spliced in to
    the list to which the target object belongs.    (So, returning an
    empty list deletes the object.)

    Once all the actions have been applied, addListings is called on the
    resulting document before it is written.
    """
    try:
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 does not have sys.stdin.buffer.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        input_stream = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(input_stream.read())

    # Do not call this local 'format': that would shadow the builtin
    output_format = sys.argv[1] if len(sys.argv) > 1 else ""

    # Every action receives the metadata of the document as it was read
    meta = doc[0]['unMeta']
    altered = reduce(lambda x, action: walk(x, action, output_format, meta), actions, doc)

    addListings(altered, output_format, altered[0]['unMeta'])
    json.dump(altered, sys.stdout)
54
55
def removeAccents(string):
    """Return string without its combining marks.

    The string is decomposed with the NFKD normalization and every
    combining character of the result is dropped.
    """
    nfkd_form = unicodedata.normalize('NFKD', string)
    return u"".join(c for c in nfkd_form if not unicodedata.combining(c))
58
59
def toIdentifier(string):
    """Turn string into an identifier.

    The string is lowercased and stripped of its accents, each run of
    characters other than ASCII letters, digits, '_' and '-' becomes a
    single dash, and everything before the first letter is removed.
    """
    # Replace invalid characters by dash
    string = re.sub('[^0-9a-zA-Z_-]+', '-', removeAccents(string.lower()))

    # Remove every leading character that is not a letter (digits, dashes, underscores)
    string = re.sub('^[^a-zA-Z]+', '', string)

    return string
67
68
def toLatex(x):
    """Walks the tree x and returns its concatenated string content.

    Strings and inline code are kept as they are, math is wrapped in '$',
    line breaks and spaces become a single space and the content of notes
    is left out.  The tree x itself is not modified.
    """
    result = []

    def go(key, val, format, meta):
        if key in ['Str', 'MetaString']:
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilter package:
            # the tree is always walked with the 'latex' format (see below)
            result.append('$' + val[1] + '$')
        elif key in ['LineBreak', 'Space']:
            result.append(" ")
        elif key == 'Note':
            # Do not stringify the content of a Note node: returning an empty
            # list keeps walk from descending into it, without emptying the
            # note of the original tree
            return []

    walk(x, go, 'latex', {})
    return ''.join(result)
95
96
def numbering(key, value, format, meta):
    """Pandoc filter action numbering the headers and the tagged paragraphs.

    For a 'Header' without the 'unnumbered' class, the counter of its level
    is incremented and the counters of the deeper levels are reset.

    For a 'Para' ending with a space followed by a word of the form
    '<sectioning>#<prefix>:<name>' (sectioning, prefix and name being
    optional), the paragraph is replaced by a numbered version:

    - the text before the marker is the description; a trailing part
      enclosed in parentheses is taken out of it and used as the title;
    - the category is the prefix or, without prefix, an identifier built
      from the description;
    - the numbers, the link and the toc entry are stored in information
      under the tag of the element and the tag is added to collections.

    When the marker uses '##' instead of '#', the paragraph is returned
    with the '##' replaced by a single '#' and nothing is numbered.

    Returns the new Para element, or None when the element is left unchanged.
    """
    if key == 'Header':
        level = value[0]
        classes = value[1][1]
        if 'unnumbered' not in classes:
            headers[level - 1] += 1
            # A new section resets the counters of all the deeper levels
            for index in range(level, 6):
                headers[index] = 0
        return None

    if key != 'Para':
        return None

    # The marker must be the last word and be preceded by a space.
    # Only the element types are compared, so that a Space is recognized
    # whether or not it carries an (empty) content.
    if not (len(value) >= 3 and value[-2]['t'] == 'Space' and value[-1]['t'] == 'Str'):
        return None

    last = value[-1]['c']

    match = re.match(
        '^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$',
        last
    )

    if not match:
        if re.match('^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$', last):
            # Special case where the last element is '...##...'
            value[-1]['c'] = value[-1]['c'].replace('##', '#', 1)
            return Para(value)
        return None

    # Detect the title
    title = []
    if value[-3]['t'] == 'Str' and value[-3]['c'][-1:] == ')':
        for (i, item) in enumerate(value):
            # startswith is safe on an empty string, unlike item['c'][0]
            if item['t'] == 'Str' and item['c'].startswith('('):
                title = value[i:-2]
                title[0]['c'] = title[0]['c'][1:]
                title[-1]['c'] = title[-1]['c'][:-1]
                # Drop the title and the element before it; when the title
                # opens the paragraph (i == 0) there is nothing before it,
                # and i - 1 must not wrap around to -1
                value = value[:max(i - 1, 0)] + value[-2:]
                break

    # Compute the description
    description = value[:-2]

    # Compute the basicCategory and the category
    prefix = match.group('prefix')
    if prefix is None:
        basicCategory = toIdentifier(stringify(description))
    else:
        basicCategory = prefix

    # Compute the levelInf and levelSup values (each level is written with two characters)
    levelInf = len(match.group('hidden')) // 2
    levelSup = len(match.group('header')) // 2

    # Get the default inf and sup level
    if levelInf == 0 and levelSup == 0:
        [levelInf, levelSup] = getDefaultLevels(basicCategory, meta)

    # Compute the section number
    sectionNumber = '.'.join(map(str, headers[:levelSup]))

    # Compute the leading (composed of the section numbering and a dot)
    if levelSup != 0:
        leading = sectionNumber + '.'
    else:
        leading = ''

    category = basicCategory + ':' + leading

    # Count the element (a new category starts at 1)
    count[category] = count.get(category, 0) + 1

    # Get the number
    number = str(count[category])

    # Determine the final tag
    name = match.group('name')
    if name is None:
        tag = category + number
    else:
        tag = basicCategory + ':' + name

    # Replace the '-.-.+.+...#' by the category count (omitting the hidden part)
    localNumber = '.'.join(map(str, headers[levelInf:levelSup] + [number]))

    # Compute the globalNumber
    if sectionNumber:
        globalNumber = sectionNumber + '.' + number
    else:
        globalNumber = number

    classes = getClasses(basicCategory, meta)

    # Is the automatic formatting required for this category?
    automatic = getFormat(basicCategory, meta)

    if automatic:
        # Prepare the final text
        text = [Strong(description + [Space(), Str(localNumber)])]

        # Add the title to the final text
        if title:
            text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

        # Compute the link
        link = description + [Space(), Str(localNumber)]

        # Compute the toc
        toc = [Str(globalNumber), Space()] + (title if title else description)
    else:
        # Prepare the final text
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]

        # Compute the link
        link = [Span(['', ['pandoc-numbering-link'] + classes, []], text)]

        # Compute the toc
        toc = [Span(['', ['pandoc-numbering-toc'] + classes, []], text)]

    # Store the numbers and the label for automatic numbering (See referencing function)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # Prepare the contents
    contents = [Span([tag, ['pandoc-numbering-text'] + classes, []], text)]

    # Compute collections
    collections.setdefault(basicCategory, []).append(tag)

    # Special case for LaTeX
    if format == 'latex' and automatic:
        latexCategory = re.sub('[^a-z]+', '', basicCategory)
        entry = title if title else description
        latex = '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory + \
            '}{\\protect\\numberline {' + leading + number + '}{\\ignorespaces ' + toLatex(entry) + '}}'
        contents.insert(0, RawInline('tex', latex))

    # Return the contents in a Para element
    return Para(contents)
251
252
# State shared by referencing (which sets it) and replacing (which reads it):
# the list of inline elements to insert...
replace = None
# ... and the marker to look for in the 'Str' elements
search = None
254
255
def lowering(key, value, format, meta):
    """Pandoc filter action replacing each 'Str' by its lowercase version.

    Returns None (element left unchanged) for every other key.
    """
    if key == 'Str':
        return Str(value.lower())
258
259
def referencing(key, value, format, meta):
260
    global information, replace, search
261
262
    # Is it a link with a right reference?
263
    if key == 'Link':
264
        if pandocVersion() < '1.16':
265
            # pandoc 1.15
266
            [text, [reference, title]] = value
267
        else:
268
            # pandoc > 1.15
269
            [attributes, text, [reference, title]] = value
270
271
        if re.match('^(#([a-zA-Z][\w:.-]*))$', reference):
272
            # Compute the name
273
            tag = reference[1:]
274
275
            if tag in information:
276
                if pandocVersion() < '1.16':
277
                    # pandoc 1.15
278
                    i = 0
279
                else:
280
                    # pandoc > 1.15
281
                    i = 1
282
283
                # Replace all '#t', '#T', '#d', '#D', '#s', '#g', '#c', '#n', '#' with the corresponding text in the title
284
                value[i + 1][1] = value[i + 1][1].replace('#t', stringify(information[tag]['title']).lower())
285
                value[i + 1][1] = value[i + 1][1].replace('#T', stringify(information[tag]['title']))
286
                value[i + 1][1] = value[i + 1][1].replace('#d', stringify(information[tag]['description']).lower())
287
                value[i + 1][1] = value[i + 1][1].replace('#D', stringify(information[tag]['description']))
288
                value[i + 1][1] = value[i + 1][1].replace('#s', information[tag]['section'])
289
                value[i + 1][1] = value[i + 1][1].replace('#g', information[tag]['global'])
290
                value[i + 1][1] = value[i + 1][1].replace('#c', information[tag]['count'])
291
                value[i + 1][1] = value[i + 1][1].replace('#n', information[tag]['local'])
292
                value[i + 1][1] = value[i + 1][1].replace('#', information[tag]['local'])
293
294
                if text == []:
295
                    # The link text is empty, replace it with the default label
296
                    value[i] = information[tag]['link']
297
                else:
298
                    # The link text is not empty
299
300
                    #replace all '#t' with the title in lower case
301
                    replace = walk(information[tag]['title'], lowering, format, meta)
302
                    search = '#t'
303
                    value[i] = walk(value[i], replacing, format, meta)
304
305
                    #replace all '#T' with the title
306
                    replace = information[tag]['title']
307
                    search = '#T'
308
                    value[i] = walk(value[i], replacing, format, meta)
309
310
                    #replace all '#d' with the description in lower case
311
                    replace = walk(information[tag]['description'], lowering, format, meta)
312
                    search = '#d'
313
                    value[i] = walk(value[i], replacing, format, meta)
314
315
                    #replace all '#D' with the description
316
                    replace = information[tag]['description']
317
                    search = '#D'
318
                    value[i] = walk(value[i], replacing, format, meta)
319
320
                    #replace all '#s' with the corresponding number
321
                    replace = [Str(information[tag]['section'])]
322
                    search = '#s'
323
                    value[i] = walk(value[i], replacing, format, meta)
324
325
                    #replace all '#g' with the corresponding number
326
                    replace = [Str(information[tag]['global'])]
327
                    search = '#g'
328
                    value[i] = walk(value[i], replacing, format, meta)
329
330
                    #replace all '#c' with the corresponding number
331
                    replace = [Str(information[tag]['count'])]
332
                    search = '#c'
333
                    value[i] = walk(value[i], replacing, format, meta)
334
335
                    #replace all '#n' with the corresponding number
336
                    replace = [Str(information[tag]['local'])]
337
                    search = '#n'
338
                    value[i] = walk(value[i], replacing, format, meta)
339
340
                    #replace all '#' with the corresponding number
341
                    replace = [Str(information[tag]['local'])]
342
                    search = '#'
343
                    value[i] = walk(value[i], replacing, format, meta)
344
345
    elif key == 'Cite':
346
        match = re.match('^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$', value[1][0]['c'])
347
        if match != None and getCiteShortCut(match.group('category'), meta):
348
349
            # Deal with @prefix:name shortcut
350
            tag = match.group('tag')
351
            if tag in information:
352
                if pandocVersion() < '1.16':
353
                    # pandoc 1.15
354
                    return Link([Str(information[tag]['local'])], ['#' + tag, ''])
355
                else:
356
                    # pandoc > 1.15
357
                    return Link(['', [], []], [Str(information[tag]['local'])], ['#' + tag, ''])
358
359
def replacing(key, value, format, meta):
    """Pandoc filter action replacing a marker inside the 'Str' elements.

    Each occurrence of the module-level search string found in a 'Str' is
    replaced by the inline elements of the module-level replace list; the
    non-empty pieces of text around the occurrences are kept as 'Str'.

    Returns the list of resulting elements, or None (element left
    unchanged) when the key is not 'Str' or the marker is not found.
    """
    if key == 'Str':
        pieces = value.split(search)
        if len(pieces) > 1:

            ret = []

            if pieces[0] != '':
                ret.append(Str(pieces[0]))

            # Every piece after the first one follows an occurrence of the marker
            for piece in pieces[1:]:
                ret.extend(replace)
                if piece != '':
                    ret.append(Str(piece))

            return ret
376
377
def hasMeta(meta):
    """Tell whether meta holds a 'pandoc-numbering' entry of type MetaList."""
    return 'pandoc-numbering' in meta and meta['pandoc-numbering']['t'] == 'MetaList'
379
380
def isCorrect(definition):
    """Tell whether definition is a MetaMap whose 'category' entry is a
    MetaInlines made of exactly one 'Str' element.
    """
    return definition['t'] == 'MetaMap' and\
        'category' in definition['c'] and\
        definition['c']['category']['t'] == 'MetaInlines' and\
        len(definition['c']['category']['c']) == 1 and\
        definition['c']['category']['c'][0]['t'] == 'Str'
386
387
def hasProperty(definition, name, type):
    """Tell whether definition has an entry called name whose metadata type is type."""
    return name in definition['c'] and definition['c'][name]['t'] == type
389
390
def getProperty(definition, name):
    """Return the content of the entry called name of definition."""
    return definition['c'][name]['c']
392
393
def getFirstValue(definition, name):
    """Return the content of the first element of the entry called name of definition."""
    return getProperty(definition, name)[0]['c']
395
396
def _listingLength(definition, name):
    """Return the MetaString entry called name of definition as a float.

    Returns None when the entry is missing, is not a MetaString or does
    not hold a number.
    """
    if hasProperty(definition, name, 'MetaString'):
        try:
            return float(getProperty(definition, name))
        except ValueError:
            pass
    return None


def addListings(doc, format, meta):
    """Insert the listings of numbered elements at the beginning of doc.

    One listing is built for each correct definition of the
    'pandoc-numbering' metadata that has a 'listing' entry (the title of
    the listing).  Each listing is an unnumbered level 1 header followed:

    - for the 'latex' format, by a raw TeX block declaring and starting a
      table of contents for the category; the 'tab' and 'space' entries of
      the definition give its lengths (in em) and the 'toccolor' metadata
      its link color;
    - for the other formats, by a bullet list of links to the elements of
      the category, or by nothing when no element was numbered.

    doc is modified in place; nothing is done without a 'pandoc-numbering'
    metadata list.
    """
    if not hasMeta(meta):
        return

    listings = []

    # Loop on all listings definition
    for definition in meta['pandoc-numbering']['c']:
        if not (isCorrect(definition) and hasProperty(definition, 'listing', 'MetaInlines')):
            continue

        # Get the category name
        category = getFirstValue(definition, 'category')

        # Get the title
        title = getProperty(definition, 'listing')

        if format == 'latex':

            # Special case for latex output

            # Get the link color
            if 'toccolor' in meta:
                linkcolor = '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
            else:
                linkcolor = '\\hypersetup{linkcolor=black}'

            # Get the tab, falling back to the default tab length
            tab = _listingLength(definition, 'tab')
            if tab is None:
                tab = 1.5

            # Get the space; the default one grows with the deepest section
            # number found in the category
            space = _listingLength(definition, 'space')
            if space is None:
                level = 0
                for tag in collections.get(category, []):
                    level = max(level, information[tag]['section'].count('.'))
                space = level + 2.3

            # Add a RawBlock
            latexCategory = re.sub('[^a-z]+', '', category)
            latex = [
                linkcolor,
                '\\makeatletter',
                '\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{' + str(space) + 'em}}',
                '\\@starttoc{' + latexCategory + '}',
                '\\makeatother'
            ]
            elt = [RawBlock('tex', ''.join(latex))]
        elif category in collections:
            # The version numbers are compared as integers: comparing the
            # version strings would sort '1.9' after '1.16'
            legacyLink = tuple(int(part) for part in re.findall(r'\d+', pandocVersion())[:2]) < (1, 16)

            # Prepare the list
            elements = []

            # Loop on the collection
            for tag in collections[category]:

                # Add an item to the list
                text = information[tag]['toc']

                if legacyLink:
                    # pandoc 1.15
                    link = Link(text, ['#' + tag, ''])
                else:
                    # pandoc 1.16
                    link = Link(['', [], []], text, ['#' + tag, ''])

                elements.append([Plain([link])])

            # Add a bullet list
            elt = [BulletList(elements)]
        else:
            # Add nothing
            elt = []

        # Add a new listing
        listings = listings + [Header(1, ['', ['unnumbered'], []], title)] + elt

    # Add listings to the document
    doc[1] = listings + doc[1]
497
498 View Code Duplication
def getFormat(category, meta):
    """Tell whether the automatic formatting applies to category.

    The value is the boolean 'format' entry of the definition of the
    category in the 'pandoc-numbering' metadata, and True for a category
    without such an entry.

    The metadata is read on the first call only: the values are cached in
    getFormat.value and the meta of the later calls is not looked at.
    """
    if not hasattr(getFormat, 'value'):
        getFormat.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'format', 'MetaBool'):
                    getFormat.value[getFirstValue(definition, 'category')] = getProperty(definition, 'format')

    # An unknown category is recorded with the default value
    return getFormat.value.setdefault(category, True)
511
512 View Code Duplication
def getCiteShortCut(category, meta):
    """Tell whether the '@category:name' citation shortcut is enabled for category.

    The value is the boolean 'cite-shortcut' entry of the definition of
    the category in the 'pandoc-numbering' metadata, and False for a
    category without such an entry.

    The metadata is read on the first call only: the values are cached in
    getCiteShortCut.value and the meta of the later calls is not looked at.
    """
    if not hasattr(getCiteShortCut, 'value'):
        getCiteShortCut.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'cite-shortcut', 'MetaBool'):
                    getCiteShortCut.value[getFirstValue(definition, 'category')] = getProperty(definition, 'cite-shortcut')

    # An unknown category is recorded with the default value
    return getCiteShortCut.value.setdefault(category, False)
525
526
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] sectioning levels of category.

    For each correct definition of the 'pandoc-numbering' metadata, the
    levels come either from its 'sectioning' entry (a single string made
    of '-.' and '+.' pairs, as matched by headerRegex) or, without such an
    entry, from its 'first' and 'last' entries (strings holding integers;
    the first level is shifted by one and both are clamped).  Levels that
    cannot be read stay at 0, and a category without definition gets [0, 0].

    The metadata is read on the first call only: the values are cached in
    getDefaultLevels.value and the meta of the later calls is not looked at.
    """
    if not hasattr(getDefaultLevels, 'value'):
        getDefaultLevels.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if not isCorrect(definition):
                    continue

                levelInf = 0
                levelSup = 0
                if hasProperty(definition, 'sectioning', 'MetaInlines') and\
                   len(getProperty(definition, 'sectioning')) == 1 and\
                   getProperty(definition, 'sectioning')[0]['t'] == 'Str':

                    match = re.match('^' + headerRegex + '$', getFirstValue(definition, 'sectioning'))
                    if match:
                        # Compute the levelInf and levelSup values
                        # (each level is written with two characters)
                        levelInf = len(match.group('hidden')) // 2
                        levelSup = len(match.group('header')) // 2
                else:
                    if hasProperty(definition, 'first', 'MetaString'):
                        try:
                            levelInf = max(min(int(getProperty(definition, 'first')) - 1, 6), 0)
                        except ValueError:
                            # Not an integer: keep the default level
                            pass
                    if hasProperty(definition, 'last', 'MetaString'):
                        try:
                            levelSup = max(min(int(getProperty(definition, 'last')), 6), levelInf)
                        except ValueError:
                            # Not an integer: keep the default level
                            pass
                getDefaultLevels.value[getFirstValue(definition, 'category')] = [levelInf, levelSup]

    # An unknown category is recorded with the default value
    return getDefaultLevels.value.setdefault(category, [0, 0])
563
564 View Code Duplication
def getClasses(category, meta):
    """Return the list of classes attached to the elements of category.

    The classes are the stringified items of the 'classes' list of the
    definition of the category in the 'pandoc-numbering' metadata; a
    category without such a list gets its own name as only class.

    The metadata is read on the first call only: the values are cached in
    getClasses.value and the meta of the later calls is not looked at.
    """
    if not hasattr(getClasses, 'value'):
        getClasses.value = {}
        if hasMeta(meta):
            # Loop on all listings definition
            for definition in meta['pandoc-numbering']['c']:
                if isCorrect(definition) and hasProperty(definition, 'classes', 'MetaList'):
                    classes = [stringify(elt) for elt in getProperty(definition, 'classes')]
                    getClasses.value[getFirstValue(definition, 'category')] = classes

    # An unknown category is recorded with the default value
    return getClasses.value.setdefault(category, [category])
580
581
def pandocVersion():
    """Return the version printed by the `pandoc -v` command, as a string.

    The command is run on the first call only: the version is cached in
    pandocVersion.value.

    Raises RuntimeError when no 'pandoc <version>' line is found in the
    output of the command.
    """
    if not hasattr(pandocVersion, 'value'):
        process = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, _ = process.communicate()
        match = re.search(b'pandoc (?P<version>.*)', out)
        if match is None:
            raise RuntimeError('Unable to read the pandoc version from the output of "pandoc -v"')
        # '.' also matches '\r': strip the version so that a CRLF line
        # ending does not end up in it
        pandocVersion.value = match.group('version').decode('utf-8').strip()
    return pandocVersion.value
587
588
def main():
    """Run the filter: number the elements, then resolve the references."""
    toJSONFilters([numbering, referencing])


if __name__ == '__main__':
    main()
593