Completed
Push — master ( 73c814...850425 )
by Christophe
01:22
created

getTab()   A

Complexity

Conditions 4

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
c 1
b 0
f 0
dl 0
loc 15
rs 9.2
1
#!/usr/bin/env python
2
3
"""
4
Pandoc filter to number all kinds of things.
5
"""
6
7
from pandocfilters import walk, stringify, Str, Space, Para, BulletList, Plain, Strong, Span, Link, Emph, RawInline, RawBlock, Header
8
from functools import reduce
9
import json
10
import io
11
import sys
12
import codecs
13
import re
14
import unicodedata
15
import subprocess
16
17
# Per-category counters, keyed by '<basic category>:<leading section number>'.
count = {}
# Data recorded for every numbered paragraph, keyed by its tag.
information = {}
# Tags of the numbered paragraphs, grouped by basic category.
collections = {}
# Current counter for each of the six header levels.
headers = [0, 0, 0, 0, 0, 0]
# Sectioning prefix: a run of '-.' (group 'hidden') followed by a run of '+.';
# group 'header' spans both runs.  Written as a raw string so that '\.' and
# '\+' reach the regex engine without being invalid string escapes.
headerRegex = r'(?P<header>(?P<hidden>(-\.)*)(\+\.)*)'
22
23
def toJSONFilters(actions):
    """Run a list of actions as a pandoc JSON filter from stdin to stdout.

    The JSON-formatted pandoc document is read from stdin (decoded as
    UTF-8), transformed by walking the tree with each action in turn,
    completed with the listings, and written back as JSON to stdout.

    Each action is a function action(key, value, format, meta) where key
    is the type of the pandoc object (e.g. 'Str', 'Para'), value is the
    contents of the object (e.g. a string for 'Str', a list of inline
    elements for 'Para'), format is the target output format (the first
    command line argument, or '' when there is none) and meta is the
    document's metadata.

    If an action returns None, the object is left unchanged.  If it
    returns an object, that object replaces the original.  If it returns
    a list, the list is spliced into the list the object belongs to (so
    an empty list deletes the object).
    """
    try:
        input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
    except AttributeError:
        # Python 2 has no sys.stdin.buffer: fall back on a codecs reader.
        # REF: http://stackoverflow.com/questions/2467928/python-unicodeencodeerror-when-reading-from-stdin
        input_stream = codecs.getreader("utf-8")(sys.stdin)

    doc = json.loads(input_stream.read())

    # The output format is the first command line argument, when present
    format = sys.argv[1] if len(sys.argv) > 1 else ""

    # Chain the actions: each one walks the result of the previous one,
    # always with the metadata of the document as it was read
    altered = doc
    for action in actions:
        altered = walk(altered, action, format, doc[0]['unMeta'])

    addListings(altered, format, altered[0]['unMeta'])
    json.dump(altered, sys.stdout)
54
55
def removeAccents(string):
    """Return string with its combining marks (accents) stripped.

    The string is decomposed with NFKD normalization and every combining
    character of the decomposition is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    return u"".join(char for char in decomposed if not unicodedata.combining(char))
58
59
def toIdentifier(string):
    """Turn string into an identifier.

    The string is lowercased and stripped of its accents, every run of
    characters other than ASCII letters, digits, '_' and '-' becomes a
    single dash, and all leading characters that are not ASCII letters
    are removed.
    """
    normalized = removeAccents(string.lower())

    # Replace each run of invalid characters by a dash
    dashed = re.sub('[^0-9a-zA-Z_-]+', '-', normalized)

    # Drop everything before the first letter
    return re.sub('^[^a-zA-Z]+', '', dashed)
67
68
def toLatex(x):
    """Walk the tree x and return its concatenated text, without formatting.

    Str and MetaString contents and the text of Code elements are kept as
    they are, Math content is wrapped in '$...$', Space, LineBreak and
    SoftBreak each become a single space, and the content of Note elements
    is left out.  The tree x itself is not modified.
    """
    result = []

    def go(key, val, format, meta):
        if key in ['Str', 'MetaString']:
            result.append(val)
        elif key == 'Code':
            result.append(val[1])
        elif key == 'Math':
            # Modified from the stringify function in the pandocfilters package
            if format == 'latex':
                result.append('$' + val[1] + '$')
            else:
                result.append(val[1])
        elif key in ['LineBreak', 'SoftBreak', 'Space']:
            result.append(" ")
        elif key == 'Note':
            # Do not stringify the content of a Note.  Returning an empty
            # list removes the note from the copy built by walk(), so its
            # children are never visited; emptying val in place would wipe
            # the footnote from the document that is output later.
            return []

    # The walked copy is discarded: only the collected strings matter
    walk(x, go, 'latex', {})
    return ''.join(result)
95
96
def numbering(key, value, format, meta):
    """Filter action dispatching Header and Para elements to their handler.

    Returns what the handler returns; any other element type yields None
    (the element is left unchanged).
    """
    if key == 'Header':
        return numberingHeader(value)
    if key == 'Para':
        return numberingPara(value, format, meta)
    return None
101
102
def numberingHeader(value):
    """Update the global header counters for a Header element.

    value is the content of the Header: [level, [id, classes, attributes],
    content].  Unless the header has the 'unnumbered' class, the counter
    of its level is incremented and the counters of all deeper levels are
    reset to 0.  Nothing is returned.
    """
    [level, [_identifier, classes, _attributes], _content] = value
    if 'unnumbered' in classes:
        return

    headers[level - 1] += 1
    for deeper in range(level, 6):
        headers[deeper] = 0
108
109
def numberingPara(value, format, meta):
    """Number a paragraph whose last word is a '#' numbering marker.

    value is the list of inlines of a Para.  The paragraph is processed
    only if it has at least three inlines, the last but one being a Space
    and the last one a Str.  When that Str is
    '<sectioning>#[prefix:][name]' the paragraph is replaced by the result
    of numberingEffective(); when it uses '##' instead of '#', one '#' is
    removed in place (escape sequence).  Returns the new element, or None
    when the paragraph is left as it is.
    """
    # The Space is recognised by its tag rather than by comparing with
    # Space(), so the test does not depend on whether the JSON carries an
    # (empty) content field for Space elements.
    if len(value) >= 3 and value[-2]['t'] == 'Space' and value[-1]['t'] == 'Str':
        last = value[-1]['c']
        # Raw strings keep '\w' from being an invalid string escape
        match = re.match('^' + headerRegex + r'#((?P<prefix>[a-zA-Z][\w.-]*):)?(?P<name>[a-zA-Z][\w:.-]*)?$', last)
        if match:
            # Is it a Para and the last element is an identifier beginning with '#'
            return numberingEffective(match, value, format, meta)
        elif re.match('^' + headerRegex + r'##(?P<prefix>[a-zA-Z][\w.-]*:)?(?P<name>[a-zA-Z][\w:.-]*)?$', last):
            # Special case where the last element is '...##...'
            return numberingSharpSharp(value)
119
120
def numberingEffective(match, value, format, meta):
    """Build the numbered paragraph replacing a Para ending with '#...'.

    match is the regex match on the last Str of the paragraph and value
    the inlines of the paragraph.  The title, description, category,
    levels and numbers are computed, recorded in the global information
    and collections dictionaries under the tag of the paragraph, and a
    new Para holding a Span identified by that tag is returned.
    """
    # The title must be extracted first: it is removed from value, and the
    # description is what remains
    title = computeTitle(value)
    description = computeDescription(value)
    basicCategory = computeBasicCategory(match, description)
    [levelInf, levelSup] = computeLevels(match, basicCategory, meta)
    sectionNumber = computeSectionNumber(levelSup)
    leading = computeLeading(levelSup, sectionNumber)
    # computeCategory increments the counter that is read just after
    category = computeCategory(basicCategory, leading)
    number = str(count[category])
    tag = computeTag(match, basicCategory, category, number)
    localNumber = computeLocalNumber(levelInf, levelSup, number)
    globalNumber = computeGlobalNumber(sectionNumber, number)
    [text, link, toc] = computeTextLinkToc(meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber)

    # Remember the numbers and labels of this tag (used by the referencing action)
    information[tag] = {
        'section': sectionNumber,
        'local': localNumber,
        'global': globalNumber,
        'count': number,
        'description': description,
        'title': title,
        'link': link,
        'toc': toc
    }

    # The whole text goes in one Span carrying the tag as identifier
    span = Span([tag, ['pandoc-numbering-text'] + getClasses(basicCategory, meta), []], text)
    contents = [span]

    # Register the tag in the collection of its category
    collections.setdefault(basicCategory, []).append(tag)

    # LaTeX output with automatic formatting also gets a contents-line entry
    if format == 'latex' and getFormat(basicCategory, meta):
        addLaTeX(contents, basicCategory, title, description, leading, number)

    return Para(contents)
161
162
def computeTitle(value):
    """Extract the parenthesised title that precedes the final marker.

    value is the list of inlines of the paragraph; its last two items are
    the Space and the '#...' Str.  If the item before them is a Str ending
    with ')', the title starts at the first Str beginning with '(' and
    runs up to that item.  The parentheses are stripped from the title,
    and the title together with the inline just before it (the separating
    space) is removed from value in place.

    Returns the list of inlines of the title, or [] when there is none.
    """
    title = []
    if value[-3]['t'] == 'Str' and value[-3]['c'][-1:] == ')':
        for (i, item) in enumerate(value):
            # Slicing instead of indexing: an empty Str must not raise
            if item['t'] == 'Str' and item['c'][:1] == '(':
                title = value[i:-2]
                title[0]['c'] = title[0]['c'][1:]
                title[-1]['c'] = title[-1]['c'][:-1]
                # Clamp the start at 0: when the title is the very first
                # inline, i - 1 would be -1 and the slice would be empty,
                # leaving the title inside the description.
                del value[max(i - 1, 0):-2]
                break
    return title
173
174
def computeDescription(value):
    """Return a copy of value without its last two items.

    For a numbered paragraph these two items are the Space and the '#...'
    marker ending the paragraph.
    """
    description = value[0:-2]
    return description
176
177
def computeBasicCategory(match, description):
    """Return the basic category of a numbered paragraph.

    It is the 'prefix' group of match when the marker has one; otherwise
    it is the identifier derived from the stringified description.
    """
    prefix = match.group('prefix')
    if prefix is not None:
        return prefix
    return toIdentifier(stringify(description))
182
183
def computeLevels(match, basicCategory, meta):
    """Return [levelInf, levelSup] for a numbered paragraph.

    levelInf is the number of '-.' pairs in the 'hidden' group of match
    and levelSup the number of two-character pairs in its 'header' group.
    When both are 0 the default levels of the category are used instead.
    """
    hidden = match.group('hidden')
    header = match.group('header')
    levels = [len(hidden) // 2, len(header) // 2]

    # No sectioning given in the marker: use the defaults of the category
    if levels == [0, 0]:
        [levelInf, levelSup] = getDefaultLevels(basicCategory, meta)
        levels = [levelInf, levelSup]

    return levels
193
194
def computeSectionNumber(levelSup):
    """Return the current counters of the first levelSup header levels, joined by dots."""
    return '.'.join(str(counter) for counter in headers[:levelSup])
196
197
def computeLeading(levelSup, sectionNumber):
    """Return the leading of a number: the section number followed by a dot.

    The leading is empty when levelSup is 0.
    """
    if levelSup == 0:
        return ''
    return sectionNumber + '.'
203
204
def computeCategory(basicCategory, leading):
    """Return the full category name and increment its counter.

    The category is '<basicCategory>:<leading>'.  Its entry in the global
    count dictionary is created at 0 if needed, then incremented.
    """
    category = basicCategory + ':' + leading

    # A category seen for the first time starts counting from zero
    count[category] = count.get(category, 0) + 1

    return category
214
215
def computeTag(match, basicCategory, category, number):
    """Return the tag identifying a numbered paragraph.

    With an explicit name in the marker the tag is '<basicCategory>:<name>';
    without one it is the category followed by the number.
    """
    name = match.group('name')
    if name is not None:
        return basicCategory + ':' + name
    return category + number
221
222
def computeLocalNumber(levelInf, levelSup, number):
    """Return the local number of a paragraph.

    It is made of the header counters of levels levelInf (excluded, i.e.
    the hidden part) to levelSup, followed by number, joined by dots.
    """
    parts = headers[levelInf:levelSup] + [number]
    return '.'.join(str(part) for part in parts)
225
226
def computeGlobalNumber(sectionNumber, number):
    """Return number prefixed by the section number and a dot, or number alone when the section number is empty."""
    if not sectionNumber:
        return number
    return sectionNumber + '.' + number
232
233
def computeTextLinkToc(meta, basicCategory, description, title, localNumber, globalNumber, sectionNumber):
    """Return [text, link, toc]: the inlines used for a numbered paragraph.

    text is the content of the paragraph itself, link the default text of
    a link to it and toc its entry in a listing.

    When automatic formatting is enabled for the category, text is the
    description and local number in bold, followed by the title in
    emphasised parentheses if there is one; link is the description and
    local number; toc is the global number followed by the title, or by
    the description when there is no title.

    Otherwise text is a list of Spans of classes 'description', 'title',
    'local', 'global' and 'section', and link and toc wrap that same list
    in a Span of class 'pandoc-numbering-link' / 'pandoc-numbering-toc'
    followed by the classes of the category.
    """
    if not getFormat(basicCategory, meta):
        # No automatic formatting: expose every piece in its own Span
        text = [
            Span(['', ['description'], []], description),
            Span(['', ['title'], []], title),
            Span(['', ['local'], []], [Str(localNumber)]),
            Span(['', ['global'], []], [Str(globalNumber)]),
            Span(['', ['section'], []], [Str(sectionNumber)]),
        ]
        classes = getClasses(basicCategory, meta)
        link = [Span(['', ['pandoc-numbering-link'] + classes, []], text)]
        toc = [Span(['', ['pandoc-numbering-toc'] + classes, []], text)]
        return [text, link, toc]

    # Automatic formatting: bold "description number", then the title
    text = [Strong(description + [Space(), Str(localNumber)])]
    if title:
        text = text + [Space(), Emph([Str('(')] + title + [Str(')')])]

    # Default link text
    link = description + [Space(), Str(localNumber)]

    # Listing entry: the title when there is one, the description otherwise
    toc = [Str(globalNumber), Space()] + (title if title else description)

    return [text, link, toc]
269
270
def addLaTeX(contents, basicCategory, title, description, leading, number):
    """Insert a raw LaTeX contents-line entry at the head of contents.

    The entry is a '\\phantomsection\\addcontentsline' command for the list
    named after basicCategory reduced to its lowercase letters.  It shows
    leading followed by number, and the title converted to text, or the
    description when there is no title.  contents is modified in place.
    """
    latexCategory = re.sub('[^a-z]+', '', basicCategory)
    if title:
        entry = title
    else:
        entry = description
    # Every backslash is escaped: '\i' is not a valid string escape
    latex = '\\phantomsection\\addcontentsline{' + latexCategory + '}{' + latexCategory + '}{\\protect\\numberline {' + \
        leading + number + '}{\\ignorespaces ' + toLatex(entry) + '}}'
    contents.insert(0, RawInline('tex', latex))
279
280
def numberingSharpSharp(value):
    """Replace, in place, the first '##' of the last inline of value by a single '#'."""
    last = value[-1]
    last['c'] = last['c'].replace('##', '#', 1)
282
283
# Parameters of the 'replacing' action, set by referencingLink before each
# walk: the marker to look for and the inlines to put in its place.
search = None
replace = None
285
286
def lowering(key, value, format, meta):
    """Filter action replacing every Str by its lowercase version."""
    if key != 'Str':
        return None
    return Str(value.lower())
289
290
def referencing(key, value, format, meta):
    """Filter action dispatching Link and Cite elements to their handler.

    Returns what the handler returns; any other element type yields None
    (the element is left unchanged).
    """
    if key == 'Link':
        return referencingLink(value, format, meta)
    if key == 'Cite':
        return referencingCite(value, format, meta)
    return None
295
296
def referencingLink(value, format, meta):
    """Expand the '#' markers of a link pointing to a numbered paragraph.

    value is the content of a Link; it is modified in place and nothing is
    returned.  The link is processed only when its target is '#<tag>' and
    the tag has been recorded in the global information dictionary.

    In the link title and in the link text the markers are replaced, in
    this order: '#t' / '#T' by the title (lowercased / as is), '#d' / '#D'
    by the description (lowercased / as is), '#s' by the section number,
    '#g' by the global number, '#c' by the count, then '#n' and any
    remaining '#' by the local number.  An empty link text is replaced by
    the default link text recorded for the tag.
    """
    global replace, search

    # Position of the text in value: a Link gained a leading attributes
    # field with pandoc 1.16.
    # NOTE(review): the version is compared as a string, so e.g. '1.9' is
    # not considered lower than '1.16' -- confirm that no such version has
    # to be supported.
    if pandocVersion() < '1.16':
        # pandoc 1.15
        [text, [reference, title]] = value
        i = 0
    else:
        # pandoc > 1.15
        [attributes, text, [reference, title]] = value
        i = 1

    # Raw string keeps '\w' from being an invalid string escape
    if not re.match(r'^(#([a-zA-Z][\w:.-]*))$', reference):
        return

    # Compute the name
    tag = reference[1:]
    if tag not in information:
        return
    entry = information[tag]

    # Replace the markers in the title of the link.  Order matters: the
    # bare '#' must come last or it would eat the '#' of the other markers.
    target = value[i + 1]
    for (marker, string) in [
        ('#t', stringify(entry['title']).lower()),
        ('#T', stringify(entry['title'])),
        ('#d', stringify(entry['description']).lower()),
        ('#D', stringify(entry['description'])),
        ('#s', entry['section']),
        ('#g', entry['global']),
        ('#c', entry['count']),
        ('#n', entry['local']),
        ('#', entry['local']),
    ]:
        target[1] = target[1].replace(marker, string)

    if text == []:
        # The link text is empty, replace it with the default label
        value[i] = entry['link']
        return

    # The link text is not empty: replace the markers in its inlines, in
    # the same order.  Each replacement is built just before it is used and
    # handed to the 'replacing' action through the search/replace globals.
    substitutions = [
        ('#t', lambda: walk(entry['title'], lowering, format, meta)),
        ('#T', lambda: entry['title']),
        ('#d', lambda: walk(entry['description'], lowering, format, meta)),
        ('#D', lambda: entry['description']),
        ('#s', lambda: [Str(entry['section'])]),
        ('#g', lambda: [Str(entry['global'])]),
        ('#c', lambda: [Str(entry['count'])]),
        ('#n', lambda: [Str(entry['local'])]),
        ('#', lambda: [Str(entry['local'])]),
    ]
    for (marker, build) in substitutions:
        replace = build()
        search = marker
        value[i] = walk(value[i], replacing, format, meta)
378
379
def referencingCite(value, format, meta):
    """Turn a '@category:name' citation into a link to a numbered paragraph.

    value is the content of a Cite: [citations, inlines].  The citation is
    converted only if its first inline is a Str of the form
    '@<category>:<name or number>', the cite shortcut is enabled for the
    category and the tag '<category>:<name or number>' has been recorded
    in the global information dictionary.  The link text is the local
    number of the paragraph.

    Returns the Link, or None when the citation is left unchanged.
    """
    inlines = value[1]
    # Only a Str has a string content that can be matched; any other first
    # inline (e.g. a raw inline) cannot be a shortcut.
    if not inlines or inlines[0].get('t') != 'Str':
        return None

    # Raw string keeps '\w', '\d' and '\.' from being invalid string escapes
    match = re.match(r'^(@(?P<tag>(?P<category>[a-zA-Z][\w.-]*):(([a-zA-Z][\w.-]*)|(\d*(\.\d*)*))))$', inlines[0]['c'])
    if match is not None and getCiteShortCut(match.group('category'), meta):

        # Deal with @prefix:name shortcut
        tag = match.group('tag')
        if tag in information:
            if pandocVersion() < '1.16':
                # pandoc 1.15
                return Link([Str(information[tag]['local'])], ['#' + tag, ''])
            else:
                # pandoc > 1.15
                return Link(['', [], []], [Str(information[tag]['local'])], ['#' + tag, ''])
    return None
392
393
def replacing(key, value, format, meta):
    """Filter action replacing the global search marker inside Str elements.

    A Str containing the marker is replaced by a list of inlines: the text
    pieces between the occurrences (empty pieces are skipped), with the
    inlines of the global replace list inserted at each occurrence.  Other
    elements, and Str elements without the marker, yield None.
    """
    if key != 'Str':
        return None

    pieces = value.split(search)
    if len(pieces) <= 1:
        # The marker does not occur in this string
        return None

    inlines = []
    if pieces[0] != '':
        inlines.append(Str(pieces[0]))

    # Each remaining piece follows one occurrence of the marker
    for piece in pieces[1:]:
        inlines.extend(replace)
        if piece != '':
            inlines.append(Str(piece))

    return inlines
409
410
def hasMeta(meta):
    """Tell whether meta has a 'pandoc-numbering' entry of type MetaList."""
    if 'pandoc-numbering' not in meta:
        return False
    return meta['pandoc-numbering']['t'] == 'MetaList'
412
413
def isCorrect(definition):
    """Tell whether definition is a usable category definition.

    It must be a MetaMap with a 'category' entry of type MetaInlines made
    of exactly one Str.
    """
    if definition['t'] != 'MetaMap':
        return False

    fields = definition['c']
    if 'category' not in fields:
        return False

    category = fields['category']
    if category['t'] != 'MetaInlines':
        return False

    inlines = category['c']
    return len(inlines) == 1 and inlines[0]['t'] == 'Str'
419
420
def hasProperty(definition, name, type):
    """Tell whether definition has an entry called name whose meta type is type."""
    fields = definition['c']
    if name not in fields:
        return False
    return fields[name]['t'] == type
422
423
def getProperty(definition, name):
    """Return the content of the entry called name in definition."""
    entry = definition['c'][name]
    return entry['c']
425
426
def getFirstValue(definition, name):
    """Return the content of the first element of the entry called name in definition."""
    first = getProperty(definition, name)[0]
    return first['c']
428
429
def addListings(doc, format, meta):
    """Insert the listings requested in the metadata at the top of doc.

    For each correct definition of the 'pandoc-numbering' metadata having
    a 'listing' entry of type MetaInlines, an unnumbered level-1 header
    with that title is added, followed by the listing itself (LaTeX
    commands for the 'latex' format, a list of links otherwise).  doc is
    modified in place; nothing happens without 'pandoc-numbering' metadata.
    """
    if not hasMeta(meta):
        return

    listings = []

    for definition in meta['pandoc-numbering']['c']:
        # Only definitions asking for a listing are of interest
        if not (isCorrect(definition) and hasProperty(definition, 'listing', 'MetaInlines')):
            continue

        category = getFirstValue(definition, 'category')
        title = getProperty(definition, 'listing')

        listings.append(Header(1, ['', ['unnumbered'], []], title))

        if format == 'latex':
            extendListingsLaTeX(listings, meta, definition, category)
        else:
            extendListingsOther(listings, meta, definition, category)

    # Put the listings before the blocks of the document
    doc[1][0:0] = listings
452
453
def extendListingsLaTeX(listings, meta, definition, category):
    """Append to listings the raw LaTeX block that prints the list of a category.

    The block sets the link colour, defines the '\\l@<name>' command with
    the tab and space lengths (in em) of the definition and starts the
    table of contents called <name>, where <name> is category reduced to
    its lowercase letters.
    """
    space = getSpace(definition, category)
    tab = getTab(definition, category)

    # LaTeX command names may only be made of letters
    latexCategory = re.sub('[^a-z]+', '', category)

    commands = []
    commands.append(getLinkColor(meta))
    commands.append('\\makeatletter')
    commands.append('\\newcommand*\\l@' + latexCategory + '{\\@dottedtocline{1}{' + str(tab) + 'em}{'+ str(space) +'em}}')
    commands.append('\\@starttoc{' + latexCategory + '}')
    commands.append('\\makeatother')

    listings.append(RawBlock('tex', ''.join(commands)))
466
467
def getLinkColor(meta):
    """Return the '\\hypersetup' command setting the link colour.

    The colour is the stringified 'toccolor' metadata when there is one,
    black otherwise.
    """
    if 'toccolor' not in meta:
        return '\\hypersetup{linkcolor=black}'
    return '\\hypersetup{linkcolor=' + stringify(meta['toccolor']['c']) + '}'
473
474
def getTab(definition, category):
    """Return the tab length of a listing, as a float.

    It is the 'tab' entry of definition when it is a MetaString that can
    be converted to a float, and 1.5 otherwise.  category is not used.
    """
    default = 1.5

    if not hasProperty(definition, 'tab', 'MetaString'):
        return default

    try:
        return float(getProperty(definition, 'tab'))
    except ValueError:
        # Not a number: fall back on the default length
        return default
489
490
def getSpace(definition, category):
    """Return the space length of a listing, as a float.

    It is the 'space' entry of definition when it is a MetaString that can
    be converted to a float.  Otherwise it is 2.3 plus the highest number
    of dots found in the section numbers of the paragraphs collected for
    category (0 when there is none).
    """
    if hasProperty(definition, 'space', 'MetaString'):
        try:
            return float(getProperty(definition, 'space'))
        except ValueError:
            # Not a number: compute the default length below
            pass

    # Default length: grows with the deepest section number of the category
    depth = 0
    for tag in collections.get(category, []):
        depth = max(depth, information[tag]['section'].count('.'))
    return depth + 2.3
510
511
def extendListingsOther(listings, meta, definition, category):
    """Append to listings a bullet list of links to the paragraphs of category.

    Each item is a link to the tag of a paragraph, with the toc text
    recorded for it.  Nothing is appended when no paragraph has been
    collected for the category.  meta and definition are not used.
    """
    if category not in collections:
        return

    items = []
    for tag in collections[category]:
        text = information[tag]['toc']
        target = ['#' + tag, '']

        if pandocVersion() < '1.16':
            # pandoc 1.15: a Link has no attributes
            link = Link(text, target)
        else:
            # pandoc 1.16: a Link starts with its attributes
            link = Link(['', [], []], text, target)

        items.append([Plain([link])])

    listings.append(BulletList(items))
533
534
def getValue(category, meta, fct, default, analyzeDefinition):
    """Return the value of a per-category setting cached on the function fct.

    On the first call for fct, the cache fct.value is created and
    analyzeDefinition is called on every correct definition of the
    'pandoc-numbering' metadata; it is expected to fill fct.value.  A
    category absent from the cache is then stored with default.
    """
    if not hasattr(fct, 'value'):
        fct.value = {}
        definitions = meta['pandoc-numbering']['c'] if hasMeta(meta) else []
        for definition in definitions:
            if isCorrect(definition):
                analyzeDefinition(definition)

    # Unknown categories are recorded with the default value
    return fct.value.setdefault(category, default)
547
548
def getFormat(category, meta):
    """Return the 'format' setting of category (True when not defined)."""
    def readFormat(definition):
        # Only a boolean 'format' entry is taken into account
        if hasProperty(definition, 'format', 'MetaBool'):
            name = getFirstValue(definition, 'category')
            getFormat.value[name] = getProperty(definition, 'format')

    return getValue(category, meta, getFormat, True, readFormat)
554
555
def getCiteShortCut(category, meta):
    """Return the 'cite-shortcut' setting of category (False when not defined)."""
    def readCiteShortCut(definition):
        # Only a boolean 'cite-shortcut' entry is taken into account
        if hasProperty(definition, 'cite-shortcut', 'MetaBool'):
            name = getFirstValue(definition, 'category')
            getCiteShortCut.value[name] = getProperty(definition, 'cite-shortcut')

    return getValue(category, meta, getCiteShortCut, False, readCiteShortCut)
561
562
def getLevelsFromYaml(definition):
    """Return [levelInf, levelSup] read from the 'first' and 'last' entries.

    levelInf is 'first' minus one, kept between 0 and 6; levelSup is
    'last', at most 6 and at least levelInf.  An entry that is missing,
    not a MetaString or not an integer leaves its level at 0.
    """
    levelInf = 0
    if hasProperty(definition, 'first', 'MetaString'):
        try:
            first = int(getProperty(definition, 'first'))
            levelInf = max(min(first - 1, 6), 0)
        except ValueError:
            pass

    levelSup = 0
    if hasProperty(definition, 'last', 'MetaString'):
        try:
            last = int(getProperty(definition, 'last'))
            levelSup = max(min(last, 6), levelInf)
        except ValueError:
            pass

    return [levelInf, levelSup]
576
577
def getLevelsFromRegex(definition):
    """Return [levelInf, levelSup] read from the 'sectioning' entry.

    The entry must be made only of '-.' pairs followed by '+.' pairs:
    levelInf is the number of '-.' pairs and levelSup the total number of
    pairs.  Any other value gives [0, 0].
    """
    sectioning = getFirstValue(definition, 'sectioning')
    match = re.match('^' + headerRegex + '$', sectioning)
    if not match:
        return [0, 0]

    hidden = match.group('hidden')
    header = match.group('header')
    return [len(hidden) // 2, len(header) // 2]
584
585
def getDefaultLevels(category, meta):
    """Return the default [levelInf, levelSup] of category ([0, 0] when not defined)."""
    def readLevels(definition):
        name = getFirstValue(definition, 'category')

        # A 'sectioning' entry made of a single Str takes precedence over
        # the 'first' and 'last' entries
        useRegex = hasProperty(definition, 'sectioning', 'MetaInlines') and\
            len(getProperty(definition, 'sectioning')) == 1 and\
            getProperty(definition, 'sectioning')[0]['t'] == 'Str'

        if useRegex:
            getDefaultLevels.value[name] = getLevelsFromRegex(definition)
        else:
            getDefaultLevels.value[name] = getLevelsFromYaml(definition)

    return getValue(category, meta, getDefaultLevels, [0, 0], readLevels)
596
597
def getClasses(category, meta):
    """Return the list of classes of category ([category] when not defined)."""
    def readClasses(definition):
        # Only a 'classes' entry given as a list is taken into account
        if hasProperty(definition, 'classes', 'MetaList'):
            names = [stringify(element) for element in getProperty(definition, 'classes')]
            getClasses.value[getFirstValue(definition, 'category')] = names

    return getValue(category, meta, getClasses, [category], readClasses)
606
607
def pandocVersion():
    """Return the version of the pandoc executable, as a string.

    The version is read from the output of 'pandoc -v' (the text following
    'pandoc ' on its line) on the first call, and cached on the function
    for the following ones.
    """
    try:
        return pandocVersion.value
    except AttributeError:
        # First call: the version has to be asked to pandoc
        pass

    process = subprocess.Popen(['pandoc', '-v'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = process.communicate()
    found = re.search(b'pandoc (?P<version>.*)', out)
    pandocVersion.value = found.group('version').decode('utf-8')
    return pandocVersion.value
613
614
def main():
    """Run the filter: number the elements first, then resolve the references."""
    actions = [numbering, referencing]
    toJSONFilters(actions)


# Run the filter only when the file is executed as a script
if __name__ == '__main__':
    main()
619