1
|
|
|
#!/usr/bin/env python |
2
|
|
|
|
3
|
|
|
""" |
4
|
|
|
Pandoc filter to create lists of all kinds |
5
|
|
|
""" |
6
|
|
|
|
7
|
|
|
from pandocfilters import toJSONFilters, walk, Str, Plain, Link, BulletList, Para, RawInline |
8
|
|
|
from functools import reduce |
9
|
|
|
from copy import deepcopy |
10
|
|
|
import io |
11
|
|
|
import sys |
12
|
|
|
import codecs |
13
|
|
|
import json |
14
|
|
|
import re |
15
|
|
|
import unicodedata |
16
|
|
|
import subprocess |
17
|
|
|
|
18
|
|
|
# Items gathered by collect(), keyed by collection name; every value is a
# list of {'identifier': ..., 'text': ...} dictionaries
collections = {}
# Counters of the numbered headers seen so far by collect(), one slot per
# header level (index 0 is level 1)
headers = [0, 0, 0, 0, 0, 0]
# Same kind of counters, maintained separately by listof()
headers2 = [0, 0, 0, 0, 0, 0]
21
|
|
|
|
22
|
|
|
def stringify(x, format):
    """Flatten the tree ``x`` into a single string without formatting.

    The tree is traversed with ``walk``. Text is taken from ``Str``,
    ``MetaString``, ``Code`` and ``Math`` nodes; ``Space`` and ``LineBreak``
    nodes each contribute one blank. When ``format`` is ``'latex'`` the
    content of a ``Math`` node is wrapped in ``$...$``.

    ``Note`` nodes are not stringified: their content list is emptied in
    place, so the tree given as ``x`` is modified when it contains notes.
    Pass a copy if the original must be kept intact.
    """
    pieces = []

    def gather(kind, payload, fmt, _meta):
        # Footnotes are excluded from the text: drop their content in place
        if kind == 'Note':
            del payload[:]
            return

        if kind in ('Str', 'MetaString'):
            pieces.append(payload)
        elif kind == 'Code':
            pieces.append(payload[1])
        elif kind == 'Math':
            # Modified from the stringify function in the pandocfilter package
            formula = payload[1]
            pieces.append(('$' + formula + '$') if fmt == 'latex' else formula)
        elif kind in ('LineBreak', 'Space'):
            pieces.append(" ")

    walk(x, gather, format, {})
    return ''.join(pieces)
49
|
|
|
|
50
|
|
|
def collect(key, value, format, meta):
    """Filter action that records every tagged ``Span`` in ``collections``.

    ``Header`` elements update the module-level ``headers`` counters so the
    current section number is known. ``Span`` elements whose identifier looks
    like ``name:identifier`` are stored in ``collections`` under ``name`` and
    under one alternate name per active header level (``name:1``,
    ``name:1.2``, ...). Each stored item is a dictionary with the keys
    ``'identifier'`` and ``'text'`` (the stringified span content).

    When ``format`` is ``'latex'`` a raw TeX inline made of
    ``\\phantomsection\\addcontentsline{...}`` commands is inserted at the
    front of the span content, modifying ``value`` in place.

    Parameters follow the signature shared by the filter actions of this
    file: ``key`` is the element type, ``value`` its content, ``format`` the
    output format and ``meta`` the document metadata (unused here).

    The function always returns ``None``.
    """
    global headers, collections

    # Is it a header? Keep the correct numbered headers in the headers array
    if key == 'Header':
        level, (_ident, classes, _attributes), _content = value
        if 'unnumbered' not in classes:
            headers[level - 1] += 1
            # Restart the counters of all the deeper levels.
            # NOTE(review): indentation was lost in the reviewed source; this
            # reset is assumed to apply to numbered headers only - confirm.
            for index in range(level, 6):
                headers[index] = 0
        return None

    # Only spans can carry a tag
    if key != 'Span':
        return None

    # Get the Span
    (anchor, _classes, _other), text = value

    # Is the anchor correct? (raw string: '\w' is not a valid str escape)
    result = re.match(r'^([a-zA-Z][\w.-]*):([\w.-]+)$', anchor)
    if not result:
        return None

    # Compute the name and the identifier
    name, identifier = result.groups()

    # Stringify a copy because stringify() empties Note nodes in place
    string = stringify(deepcopy(text), format)

    # Alternate names: one per header level, stopping at the first level
    # whose counter is still zero
    alt_names = []
    for depth, count in enumerate(headers, start=1):
        if count <= 0:
            break
        alt_names.append(name + ':' + '.'.join(map(str, headers[:depth])))

    # Store the new item under the atomic name and every alternate name
    for entry in [name] + alt_names:
        collections.setdefault(entry, []).append(
            {'identifier': identifier, 'text': string}
        )

    # Special case for LaTeX output
    if format == 'latex':
        # Each alternate name is registered twice, plain and with a trailing
        # '_' (the '_' variant is the one listof() uses for '#' tags)
        targets = [name]
        for alt_name in alt_names:
            targets.append(alt_name)
            targets.append(alt_name + '_')
        latex = ''.join(
            '\\phantomsection\\addcontentsline{' + target + '}{figure}{' + string + '}'
            for target in targets
        )
        # text is value[1] itself, so this updates the span in place
        text.insert(0, RawInline('tex', latex))

    return None
114
|
|
|
|
115
|
|
|
def _version_tuple(version):
    """Turn a dotted version string such as '1.15.2' into a tuple of ints."""
    return tuple(int(part) for part in re.findall(r'\d+', version))


def listof(key, value, format, meta):
    """Filter action that replaces ``{tag}`` paragraphs by the matching list.

    ``Header`` elements update the module-level ``headers2`` counters. A
    paragraph made of a single string is then examined:

    * ``{name}`` or ``{name:1.2}`` designates the collection of that exact
      name;
    * ``{name:#}``, ``{name:#.#}``, ... designates the collection of the
      current section, using as many ``headers2`` counters as there are
      ``#`` signs;
    * ``{{name}`` is an escape: the first brace is removed in place and the
      paragraph is otherwise left alone.

    If the designated collection exists in ``collections``, the function
    returns a replacement element: for the ``'latex'`` format a paragraph
    holding a raw ``\\@starttoc`` command (link colour taken from the
    ``toccolor`` metadata, ``black`` by default), otherwise a bullet list of
    links to ``#prefix:identifier``. In every other case it returns ``None``.
    """
    global headers2

    # Is it a header?
    if key == 'Header':
        level, (_ident, classes, _attributes), _content = value
        if 'unnumbered' not in classes:
            headers2[level - 1] += 1
            # Restart the counters of all the deeper levels.
            # NOTE(review): indentation was lost in the reviewed source; this
            # reset is assumed to apply to numbered headers only - confirm.
            for index in range(level, 6):
                headers2[index] = 0

    # Is it a paragraph with only one string?
    if key == 'Para' and len(value) == 1 and value[0]['t'] == 'Str':
        content = value[0]['c']

        # Is it {tag}? (raw string: '\w', '\:', '\d', '\.' are not valid
        # str escapes)
        result = re.match(r'^{(?P<name>(?P<prefix>[a-zA-Z][\w.-]*)(?P<section>\:((?P<sharp>#(\.#)*)|(\d+(\.\d+)*)))?)}$', content)
        if result:
            prefix = result.group('prefix')
            sharp = result.group('sharp')

            # Get the collection name
            if sharp is None:
                name = result.group('name')
            else:
                # '#', '#.#', '#.#.#' ... have lengths 1, 3, 5 ...
                level = (len(sharp) - 1) // 2 + 1
                name = prefix + ':' + '.'.join(map(str, headers2[:level]))

            # Is it an existing collection
            if name in collections:

                if format == 'latex':
                    # Special case for LaTeX output
                    if 'toccolor' in meta:
                        color = stringify(meta['toccolor']['c'], format)
                    else:
                        color = 'black'
                    linkcolor = '\\hypersetup{linkcolor=' + color + '}'
                    # Section-relative lists use the '_' variant of the name
                    suffix = '' if sharp is None else '_'
                    return Para([RawInline('tex', linkcolor + '\\makeatletter\\@starttoc{' + name + suffix + '}\\makeatother')])

                # Compare versions numerically: as plain strings,
                # '1.9' < '1.16' would be False
                legacy_link = _version_tuple(pandocVersion()) < (1, 16)

                # Prepare the list
                elements = []

                # Loop on the collection
                for item in collections[name]:
                    label = [Str(item['text'])]
                    target = ['#' + prefix + ':' + item['identifier'], '']
                    if legacy_link:
                        # pandoc 1.15
                        link = Link(label, target)
                    else:
                        # pandoc 1.16
                        link = Link(['', [], []], label, target)
                    elements.append([Plain([link])])

                # Return a bullet list
                return BulletList(elements)

        # Special case where the paragraph start with '{{...'
        elif re.match(r'^{{[a-zA-Z][\w.-]*}$', content):
            value[0]['c'] = content[1:]

    return None
180
|
|
|
|
181
|
|
|
def pandocVersion():
    """Return the version reported by the ``pandoc -v`` command.

    The ``pandoc`` executable is run only on the first call; the text that
    follows ``pandoc `` on the matching output line is decoded as UTF-8 and
    kept in the ``value`` attribute of this function, from where later calls
    read it.
    """
    try:
        return pandocVersion.value
    except AttributeError:
        # Nothing cached yet: ask pandoc below
        pass

    process = subprocess.Popen(
        ['pandoc', '-v'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout_data, _stderr_data = process.communicate()
    found = re.search(b'pandoc (?P<version>.*)', stdout_data)
    pandocVersion.value = found.group('version').decode('utf-8')
    return pandocVersion.value
187
|
|
|
|
188
|
|
|
|
189
|
|
|
def main():
    """Run the filter by handing ``collect`` then ``listof`` to toJSONFilters."""
    actions = [collect, listof]
    toJSONFilters(actions)
191
|
|
|
|
192
|
|
|
# Run the filter only when this file is executed as a script
if __name__ == '__main__':
    main()
194
|
|
|
|
195
|
|
|
|