1
|
|
|
""" |
2
|
|
|
Attribute List Extension for Python-Markdown |
3
|
|
|
============================================ |
4
|
|
|
|
5
|
|
|
Adds attribute list syntax. Inspired by |
6
|
|
|
[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s |
7
|
|
|
feature of the same name. |
8
|
|
|
|
9
|
|
|
See <https://pythonhosted.org/Markdown/extensions/attr_list.html> |
10
|
|
|
for documentation. |
11
|
|
|
|
12
|
|
|
Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). |
13
|
|
|
|
14
|
|
|
All changes Copyright 2011-2014 The Python Markdown Project |
15
|
|
|
|
16
|
|
|
License: [BSD](http://www.opensource.org/licenses/bsd-license.php) |
17
|
|
|
|
18
|
|
|
""" |
19
|
|
|
|
20
|
|
|
from __future__ import absolute_import |
21
|
|
|
from __future__ import unicode_literals |
22
|
|
|
from . import Extension |
23
|
|
|
from ..treeprocessors import Treeprocessor |
24
|
|
|
from ..util import isBlockLevel |
25
|
|
|
import re |
26
|
|
|
|
27
|
|
|
# Bind ``Scanner`` to the regex-driven tokenizer class used to parse
# attribute lists. ``re.Scanner`` is preferred; the ``sre`` module is only
# consulted when ``re`` does not expose the class.
try:
    Scanner = re.Scanner
except AttributeError:  # pragma: no cover
    # must be on Python 2.4
    from sre import Scanner
32
|
|
|
|
33
|
|
|
|
34
|
|
|
def _handle_double_quote(s, t): |
35
|
|
|
k, v = t.split('=', 1) |
36
|
|
|
return k, v.strip('"') |
37
|
|
|
|
38
|
|
|
|
39
|
|
|
def _handle_single_quote(s, t): |
40
|
|
|
k, v = t.split('=', 1) |
41
|
|
|
return k, v.strip("'") |
42
|
|
|
|
43
|
|
|
|
44
|
|
|
def _handle_key_value(s, t): |
45
|
|
|
return t.split('=', 1) |
46
|
|
|
|
47
|
|
|
|
48
|
|
|
def _handle_word(s, t): |
49
|
|
|
if t.startswith('.'): |
50
|
|
|
return '.', t[1:] |
51
|
|
|
if t.startswith('#'): |
52
|
|
|
return 'id', t[1:] |
53
|
|
|
return t, t |
54
|
|
|
|
55
|
|
|
|
56
|
|
|
# Tokenizer for the contents of an attribute list. Rules are listed from
# most to least specific: quoted key/value pairs first, then unquoted
# pairs, then bare words. A single space is matched with no callback
# (``None``).
_scanner = Scanner([
    (r'[^ =]+=".*?"', _handle_double_quote),
    (r"[^ =]+='.*?'", _handle_single_quote),
    (r'[^ =]+=[^ =]+', _handle_key_value),
    (r'[^ =]+', _handle_word),
    (r' ', None)
])
63
|
|
|
|
64
|
|
|
|
65
|
|
|
def get_attrs(str):
    """ Parse attribute list and return a list of attribute tuples.

    ``str`` is the text found between the braces of an attribute list.
    Only the first item of the scanner's result (the list of parsed
    ``key, value`` pairs) is returned; the second item, the unscanned
    remainder, is discarded.
    """
    return _scanner.scan(str)[0]
68
|
|
|
|
69
|
|
|
|
70
|
|
|
def isheader(elem):
    """Return True when ``elem.tag`` is one of ``h1`` through ``h6``."""
    return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
72
|
|
|
|
73
|
|
|
|
74
|
|
|
class AttrListTreeprocessor(Treeprocessor):
    """Move ``{: ... }`` attribute lists out of text and onto elements.

    The processor walks the element tree, looks for an attribute list in
    the text or tail belonging to each element, applies the parsed
    attributes to that element and removes the attribute-list text.
    """

    # An attribute list: ``{`` with an optional ``:``, then anything up to
    # the closing ``}`` on the same line. Group 1 is the list's contents.
    BASE_RE = r'\{\:?([^\}\n]*)\}'
    # Attribute list at the end of the string, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
    # Attribute list alone on the final line of the string.
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    # Attribute list at the very start of the string.
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    # Runs of characters that are not permitted in an attribute name.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """Apply attribute lists to every element reachable from ``doc``.

        Block-level elements (as decided by ``isBlockLevel``) are searched
        at the end of their content; all other elements are searched at the
        start of their tail. Matching text is removed and the tree is
        modified in place. Nothing is returned explicitly.
        """
        for elem in doc.iter():
            if isBlockLevel(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag == 'dt':
                    # header or def-term: check for attrs at end of line
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a ul or ol.
                    pos = None
                    # find the ul or ol position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no ul or ol.
                        m = RE.search(elem[-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before ul or ol
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. ul is first child.
                        m = RE.search(elem.text)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if not m and elem.tag == 'td':
                        # table cells: accept an attribute list anywhere
                        # in the text, not only on the last line.
                        m = re.search(self.BASE_RE, elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """ Assign attrs to element.

        ``attrs`` is the raw text of an attribute list; it is parsed with
        ``get_attrs``. A ``'.'`` key appends its value to the element's
        ``class`` attribute (space separated). Any other key is set
        directly after its name has been passed through ``sanitize_name``.
        """
        for k, v in get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '%s %s' % (cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See http://www.w3.org/TR/REC-xml-names/#NT-NCName

        Each run of characters matched by ``NAME_RE`` is replaced with a
        single underscore.
        """
        return self.NAME_RE.sub('_', name)
168
|
|
|
|
169
|
|
|
|
170
|
|
|
class AttrListExtension(Extension):
    """Extension that installs ``AttrListTreeprocessor``."""

    def extendMarkdown(self, md, md_globals):
        """Register the treeprocessor on ``md`` under the name 'attr_list'.

        The processor is added with the location spec ``'>prettify'``
        (presumably "after the prettify processor" -- confirm against the
        container's ``add`` contract). ``md_globals`` is not used.
        """
        md.treeprocessors.add(
            'attr_list', AttrListTreeprocessor(md), '>prettify'
        )
175
|
|
|
|
176
|
|
|
|
177
|
|
|
def makeExtension(*args, **kwargs):
    """Return a new ``AttrListExtension``, forwarding all arguments to it."""
    return AttrListExtension(*args, **kwargs)
179
|
|
|
|