|
1
|
|
|
""" |
|
2
|
|
|
INLINE PATTERNS |
|
3
|
|
|
============================================================================= |
|
4
|
|
|
|
|
5
|
|
|
Inline patterns such as *emphasis* are handled by means of auxiliary |
|
6
|
|
|
objects, one per pattern. Pattern objects must be instances of classes |
|
7
|
|
|
that extend markdown.Pattern. Each pattern object uses a single regular |
|
8
|
|
|
expression and needs to support the following methods: |
|
9
|
|
|
|
|
10
|
|
|
pattern.getCompiledRegExp() # returns a regular expression |
|
11
|
|
|
|
|
12
|
|
|
pattern.handleMatch(m) # takes a match object and returns |
|
13
|
|
|
# an ElementTree element or just plain text |
|
14
|
|
|
|
|
15
|
|
|
All of python markdown's built-in patterns subclass from Pattern, |
|
16
|
|
|
but you can add additional patterns that don't. |
|
17
|
|
|
|
|
18
|
|
|
Also note that all the regular expressions used by inline must |
|
19
|
|
|
capture the whole block. For this reason, they all start with |
|
20
|
|
|
'^(.*?)' and end with '(.*)$'. In the case of built-in expressions, |
|
21
|
|
|
Pattern takes care of adding the "^(.*?)" and "(.*)$". |
|
22
|
|
|
|
|
23
|
|
|
Finally, the order in which regular expressions are applied is very |
|
24
|
|
|
important - e.g. if we first replace http://.../ links with <a> tags |
|
25
|
|
|
and _then_ try to replace inline html, we would end up with a mess. |
|
26
|
|
|
So, we apply the expressions in the following order: |
|
27
|
|
|
|
|
28
|
|
|
* escape and backticks have to go before everything else, so |
|
29
|
|
|
that we can preempt any markdown patterns by escaping them. |
|
30
|
|
|
|
|
31
|
|
|
* then we handle auto-links (must be done before inline html) |
|
32
|
|
|
|
|
33
|
|
|
* then we handle inline HTML. At this point we will simply |
|
34
|
|
|
replace all inline HTML strings with a placeholder and add |
|
35
|
|
|
the actual HTML to a hash. |
|
36
|
|
|
|
|
37
|
|
|
* then inline images (must be done before links) |
|
38
|
|
|
|
|
39
|
|
|
* then bracketed links, first regular then reference-style |
|
40
|
|
|
|
|
41
|
|
|
* finally we apply strong and emphasis |
|
42
|
|
|
""" |
|
43
|
|
|
|
|
44
|
|
|
from __future__ import absolute_import |
|
45
|
|
|
from __future__ import unicode_literals |
|
46
|
|
|
from . import util |
|
47
|
|
|
from . import odict |
|
48
|
|
|
import re |
|
49
|
|
|
try: # pragma: no cover |
|
50
|
|
|
from urllib.parse import urlparse, urlunparse |
|
51
|
|
|
except ImportError: # pragma: no cover |
|
52
|
|
|
from urlparse import urlparse, urlunparse |
|
53
|
|
|
try: # pragma: no cover |
|
54
|
|
|
from html import entities |
|
55
|
|
|
except ImportError: # pragma: no cover |
|
56
|
|
|
import htmlentitydefs as entities |
|
57
|
|
|
|
|
58
|
|
|
|
|
59
|
|
|
def build_inlinepatterns(md_instance, **kwargs):
    """Assemble the default, ordered collection of inline patterns.

    The patterns are registered in the order in which they must be
    applied.  The raw-HTML pattern is left out when the Markdown instance
    runs with ``safeMode == 'escape'``, and the underscore emphasis
    pattern is chosen according to ``md_instance.smart_emphasis``.

    Returns the populated ``odict.OrderedDict`` keyed by pattern name.
    """
    md = md_instance
    inlinePatterns = odict.OrderedDict()

    # (name, pattern) pairs, listed in application order.
    entries = [
        ("backtick", BacktickPattern(BACKTICK_RE)),
        ("escape", EscapePattern(ESCAPE_RE, md)),
        ("reference", ReferencePattern(REFERENCE_RE, md)),
        ("link", LinkPattern(LINK_RE, md)),
        ("image_link", ImagePattern(IMAGE_LINK_RE, md)),
        ("image_reference", ImageReferencePattern(IMAGE_REFERENCE_RE, md)),
        ("short_reference", ReferencePattern(SHORT_REF_RE, md)),
        ("autolink", AutolinkPattern(AUTOLINK_RE, md)),
        ("automail", AutomailPattern(AUTOMAIL_RE, md)),
        ("linebreak", SubstituteTagPattern(LINE_BREAK_RE, 'br')),
    ]

    # Raw inline HTML is only stashed when it is not being escaped.
    if md.safeMode != 'escape':
        entries.append(("html", HtmlPattern(HTML_RE, md)))

    entries.extend([
        ("entity", HtmlPattern(ENTITY_RE, md)),
        ("not_strong", SimpleTextPattern(NOT_STRONG_RE)),
        ("em_strong", DoubleTagPattern(EM_STRONG_RE, 'strong,em')),
        ("strong_em", DoubleTagPattern(STRONG_EM_RE, 'em,strong')),
        ("strong", SimpleTagPattern(STRONG_RE, 'strong')),
        ("emphasis", SimpleTagPattern(EMPHASIS_RE, 'em')),
    ])

    if md.smart_emphasis:
        underscore_re = SMART_EMPHASIS_RE
    else:
        underscore_re = EMPHASIS_2_RE
    entries.append(("emphasis2", SimpleTagPattern(underscore_re, 'em')))

    for name, pattern in entries:
        inlinePatterns[name] = pattern
    return inlinePatterns
|
89
|
|
|
|
|
90
|
|
|
|
|
91
|
|
|
"""
The actual regular expressions for patterns
-----------------------------------------------------------------------------
"""

# NOTE: Pattern.__init__ wraps every expression below as
# "^(.*?)<pattern>(.*)$", so at match time group 1 is the text before the
# match and every group (and numeric backreference such as \2) written here
# is numbered as if one capturing group preceded it.

# Any run of characters that are not square brackets.
NOBRACKET = r'[^\]\[]*'
# Bracketed text allowing up to six levels of nested brackets. It adds seven
# capturing groups (the bracket content plus six nesting groups), so after
# the wrapper's leading group the content is group 2 and the first group
# following BRK is group 9.
BRK = (
    r'\[(' +
    (NOBRACKET + r'(\[')*6 +
    (NOBRACKET + r'\])*')*6 +
    NOBRACKET + r')\]'
)
# Negative lookbehind: not preceded by "!" (i.e. not an image).
NOIMG = r'(?<!\!)'

# `e=f()` or ``e=f("`")``
# First alternative: an even run of backslashes directly before backticks;
# second alternative: a backtick-delimited code span (\3 is the opening run).
BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\3(?!`))'

# \<
ESCAPE_RE = r'\\(.)'

# *emphasis*
EMPHASIS_RE = r'(\*)([^\*]+)\2'

# **strong**
STRONG_RE = r'(\*{2}|_{2})(.+?)\2'

# ***strongem*** or ***em*strong**
EM_STRONG_RE = r'(\*|_)\2{2}(.+?)\2(.*?)\2{2}'

# ***strong**em*
STRONG_EM_RE = r'(\*|_)\2{2}(.+?)\2{2}(.*?)\2'

# _smart_emphasis_
# Underscores only count when not adjacent to word characters.
SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)'

# _emphasis_
EMPHASIS_2_RE = r'(_)(.+?)\2'

# [text](url) or [text](<url>) or [text](url "title")
# After wrapping: group 9 is the url, group 12 the title's quote character
# (referenced by \12) and group 13 the title text.
LINK_RE = NOIMG + BRK + \
    r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''

# ![alt text](url) or ![alt text](<url>)
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(\s*(<.*?>|([^"\)\s]+\s*"[^"]*"|[^\)\s]*))\s*\)'

# [Google][3]
REFERENCE_RE = NOIMG + BRK + r'\s?\[([^\]]*)\]'

# [Google]
# Does not use BRK, so a match has no group 9.
SHORT_REF_RE = NOIMG + r'\[([^\]]+)\]'

# ![alt text][2]
IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s?\[([^\]]*)\]'

# stand-alone * or _
NOT_STRONG_RE = r'((^| )(\*|_)( |$))'

# <http://www.123.com>
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>'

# <user@example.com>
AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'

# <...>
# An opening/closing tag or an HTML comment.
HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'

# HTML entity, e.g. &amp; or &#38;
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'

# two spaces at end of line
# NOTE(review): the pattern as it appears here contains a single space before
# the newline; whitespace may have been collapsed in this copy - confirm
# against the upstream source.
LINE_BREAK_RE = r' \n'
|
162
|
|
|
|
|
163
|
|
|
|
|
164
|
|
|
def dequote(string):
    """Strip one pair of matching surrounding quotes from `string`.

    When the string both starts and ends with a double quote, or both
    starts and ends with a single quote, the first and last characters are
    dropped.  Any other string is returned unchanged.
    """
    for quote in ('"', "'"):
        if string.startswith(quote) and string.endswith(quote):
            return string[1:-1]
    return string
|
171
|
|
|
|
|
172
|
|
|
|
|
173
|
|
|
ATTR_RE = re.compile(r"\{@([^\}]*)=([^\}]*)}")  # {@id=123}


def handleAttributes(text, parent):
    """Set values of an element based on attribute definitions ({@id=123}).

    Every ``{@name=value}`` definition found in `text` is applied to
    `parent` through ``parent.set(name, value)``, with newlines inside the
    value replaced by spaces.

    Returns `text` with all attribute definitions removed.
    """
    def attributeCallback(match):
        parent.set(match.group(1), match.group(2).replace('\n', ' '))
        # Return the replacement explicitly: the definition is stripped from
        # the text. (Previously this relied on re.sub tolerating a None
        # result from the callback.)
        return ''
    return ATTR_RE.sub(attributeCallback, text)
|
181
|
|
|
|
|
182
|
|
|
|
|
183
|
|
|
""" |
|
184
|
|
|
The pattern classes |
|
185
|
|
|
----------------------------------------------------------------------------- |
|
186
|
|
|
""" |
|
187
|
|
|
|
|
188
|
|
|
|
|
189
|
|
|
class Pattern(object):
    """Base class from which the inline patterns are derived."""

    def __init__(self, pattern, markdown_instance=None):
        """
        Create an instance of an inline pattern.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * markdown_instance: Optional object stored as ``self.markdown``
          (the attribute is only set when a truthy value is given)

        """
        self.pattern = pattern
        # Wrap the expression so that a match spans the whole text: group 1
        # is everything before the pattern, the last group everything after.
        flags = re.DOTALL | re.UNICODE
        self.compiled_re = re.compile("^(.*?)%s(.*)$" % pattern, flags)

        # Api for Markdown to pass safe_mode into instance
        self.safe_mode = False
        if markdown_instance:
            self.markdown = markdown_instance

    def getCompiledRegExp(self):
        """ Return the compiled (wrapped) regular expression. """
        return self.compiled_re

    def handleMatch(self, m):
        """Return a ElementTree element from the given match.

        Subclasses should override this method; the base implementation
        does nothing and returns None.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.

        """
        pass  # pragma: no cover

    def type(self):
        """ Return the class name, which identifies the pattern type. """
        return self.__class__.__name__

    def unescape(self, text):
        """ Return `text` with inline placeholders replaced by their content.

        Placeholders are looked up in the stash of the 'inline'
        treeprocessor.  A stashed string is substituted as-is; any other
        stashed value is treated as an element and replaced by its text
        content.  If there is no 'inline' treeprocessor, `text` is returned
        unchanged.
        """
        try:
            stash = self.markdown.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def itertext(el):  # pragma: no cover
            ' Reimplement Element.itertext for older python versions '
            tag = el.tag
            if tag is not None and not isinstance(tag, util.string_type):
                return
            if el.text:
                yield el.text
            for child in el:
                for piece in itertext(child):
                    yield piece
                if child.tail:
                    yield child.tail

        def get_stash(m):
            key = m.group(1)
            if key not in stash:
                # Unknown placeholder: no replacement value is produced.
                return None
            node = stash.get(key)
            if isinstance(node, util.string_type):
                return node
            # An etree Element - return text content only
            return ''.join(itertext(node))

        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
|
260
|
|
|
|
|
261
|
|
|
|
|
262
|
|
|
class SimpleTextPattern(Pattern):
    """ Pattern whose result is the plain text captured by group(2). """

    def handleMatch(self, m):
        """Return the text of the match's second group."""
        text = m.group(2)
        return text
|
266
|
|
|
|
|
267
|
|
|
|
|
268
|
|
|
class EscapePattern(Pattern):
    """ Replace a backslash-escaped character with a placeholder. """

    def handleMatch(self, m):
        """Return an STX/ETX-wrapped code point for an escapable character.

        Returns None when the character following the backslash is not
        listed in the Markdown instance's ``ESCAPED_CHARS``.
        """
        char = m.group(2)
        if char not in self.markdown.ESCAPED_CHARS:
            return None
        return '%s%s%s' % (util.STX, ord(char), util.ETX)
|
277
|
|
|
|
|
278
|
|
|
|
|
279
|
|
|
class SimpleTagPattern(Pattern):
    """
    Build an element of type `tag` whose text is group(3) of the match.

    """

    def __init__(self, pattern, tag):
        """Store `tag` and compile `pattern` (no Markdown instance is kept)."""
        Pattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m):
        """Return a new `tag` element containing the third group's text."""
        element = util.etree.Element(self.tag)
        element.text = m.group(3)
        return element
|
293
|
|
|
|
|
294
|
|
|
|
|
295
|
|
|
class SubstituteTagPattern(SimpleTagPattern):
    """ Replace the match with an empty element of type `tag`. """

    def handleMatch(self, m):
        """Return a new, childless `tag` element; the match is not used."""
        empty = util.etree.Element(self.tag)
        return empty
|
299
|
|
|
|
|
300
|
|
|
|
|
301
|
|
|
class BacktickPattern(Pattern):
    """ Turn backtick-delimited text into a `<code>` element. """

    def __init__(self, pattern):
        """Compile `pattern` and prepare the escaped-backslash placeholder."""
        Pattern.__init__(self, pattern)
        self.ESCAPED_BSLASH = '%s%s%s' % (util.STX, ord('\\'), util.ETX)
        self.tag = 'code'

    def handleMatch(self, m):
        """Return a code element, or escaped backslashes as text.

        When group(4) (the code span content) is non-empty, a `tag` element
        holding the stripped content as an AtomicString is returned.
        Otherwise the match is a run of double backslashes (group(2)); each
        pair is replaced by the escaped-backslash placeholder.
        """
        code = m.group(4)
        if not code:
            return m.group(2).replace('\\\\', self.ESCAPED_BSLASH)
        element = util.etree.Element(self.tag)
        element.text = util.AtomicString(code.strip())
        return element
|
315
|
|
|
|
|
316
|
|
|
|
|
317
|
|
|
class DoubleTagPattern(SimpleTagPattern):
    """Build two nested elements from a comma-separated tag pair.

    `tag` has the form "outer,inner"; useful for strong emphasis etc.

    """

    def handleMatch(self, m):
        """Return the outer element wrapping the inner one.

        The inner element's text is group(3).  When the match has exactly
        five groups, group(4) becomes the inner element's tail, i.e. text
        that sits inside the outer element but after the inner one.
        """
        outer_tag, inner_tag = self.tag.split(",")
        outer = util.etree.Element(outer_tag)
        inner = util.etree.SubElement(outer, inner_tag)
        inner.text = m.group(3)
        has_tail_group = len(m.groups()) == 5
        if has_tail_group:
            inner.tail = m.group(4)
        return outer
|
331
|
|
|
|
|
332
|
|
|
|
|
333
|
|
|
class HtmlPattern(Pattern):
    """ Store raw inline html and return a placeholder. """

    def handleMatch(self, m):
        """Stash the matched raw HTML and return the stash's placeholder.

        Inline placeholders inside the matched text (group(2)) are expanded
        with `unescape` before the text is handed to
        ``self.markdown.htmlStash.store``.
        """
        rawhtml = self.unescape(m.group(2))
        place_holder = self.markdown.htmlStash.store(rawhtml)
        return place_holder

    def unescape(self, text):
        """ Return unescaped text given text with an inline placeholder.

        Each placeholder whose id is found in the 'inline' treeprocessor's
        stash is replaced by the serialized stashed value; if serializing
        fails, the value is inserted as text prefixed with a backslash.
        If there is no 'inline' treeprocessor, `text` is returned unchanged.
        """
        try:
            stash = self.markdown.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def get_stash(m):
            key = m.group(1)
            value = stash.get(key)
            if value is not None:
                try:
                    return self.markdown.serializer(value)
                except Exception:
                    # Best-effort fallback for values the serializer cannot
                    # handle. Only ordinary errors are caught so that
                    # KeyboardInterrupt and SystemExit still propagate.
                    return r'\%s' % value
            # Ids missing from the stash produce no replacement value.

        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
|
357
|
|
|
|
|
358
|
|
|
|
|
359
|
|
|
class LinkPattern(Pattern):
    """ Build an `<a>` element from an inline link match. """

    def handleMatch(self, m):
        """Return an anchor element for the matched link.

        group(2) is the link text, group(9) the url (optionally wrapped in
        angle brackets) and group(13) the optional title.
        """
        anchor = util.etree.Element("a")
        anchor.text = m.group(2)
        title = m.group(13)
        href = m.group(9)

        if not href:
            anchor.set("href", "")
        else:
            if href[0] == "<":
                # Drop the surrounding angle brackets of "<url>".
                href = href[1:-1]
            cleaned = self.unescape(href.strip())
            anchor.set("href", self.sanitize_url(cleaned))

        if title:
            anchor.set("title", dequote(self.unescape(title)))
        return anchor

    def sanitize_url(self, url):
        """
        Sanitize a url against xss attacks in "safe_mode".

        Rather than specifically blacklisting `javascript:alert("XSS")` and all
        its aliases (see <http://ha.ckers.org/xss.html>), we whitelist known
        safe url formats. Most urls contain a network location, however some
        are known not to (i.e.: mailto links). Script urls do not contain a
        location. Additionally, for `javascript:...`, the scheme would be
        "javascript" but some aliases will appear to `urlparse()` to have no
        scheme. On top of that relative links (i.e.: "foo/bar.html") have no
        scheme. Therefore we must check "path", "parameters", "query" and
        "fragment" for any literal colons. We don't check "scheme" for colons
        because it *should* never have any and "netloc" must allow the form:
        `username:password@host:port`.

        Returns the url (re-assembled by `urlunparse`) when it passes every
        check, the untouched url when safe mode is off, and an empty string
        otherwise.

        """
        if not self.markdown.safeMode:
            # Return immediately, bypassing parsing.
            return url

        try:
            parsed = urlparse(url)
        except ValueError:  # pragma: no cover
            # Bad url - so bad it couldn't be parsed.
            return ''
        scheme, netloc = parsed[0], parsed[1]

        locless_schemes = ['', 'mailto', 'news']
        allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps']
        if scheme not in allowed_schemes:
            # Not a known (allowed) scheme. Not safe.
            return ''

        if netloc == '' and scheme not in locless_schemes:  # pragma: no cover
            # This should not happen. Treat as suspect.
            return ''

        # A colon in "path", "parameters", "query" or "fragment" is suspect.
        if any(":" in part for part in parsed[2:]):
            return ''

        # Url passes all tests. Return url as-is.
        return urlunparse(parsed)
|
424
|
|
|
|
|
425
|
|
|
|
|
426
|
|
|
class ImagePattern(LinkPattern):
    """ Build an `<img>` element from an inline image match. """

    def handleMatch(self, m):
        """Return an img element for the matched image link.

        group(9) holds the source followed by an optional title; it is
        split on whitespace, the first piece being the source.  group(2)
        is the alt text, from which ``{@name=value}`` attribute definitions
        are extracted when the Markdown instance enables attributes.
        """
        image = util.etree.Element("img")
        pieces = m.group(9).split()

        if not pieces:
            image.set('src', "")
        else:
            src = pieces[0]
            if src[0] == "<" and src[-1] == ">":
                # Drop the surrounding angle brackets of "<url>".
                src = src[1:-1]
            image.set('src', self.sanitize_url(self.unescape(src)))

        if len(pieces) > 1:
            raw_title = " ".join(pieces[1:])
            image.set('title', dequote(self.unescape(raw_title)))

        alt = m.group(2)
        if self.markdown.enable_attributes:
            alt = handleAttributes(alt, image)

        image.set('alt', self.unescape(alt))
        return image
|
448
|
|
|
|
|
449
|
|
|
|
|
450
|
|
|
class ReferencePattern(LinkPattern):
    """ Resolve a reference-style link against the stored references. """

    NEWLINE_CLEANUP_RE = re.compile(r'[ ]?\n', re.MULTILINE)

    def handleMatch(self, m):
        """Return the element for a defined reference, or None.

        The reference id is group(9) lower-cased.  When that group does not
        exist or is empty (as in "[Google][]" or "[Google]"), the link text
        in group(2) is used as the id instead.  Undefined references are
        ignored by returning None.
        """
        try:
            ref_id = m.group(9).lower()
        except IndexError:
            ref_id = None
        if not ref_id:
            # if we got something like "[Google][]" or "[Google]"
            # we'll use "google" as the id
            ref_id = m.group(2).lower()

        # Clean up linebreaks in id
        ref_id = self.NEWLINE_CLEANUP_RE.sub(' ', ref_id)

        references = self.markdown.references
        if ref_id not in references:  # ignore undefined refs
            return None

        href, title = references[ref_id]
        return self.makeTag(href, title, m.group(2))

    def makeTag(self, href, title, text):
        """Return an anchor with a sanitized href, optional title and text."""
        anchor = util.etree.Element('a')
        anchor.set('href', self.sanitize_url(href))
        if title:
            anchor.set('title', title)
        anchor.text = text
        return anchor
|
483
|
|
|
|
|
484
|
|
|
|
|
485
|
|
|
class ImageReferencePattern(ReferencePattern):
    """ Resolve a reference-style image against the stored references. """

    def makeTag(self, href, title, text):
        """Return an img element with sanitized src, optional title and alt.

        When the Markdown instance enables attributes, ``{@name=value}``
        definitions are first extracted from `text` and set on the element;
        the remaining text becomes the alt attribute.
        """
        image = util.etree.Element("img")
        image.set("src", self.sanitize_url(href))
        if title:
            image.set("title", title)

        alt = text
        if self.markdown.enable_attributes:
            alt = handleAttributes(alt, image)

        image.set("alt", self.unescape(alt))
        return image
|
498
|
|
|
|
|
499
|
|
|
|
|
500
|
|
|
class AutolinkPattern(Pattern):
    """ Build a link Element from an autolink (`<http://example/com>`). """

    def handleMatch(self, m):
        """Return an anchor whose href and text are both the matched url.

        The href has inline placeholders expanded; the visible text is
        stored as an AtomicString.
        """
        url = m.group(2)
        anchor = util.etree.Element("a")
        anchor.set('href', self.unescape(url))
        anchor.text = util.AtomicString(url)
        return anchor
|
507
|
|
|
|
|
508
|
|
|
|
|
509
|
|
|
class AutomailPattern(Pattern):
    """
    Build a mailto link Element from an automail link (`<user@example.com>`).
    """

    def handleMatch(self, m):
        """Return an anchor whose address is written as character entities.

        A leading "mailto:" in the matched address is dropped.  The link
        text encodes every character as a named entity where one exists and
        as a numeric entity otherwise; the href encodes "mailto:" plus the
        address using numeric entities only.  Each entity starts with
        ``util.AMP_SUBSTITUTE`` rather than a literal ampersand.
        """
        anchor = util.etree.Element('a')
        address = self.unescape(m.group(2))
        prefix = "mailto:"
        if address.startswith(prefix):
            address = address[len(prefix):]

        def codepoint2name(code):
            """Return entity definition by code, or the code if not defined."""
            entity = entities.codepoint2name.get(code)
            if not entity:
                return "%s#%d;" % (util.AMP_SUBSTITUTE, code)
            return "%s%s;" % (util.AMP_SUBSTITUTE, entity)

        encoded_text = ''.join(
            [codepoint2name(ord(letter)) for letter in address]
        )
        anchor.text = util.AtomicString(encoded_text)

        target = "mailto:" + address
        encoded_href = "".join(
            [util.AMP_SUBSTITUTE + '#%d;' % ord(letter) for letter in target]
        )
        anchor.set('href', encoded_href)
        return anchor
|
535
|
|
|
|