1
|
|
|
""" |
2
|
|
|
INLINE PATTERNS |
3
|
|
|
============================================================================= |
4
|
|
|
|
5
|
|
|
Inline patterns such as *emphasis* are handled by means of auxiliary
objects, one per pattern. Pattern objects must be instances of classes
that extend markdown.Pattern. Each pattern object uses a single regular
expression and needs to support the following methods:
9
|
|
|
|
10
|
|
|
pattern.getCompiledRegExp() # returns a regular expression |
11
|
|
|
|
12
|
|
|
pattern.handleMatch(m) # takes a match object and returns |
13
|
|
|
# an ElementTree element or just plain text |
14
|
|
|
|
15
|
|
|
All of python markdown's built-in patterns subclass from Pattern, |
16
|
|
|
but you can add additional patterns that don't. |
17
|
|
|
|
18
|
|
|
Also note that all the regular expressions used by inline must
capture the whole block. For this reason, they all start with
'^(.*?)' and end with '(.*)$'. For the built-in expressions,
Pattern takes care of adding the "^(.*?)" and "(.*)$".
22
|
|
|
|
23
|
|
|
Finally, the order in which regular expressions are applied is very |
24
|
|
|
important - e.g. if we first replace http://.../ links with <a> tags |
25
|
|
|
and _then_ try to replace inline html, we would end up with a mess. |
26
|
|
|
So, we apply the expressions in the following order: |
27
|
|
|
|
28
|
|
|
* escape and backticks have to go before everything else, so |
29
|
|
|
that we can preempt any markdown patterns by escaping them. |
30
|
|
|
|
31
|
|
|
* then we handle auto-links (must be done before inline html) |
32
|
|
|
|
33
|
|
|
* then we handle inline HTML. At this point we will simply |
34
|
|
|
replace all inline HTML strings with a placeholder and add |
35
|
|
|
the actual HTML to a hash. |
36
|
|
|
|
37
|
|
|
* then inline images (must be done before links) |
38
|
|
|
|
39
|
|
|
* then bracketed links, first regular then reference-style |
40
|
|
|
|
41
|
|
|
* finally we apply strong and emphasis |
42
|
|
|
""" |
43
|
|
|
|
44
|
|
|
from __future__ import absolute_import |
45
|
|
|
from __future__ import unicode_literals |
46
|
|
|
from . import util |
47
|
|
|
from . import odict |
48
|
|
|
import re |
49
|
|
|
try: # pragma: no cover |
50
|
|
|
from urllib.parse import urlparse, urlunparse |
51
|
|
|
except ImportError: # pragma: no cover |
52
|
|
|
from urlparse import urlparse, urlunparse |
53
|
|
|
try: # pragma: no cover |
54
|
|
|
from html import entities |
55
|
|
|
except ImportError: # pragma: no cover |
56
|
|
|
import htmlentitydefs as entities |
57
|
|
|
|
58
|
|
|
|
59
|
|
|
def build_inlinepatterns(md_instance, **kwargs):
    """
    Assemble the default, ordered collection of inline patterns.

    Keyword arguments:

    * md_instance: The Markdown instance the patterns are built for. Its
      `safeMode` and `smart_emphasis` attributes are read here, and the
      instance is handed to every pattern class that takes one.
    * kwargs: Accepted but not used by this function.

    Returns an `odict.OrderedDict` mapping pattern names to pattern
    objects, in the order in which they are registered below.

    """
    patterns = odict.OrderedDict()

    # Patterns that are always registered ahead of raw inline HTML.
    leading = (
        ("backtick", BacktickPattern(BACKTICK_RE)),
        ("escape", EscapePattern(ESCAPE_RE, md_instance)),
        ("reference", ReferencePattern(REFERENCE_RE, md_instance)),
        ("link", LinkPattern(LINK_RE, md_instance)),
        ("image_link", ImagePattern(IMAGE_LINK_RE, md_instance)),
        ("image_reference",
         ImageReferencePattern(IMAGE_REFERENCE_RE, md_instance)),
        ("short_reference", ReferencePattern(SHORT_REF_RE, md_instance)),
        ("autolink", AutolinkPattern(AUTOLINK_RE, md_instance)),
        ("automail", AutomailPattern(AUTOMAIL_RE, md_instance)),
        ("linebreak", SubstituteTagPattern(LINE_BREAK_RE, 'br')),
    )
    for name, pattern in leading:
        patterns[name] = pattern

    # The raw HTML pattern is left out only when safe mode is 'escape'.
    if md_instance.safeMode != 'escape':
        patterns["html"] = HtmlPattern(HTML_RE, md_instance)

    # Entities and the strong/emphasis family come last.
    trailing = (
        ("entity", HtmlPattern(ENTITY_RE, md_instance)),
        ("not_strong", SimpleTextPattern(NOT_STRONG_RE)),
        ("em_strong", DoubleTagPattern(EM_STRONG_RE, 'strong,em')),
        ("strong_em", DoubleTagPattern(STRONG_EM_RE, 'em,strong')),
        ("strong", SimpleTagPattern(STRONG_RE, 'strong')),
        ("emphasis", SimpleTagPattern(EMPHASIS_RE, 'em')),
    )
    for name, pattern in trailing:
        patterns[name] = pattern

    # Underscore emphasis has a "smart" and a plain variant.
    underscore_re = (
        SMART_EMPHASIS_RE if md_instance.smart_emphasis else EMPHASIS_2_RE
    )
    patterns["emphasis2"] = SimpleTagPattern(underscore_re, 'em')
    return patterns
89
|
|
|
|
90
|
|
|
|
91
|
|
|
""" |
92
|
|
|
The actual regular expressions for patterns |
93
|
|
|
----------------------------------------------------------------------------- |
94
|
|
|
""" |
95
|
|
|
|
96
|
|
|
# Any run of characters that contains no square bracket.
NOBRACKET = r'[^\]\[]*'
# Bracketed text. The repeated fragments let square brackets nest inside the
# outer pair up to six levels deep. BRK contributes seven capture groups;
# because Pattern prepends its own leading group, the bracketed text is
# group 2 of a compiled pattern and the first group after BRK is group 9.
BRK = (
    r'\[(' +
    (NOBRACKET + r'(\[')*6 +
    (NOBRACKET + r'\])*')*6 +
    NOBRACKET + r')\]'
)
# Negative lookbehind: the match must not be immediately preceded by "!".
NOIMG = r'(?<!\!)'

# `e=f()` or ``e=f("`")``
BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\3(?!`))'

# \<
ESCAPE_RE = r'\\(.)'

# *emphasis*
EMPHASIS_RE = r'(\*)([^\*]+)\2'

# **strong**
STRONG_RE = r'(\*{2}|_{2})(.+?)\2'

# ***strongem*** or ***em*strong**
EM_STRONG_RE = r'(\*|_)\2{2}(.+?)\2(.*?)\2{2}'

# ***strong**em*
STRONG_EM_RE = r'(\*|_)\2{2}(.+?)\2{2}(.*?)\2'

# _smart_emphasis_
SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)'

# _emphasis_
EMPHASIS_2_RE = r'(_)(.+?)\2'

# [text](url) or [text](<url>) or [text](url "title")
# (\12 is the opening quote of the title; the title text itself is group 13)
LINK_RE = NOIMG + BRK + \
    r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''

# ![alt text](url) or ![alt text](<url>) or ![alt text](url "title")
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(\s*(<.*?>|([^"\)\s]+\s*"[^"]*"|[^\)\s]*))\s*\)'

# [Google][3]
REFERENCE_RE = NOIMG + BRK + r'\s?\[([^\]]*)\]'

# [Google]
SHORT_REF_RE = NOIMG + r'\[([^\]]+)\]'

# ![alt text][2]
IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s?\[([^\]]*)\]'

# stand-alone * or _
NOT_STRONG_RE = r'((^| )(\*|_)( |$))'

# <http://www.123.com>
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>'

# <me@example.com>
AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'

# <...>
HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'

# &amp;
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'
159
|
|
|
|
160
|
|
|
# Hard line break: two spaces at the end of a line, directly before the
# newline. A single trailing space must NOT produce a <br>, so the pattern
# spells out both spaces explicitly.
LINE_BREAK_RE = r'  \n'
162
|
|
|
|
163
|
|
|
|
164
|
|
|
def dequote(string):
    """
    Strip one pair of matching quotes surrounding `string`.

    If `string` both starts and ends with a double quote, or both starts
    and ends with a single quote, the first and last characters are dropped
    and the remainder is returned. Any other string is returned unchanged.

    """
    for quote in ('"', "'"):
        if string.startswith(quote) and string.endswith(quote):
            return string[1:-1]
    return string
171
|
|
|
|
172
|
|
|
|
173
|
|
|
ATTR_RE = re.compile(r"\{@([^\}]*)=([^\}]*)}")  # {@id=123}


def handleAttributes(text, parent):
    """
    Set values of an element based on attribute definitions ({@id=123}).

    Keyword arguments:

    * text: The string to scan for `{@name=value}` definitions.
    * parent: The element to update. Its `set(name, value)` method is
      called once for every definition found, with newlines in the value
      replaced by spaces.

    Returns `text` with every attribute definition removed.

    """
    def attributeCallback(match):
        parent.set(match.group(1), match.group(2).replace('\n', ' '))
        # re.sub() expects the callback to return the replacement string.
        # Return an empty string explicitly rather than an implicit None.
        return ''
    return ATTR_RE.sub(attributeCallback, text)
181
|
|
|
|
182
|
|
|
|
183
|
|
|
""" |
184
|
|
|
The pattern classes |
185
|
|
|
----------------------------------------------------------------------------- |
186
|
|
|
""" |
187
|
|
|
|
188
|
|
|
|
189
|
|
|
class Pattern(object):
    """Common base class for the inline pattern objects."""

    def __init__(self, pattern, markdown_instance=None):
        """
        Set up an inline pattern around a regular expression.

        Keyword arguments:

        * pattern: The regular expression (as a string) for this pattern.
          It is wrapped in a leading `^(.*?)` and a trailing `(.*)$` before
          being compiled, so the pattern's own groups start at group 2.
        * markdown_instance: Optional Markdown instance. It is stored as
          `self.markdown` only when a truthy value is given.

        """
        self.pattern = pattern
        flags = re.DOTALL | re.UNICODE
        self.compiled_re = re.compile("^(.*?)%s(.*)$" % pattern, flags)

        # Api for Markdown to pass safe_mode into instance
        self.safe_mode = False
        if markdown_instance:
            self.markdown = markdown_instance

    def getCompiledRegExp(self):
        """ Give back the compiled (wrapped) regular expression. """
        return self.compiled_re

    def handleMatch(self, m):
        """Build the result for a successful match.

        The base implementation does nothing and returns None; subclasses
        are expected to override it.

        Keyword arguments:

        * m: A re match object produced by the compiled pattern.

        """
        pass  # pragma: no cover

    def type(self):
        """ Name of the concrete class, used to identify the pattern type """
        return self.__class__.__name__

    def unescape(self, text):
        """
        Replace inline placeholders in `text` with the text they stand for.

        Stashed strings are substituted as they are; stashed elements are
        substituted by their concatenated text content. If no 'inline'
        tree processor is registered, `text` is returned untouched.

        """
        try:
            stash = self.markdown.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def itertext(el):  # pragma: no cover
            ' Reimplement Element.itertext for older python versions '
            tag = el.tag
            if tag is not None and not isinstance(tag, util.string_type):
                return
            if el.text:
                yield el.text
            for child in el:
                for piece in itertext(child):
                    yield piece
                if child.tail:
                    yield child.tail

        def get_stash(m):
            key = m.group(1)
            if key not in stash:
                # Unknown placeholder: no replacement value is supplied.
                return None
            stashed = stash.get(key)
            if isinstance(stashed, util.string_type):
                return stashed
            # An etree Element - return text content only
            return ''.join(itertext(stashed))

        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
260
|
|
|
|
261
|
|
|
|
262
|
|
|
class SimpleTextPattern(Pattern):
    """ Pattern whose result is the plain text captured by group(2). """

    def handleMatch(self, m):
        """ Hand back group(2) of the match as a plain string. """
        matched_text = m.group(2)
        return matched_text
266
|
|
|
|
267
|
|
|
|
268
|
|
|
class EscapePattern(Pattern):
    """ Turn a backslash-escaped character into a placeholder. """

    def handleMatch(self, m):
        """
        Return the STX/ETX-delimited code point of the escaped character.

        Characters that are not listed in the Markdown instance's
        `ESCAPED_CHARS` are not handled and yield None.

        """
        escaped = m.group(2)
        if escaped not in self.markdown.ESCAPED_CHARS:
            return None
        return '%s%s%s' % (util.STX, ord(escaped), util.ETX)
277
|
|
|
|
278
|
|
|
|
279
|
|
|
class SimpleTagPattern(Pattern):
    """
    Pattern producing a single `tag` element whose text is group(3)
    of the match.

    """

    def __init__(self, pattern, tag):
        """
        Keyword arguments:

        * pattern: The regular expression for this pattern.
        * tag: Name of the element to create for each match.

        """
        Pattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m):
        """ Create the element and fill in its text from group(3). """
        element = util.etree.Element(self.tag)
        element.text = m.group(3)
        return element
293
|
|
|
|
294
|
|
|
|
295
|
|
|
class SubstituteTagPattern(SimpleTagPattern):
    """ Pattern producing an empty `tag` element (no text, no children). """

    def handleMatch(self, m):
        """ Create a bare element; the matched text itself is not used. """
        empty_element = util.etree.Element(self.tag)
        return empty_element
299
|
|
|
|
300
|
|
|
|
301
|
|
|
class BacktickPattern(Pattern):
    """ Pattern turning backtick-delimited text into a `<code>` element. """

    def __init__(self, pattern):
        """
        Keyword arguments:

        * pattern: The regular expression for this pattern.

        """
        Pattern.__init__(self, pattern)
        # Placeholder that stands in for an escaped backslash.
        self.ESCAPED_BSLASH = '%s%s%s' % (util.STX, ord('\\'), util.ETX)
        self.tag = 'code'

    def handleMatch(self, m):
        """
        Return a `<code>` element, or placeholder text for backslashes.

        When group(4) (the text between the backticks) is non-empty it is
        stripped and wrapped, as an atomic string, in a `<code>` element.
        Otherwise group(2) is returned with every double backslash
        replaced by the escaped-backslash placeholder.

        """
        code = m.group(4)
        if not code:
            return m.group(2).replace('\\\\', self.ESCAPED_BSLASH)
        element = util.etree.Element(self.tag)
        element.text = util.AtomicString(code.strip())
        return element
315
|
|
|
|
316
|
|
|
|
317
|
|
|
class DoubleTagPattern(SimpleTagPattern):
    """Pattern producing two nested elements from one match.

    `tag` holds two comma-separated tag names; the element for the second
    name is created as a child of the element for the first.

    Useful for strong emphasis etc.

    """

    def handleMatch(self, m):
        """
        Build the outer element with the inner element as its only child.

        The inner element's text is group(3). If the match has exactly
        five groups, group(4) becomes the inner element's tail.

        """
        outer_tag, inner_tag = self.tag.split(",")
        outer = util.etree.Element(outer_tag)
        inner = util.etree.SubElement(outer, inner_tag)
        inner.text = m.group(3)
        if len(m.groups()) == 5:
            inner.tail = m.group(4)
        return outer
331
|
|
|
|
332
|
|
|
|
333
|
|
|
class HtmlPattern(Pattern):
    """ Store raw inline html and return a placeholder. """

    def handleMatch(self, m):
        """
        Stash the matched raw HTML and return its placeholder.

        Keyword arguments:

        * m: A re match object; group(2) is the raw HTML text.

        """
        rawhtml = self.unescape(m.group(2))
        place_holder = self.markdown.htmlStash.store(rawhtml)
        return place_holder

    def unescape(self, text):
        """
        Return unescaped text given text with an inline placeholder.

        Every inline placeholder found in `text` is replaced by the
        serialized form of the node stashed under it. If the node cannot be
        serialized, its value prefixed with a backslash is used instead.
        Placeholders with no stashed node are removed. If no 'inline' tree
        processor is registered, `text` is returned unchanged.

        """
        try:
            stash = self.markdown.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def get_stash(m):
            key = m.group(1)
            value = stash.get(key)
            if value is None:
                # Nothing stashed under this placeholder: drop it.
                return ''
            try:
                return self.markdown.serializer(value)
            except Exception:
                # Deliberate best-effort fallback when serialization fails,
                # but without swallowing KeyboardInterrupt/SystemExit the
                # way a bare ``except:`` would.
                return r'\%s' % value

        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
357
|
|
|
|
358
|
|
|
|
359
|
|
|
class LinkPattern(Pattern):
    """ Return a link element from the given match. """

    def handleMatch(self, m):
        """
        Build an `<a>` element from an inline link match.

        Keyword arguments:

        * m: A re match object; group(2) is the link text, group(9) the
          URL (optionally wrapped in angle brackets) and group(13) the
          optional title.

        """
        el = util.etree.Element("a")
        el.text = m.group(2)
        title = m.group(13)
        href = m.group(9)

        if href:
            # Only strip the angle brackets when the URL is really wrapped
            # in a matching pair; a URL that merely starts with "<" must
            # not lose its first and last characters.
            if href[0] == "<" and href[-1] == ">":
                href = href[1:-1]
            el.set("href", self.sanitize_url(self.unescape(href.strip())))
        else:
            el.set("href", "")

        if title:
            title = dequote(self.unescape(title))
            el.set("title", title)
        return el

    def sanitize_url(self, url):
        """
        Sanitize a url against xss attacks in "safe_mode".

        Rather than specifically blacklisting `javascript:alert("XSS")` and all
        its aliases (see <http://ha.ckers.org/xss.html>), we whitelist known
        safe url formats. Most urls contain a network location, however some
        are known not to (i.e.: mailto links). Script urls do not contain a
        location. Additionally, for `javascript:...`, the scheme would be
        "javascript" but some aliases will appear to `urlparse()` to have no
        scheme. On top of that relative links (i.e.: "foo/bar.html") have no
        scheme. Therefore we must check "path", "parameters", "query" and
        "fragment" for any literal colons. We don't check "scheme" for colons
        because it *should* never have any and "netloc" must allow the form:
        `username:password@host:port`.

        Returns `url` untouched when safe mode is off. In safe mode, returns
        the re-assembled url when it passes every check and an empty string
        otherwise.

        """
        if not self.markdown.safeMode:
            # Return immediately bypassing parsing.
            return url

        try:
            parsed = urlparse(url)
        except ValueError:  # pragma: no cover
            # Bad url - so bad it couldn't be parsed.
            return ''

        scheme, netloc = parsed[0], parsed[1]

        locless_schemes = ['', 'mailto', 'news']
        allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps']
        if scheme not in allowed_schemes:
            # Not a known (allowed) scheme. Not safe.
            return ''

        if netloc == '' and scheme not in locless_schemes:  # pragma: no cover
            # This should not happen. Treat as suspect.
            return ''

        # parsed[2:] is (path, params, query, fragment).
        for part in parsed[2:]:
            if ":" in part:
                # A colon in "path", "parameters", "query"
                # or "fragment" is suspect.
                return ''

        # Url passes all tests. Return url as-is.
        return urlunparse(parsed)
424
|
|
|
|
425
|
|
|
|
426
|
|
|
class ImagePattern(LinkPattern):
    """ Pattern producing an `<img>` element for an inline image. """

    def handleMatch(self, m):
        """
        Build the image element from the match.

        group(9) is split on whitespace: the first piece is the source
        (surrounding angle brackets are removed), and any remaining pieces
        are joined with single spaces to form the title. group(2) supplies
        the alt text; when the Markdown instance has `enable_attributes`
        set, attribute definitions are first extracted from it.

        """
        image = util.etree.Element("img")
        pieces = m.group(9).split()

        if not pieces:
            image.set('src', "")
        else:
            source = pieces[0]
            wrapped = source[0] == "<" and source[-1] == ">"
            if wrapped:
                source = source[1:-1]
            image.set('src', self.sanitize_url(self.unescape(source)))

        if len(pieces) > 1:
            raw_title = " ".join(pieces[1:])
            image.set('title', dequote(self.unescape(raw_title)))

        alt_text = m.group(2)
        if self.markdown.enable_attributes:
            alt_text = handleAttributes(alt_text, image)

        image.set('alt', self.unescape(alt_text))
        return image
448
|
|
|
|
449
|
|
|
|
450
|
|
|
class ReferencePattern(LinkPattern):
    """ Pattern resolving a reference-style link against stored references. """

    # A newline, together with one optional space directly before it.
    NEWLINE_CLEANUP_RE = re.compile(r'[ ]?\n', re.MULTILINE)

    def handleMatch(self, m):
        """
        Look up the referenced definition and build the element for it.

        Returns None when the reference id is not defined on the Markdown
        instance, otherwise whatever `makeTag` returns.

        """
        try:
            ref_id = m.group(9).lower()
        except IndexError:
            # The pattern has no such group (e.g. a short reference).
            ref_id = None
        if not ref_id:
            # if we got something like "[Google][]" or "[Google]"
            # we'll use "google" as the id
            ref_id = m.group(2).lower()

        # Clean up linebreaks in id
        ref_id = self.NEWLINE_CLEANUP_RE.sub(' ', ref_id)

        references = self.markdown.references
        if ref_id not in references:  # ignore undefined refs
            return None
        href, title = references[ref_id]

        return self.makeTag(href, title, m.group(2))

    def makeTag(self, href, title, text):
        """
        Build an `<a>` element.

        Keyword arguments:

        * href: Target url; passed through `sanitize_url`.
        * title: Value for the `title` attribute; omitted when falsy.
        * text: Text content of the link.

        """
        link = util.etree.Element('a')

        link.set('href', self.sanitize_url(href))
        if title:
            link.set('title', title)

        link.text = text
        return link
483
|
|
|
|
484
|
|
|
|
485
|
|
|
class ImageReferencePattern(ReferencePattern):
    """ Pattern resolving a reference-style image into an `<img>` element. """

    def makeTag(self, href, title, text):
        """
        Build an `<img>` element.

        Keyword arguments:

        * href: Image url; passed through `sanitize_url` and used as `src`.
        * title: Value for the `title` attribute; omitted when falsy.
        * text: Alt text. When the Markdown instance has
          `enable_attributes` set, attribute definitions are extracted
          from it first.

        """
        image = util.etree.Element("img")
        image.set("src", self.sanitize_url(href))
        if title:
            image.set("title", title)

        alt_text = text
        if self.markdown.enable_attributes:
            alt_text = handleAttributes(alt_text, image)

        image.set("alt", self.unescape(alt_text))
        return image
498
|
|
|
|
499
|
|
|
|
500
|
|
|
class AutolinkPattern(Pattern):
    """ Pattern turning an autolink (`<http://example/com>`) into a link. """

    def handleMatch(self, m):
        """
        Build an `<a>` element whose href and text are both group(2).

        The href is unescaped first; the text is wrapped in an atomic
        string.

        """
        url = m.group(2)
        link = util.etree.Element("a")
        link.set('href', self.unescape(url))
        link.text = util.AtomicString(url)
        return link
507
|
|
|
|
508
|
|
|
|
509
|
|
|
class AutomailPattern(Pattern):
    """
    Pattern turning an automail link (`<foo@example.com>`) into a
    mailto link element.

    """

    def handleMatch(self, m):
        """
        Build an `<a>` element with entity-encoded text and href.

        A leading "mailto:" is dropped from the unescaped address. Each
        character of the link text is written as a named entity where one
        is defined and as a numeric entity otherwise; the href
        ("mailto:" plus the address) uses numeric entities throughout.

        """
        link = util.etree.Element('a')

        address = self.unescape(m.group(2))
        prefix = "mailto:"
        if address.startswith(prefix):
            address = address[len(prefix):]

        def codepoint2name(code):
            """Return entity definition by code, or the code if not defined."""
            name = entities.codepoint2name.get(code)
            if not name:
                return "%s#%d;" % (util.AMP_SUBSTITUTE, code)
            return "%s%s;" % (util.AMP_SUBSTITUTE, name)

        encoded_text = ''.join(
            [codepoint2name(ord(char)) for char in address]
        )
        link.text = util.AtomicString(encoded_text)

        target = "mailto:" + address
        encoded_target = "".join(
            [util.AMP_SUBSTITUTE + ('#%d;' % ord(char)) for char in target]
        )
        link.set('href', encoded_target)
        return link
535
|
|
|
|