Completed
Push — master ( 32cfa8...ec62d3 )
by Dongxin
48s
created

AttrListTreeprocessor.sanitize_name()   A

Complexity

Conditions 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
c 1
b 0
f 0
dl 0
loc 6
rs 9.4285
1
"""
2
Attribute List Extension for Python-Markdown
3
============================================
4
5
Adds attribute list syntax. Inspired by
6
[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
7
feature of the same name.
8
9
See <https://pythonhosted.org/Markdown/extensions/attr_list.html>
10
for documentation.
11
12
Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
13
14
All changes Copyright 2011-2014 The Python Markdown Project
15
16
License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
17
18
"""
19
20
from __future__ import absolute_import
21
from __future__ import unicode_literals
22
from . import Extension
23
from ..treeprocessors import Treeprocessor
24
from ..util import isBlockLevel
25
import re
26
27
# ``re.Scanner`` is the tokenizer class used to parse attribute lists below.
# Prefer the copy exposed on the ``re`` module and fall back to the legacy
# ``sre`` module only when ``re`` does not provide it.
try:
    Scanner = re.Scanner
except AttributeError:  # pragma: no cover
    # must be on Python 2.4
    from sre import Scanner
32
33
34
def _handle_double_quote(s, t):
    """ Split a `key="value"` token into a `(key, value)` tuple.

    `s` is the scanner instance (unused); `t` is the matched token text.
    The token is split on the first `=` only, so the value may itself
    contain `=`. Double quotes are stripped from both ends of the value.
    """
    k, v = t.split('=', 1)
    return k, v.strip('"')
37
38
39
def _handle_single_quote(s, t):
    """ Split a `key='value'` token into a `(key, value)` tuple.

    `s` is the scanner instance (unused); `t` is the matched token text.
    The token is split on the first `=` only, so the value may itself
    contain `=`. Single quotes are stripped from both ends of the value.
    """
    k, v = t.split('=', 1)
    return k, v.strip("'")
42
43
44
def _handle_key_value(s, t):
    """ Split an unquoted `key=value` token into a `(key, value)` tuple.

    `s` is the scanner instance (unused); `t` is the matched token text,
    which is split on its first `=`. A tuple (not the list produced by
    `str.split`) is returned so the result has the same shape as the other
    token handlers.
    """
    k, v = t.split('=', 1)
    return k, v
46
47
48
def _handle_word(s, t):
    """ Convert a bare word token into a `(key, value)` tuple.

    `s` is the scanner instance (unused); `t` is the matched token text.

    * `.name` yields `('.', 'name')`; the `'.'` key marks a class name.
    * `#name` yields `('id', 'name')`.
    * any other word yields `(word, word)`, i.e. the word is used as both
      the key and the value.
    """
    if t.startswith('.'):
        return '.', t[1:]
    if t.startswith('#'):
        return 'id', t[1:]
    return t, t
54
55
56
# Tokenizer for the text inside an attribute list. Each rule pairs a pattern
# with the handler that turns the matched token into a (key, value) pair.
# The quoted forms are listed before the plain forms so that a quoted value
# containing spaces is consumed as a single token.
_scanner = Scanner([
    # key="value with spaces"
    (r'[^ =]+=".*?"', _handle_double_quote),
    # key='value with spaces'
    (r"[^ =]+='.*?'", _handle_single_quote),
    # key=value
    (r'[^ =]+=[^ =]+', _handle_key_value),
    # .class, #id or a bare word
    (r'[^ =]+', _handle_word),
    # a single separating space produces no token
    (r' ', None)
])
63
64
65
def get_attrs(str):
    """ Parse attribute list and return a list of attribute tuples.

    `str` is the text of the attribute list to tokenize with the module
    level scanner. Only the first item of the scan result (the list of
    handler results) is returned; the second item, the text the scanner
    could not match, is discarded.
    """
    return _scanner.scan(str)[0]
68
69
70
def isheader(elem):
    """ Return True if `elem.tag` is one of the heading tags `h1`-`h6`. """
    return elem.tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
72
73
74
class AttrListTreeprocessor(Treeprocessor):
    """
    Find attribute lists in the text of a document tree and apply them.

    For every element visited, the text where an attribute list may appear
    is searched; when one is found its attributes are set on the element
    and the attribute list is removed from the text.
    """

    # An attribute list: `{` or `{:`, then anything except `}` or a newline
    # (captured as group 1), then `}`.
    BASE_RE = r'\{\:?([^\}\n]*)\}'
    # Attribute list at the end of the text, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
    # Attribute list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    # One or more characters that are NOT in the set of allowed name
    # characters; used by `sanitize_name` to replace such runs.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """
        Walk every element of `doc` and apply any attribute list found.

        The tree is modified in place: attributes are assigned to the
        elements and the matched attribute list text is cut from the
        element's `text` or from a child's `tail`.
        """
        for elem in doc.iter():
            if isBlockLevel(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag == 'dt':
                    # header or def-term: check for attrs at end of line
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a ul or ol.
                    pos = None
                    # find the ul or ol position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no ul or ol.
                        m = RE.search(elem[-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before ul or ol
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. ul is first child.
                        m = RE.search(elem.text)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if not m and elem.tag == 'td':
                        # table cells: accept an attribute list anywhere in
                        # the text, not only on its own last line
                        m = re.search(self.BASE_RE, elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        # everything from the start of the match onward is
                        # dropped from the text
                        elem.text = elem.text[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """
        Assign attrs to element.

        `attrs` is the raw text of an attribute list (the part between the
        braces). A `'.'` key appends its value to the element's existing
        `class` attribute, separated by a space; any other key is sanitized
        and set directly, replacing an existing value.
        """
        for k, v in get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '%s %s' % (cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See http://www.w3.org/TR/REC-xml-names/#NT-NCName

        Every run of characters matched by `NAME_RE` is replaced by a
        single underscore.

        NOTE(review): `NAME_RE` lists ":" among the allowed characters, so
        colons are kept, unlike the 'minus the ":"' wording above. Confirm
        which is intended.
        """
        return self.NAME_RE.sub('_', name)
168
169
170
class AttrListExtension(Extension):
    """ Extension that adds `AttrListTreeprocessor` to a Markdown instance. """

    def extendMarkdown(self, md, md_globals):
        """
        Add an `AttrListTreeprocessor` to `md.treeprocessors`.

        The processor is added under the name 'attr_list' with the location
        string '>prettify' (presumably "after the 'prettify' processor";
        confirm against the registry's `add` documentation). `md_globals`
        is not used.
        """
        md.treeprocessors.add(
            'attr_list', AttrListTreeprocessor(md), '>prettify'
        )
175
176
177
def makeExtension(*args, **kwargs):
    """ Return an `AttrListExtension` built from the given arguments. """
    return AttrListExtension(*args, **kwargs)
179