AttrListTreeprocessor.sanitize_name() - Code Metrics - Inspection of "pythonx/markdown_parser.py" - MikeCoder/markdown-preview.vim - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 32cfa8...ec62d3 )

by Dongxin

created 2017-08-15 03:07 UTC

AttrListTreeprocessor.sanitize_name() A

↳ Parent: AttrListTreeprocessor

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	1
c	1
b	0
f	0
dl	0
loc	6
rs	9.4285

"""
Attribute List Extension for Python-Markdown
============================================

Adds attribute list syntax. Inspired by
[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.

See <https://pythonhosted.org/Markdown/extensions/attr_list.html>
for documentation.

Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).

All changes Copyright 2011-2014 The Python Markdown Project

License: [BSD](http://www.opensource.org/licenses/bsd-license.php)

"""

from __future__ import absolute_import
from __future__ import unicode_literals
from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import isBlockLevel
import re

# Bind ``Scanner`` to the tokenizer class: use the one exposed by ``re`` when
# it exists, otherwise fall back to the legacy ``sre`` module.
if hasattr(re, 'Scanner'):
    Scanner = re.Scanner
else:  # pragma: no cover
    # must be on Python 2.4
    from sre import Scanner


def _handle_double_quote(s, t):
    """
    Scanner action for a ``key="value"`` token.

    `s` is the scanner (unused); `t` is the matched text. Returns a
    ``(key, value)`` tuple with the double quotes stripped from both ends
    of the value.
    """
    name, quoted = t.split('=', 1)
    value = quoted.strip('"')
    return name, value


def _handle_single_quote(s, t):
    """
    Scanner action for a ``key='value'`` token.

    `s` is the scanner (unused); `t` is the matched text. Returns a
    ``(key, value)`` tuple with the single quotes stripped from both ends
    of the value.
    """
    name, quoted = t.split('=', 1)
    value = quoted.strip("'")
    return name, value


def _handle_key_value(s, t):
    """
    Scanner action for an unquoted ``key=value`` token.

    `s` is the scanner (unused); `t` is the matched text. Returns a
    ``(key, value)`` tuple, split at the first ``=``.

    A tuple (rather than the list ``str.split`` produces) is returned so that
    every scanner action yields the same type of item.
    """
    key, value = t.split('=', 1)
    return key, value


def _handle_word(s, t):
    """
    Scanner action for a bare word (a token without ``=``).

    `s` is the scanner (unused); `t` is the matched text. Returns:

    * ``('.', name)`` for ``.name`` (a class),
    * ``('id', name)`` for ``#name`` (an id),
    * ``(t, t)`` for anything else.
    """
    marker, remainder = t[:1], t[1:]
    if marker == '.':
        return '.', remainder
    if marker == '#':
        return 'id', remainder
    return t, t


# Tokenizer for the text found between the braces of an attribute list.
# The rules are listed from most to least specific: the quoted key/value
# forms come first, then the unquoted key=value form, then bare words.
# Keep this order -- a later rule would also match the start of the text an
# earlier rule is meant to consume.
_scanner = Scanner([
    # key="value" (value may contain spaces; non-greedy up to the next ")
    (r'[^ =]+=".*?"', _handle_double_quote),
    # key='value' (value may contain spaces; non-greedy up to the next ')
    (r"[^ =]+='.*?'", _handle_single_quote),
    # key=value with no spaces or further '=' in the value
    (r'[^ =]+=[^ =]+', _handle_key_value),
    # bare word: .class, #id, or any other run without space or '='
    (r'[^ =]+', _handle_word),
    # a single space separates tokens; the None action emits no token for it
    (r' ', None)
])


def get_attrs(str):
    """
    Parse an attribute list and return the list of parsed attributes.

    `str` is the text found between the braces. Only the tokens the scanner
    recognised are returned; any unscanned remainder of the input is
    discarded.
    """
    tokens, _unscanned = _scanner.scan(str)
    return tokens


def isheader(elem):
    """ Return True if the tag of `elem` is one of ``h1`` .. ``h6``. """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags


class AttrListTreeprocessor(Treeprocessor):
    """
    Tree processor that looks for attribute lists (``{: ... }`` or
    ``{ ... }``) in the text of the document's elements, applies the parsed
    attributes to the element and removes the attribute list from the text.
    """

    # An attribute list: ``{``, an optional ``:``, the captured attribute
    # text (which may not contain ``}`` or a newline), then ``}``.
    BASE_RE = r'\{\:?([^\}\n]*)\}'
    # Attribute list at the end of the string, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
    # Attribute list alone (apart from spaces) on the last line of the string.
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    # Attribute list at the very start of the string.
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    # Matches every run of characters that is NOT in the set below; used by
    # sanitize_name() to replace such runs in attribute names.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """
        Walk every element of `doc` and apply any attribute list found.

        For each element the method picks the string in which an attribute
        list may appear (the element's text, the tail of one of its children,
        or the element's own tail), searches it with the matching pattern,
        and on a match calls assign_attrs() and cuts the attribute list out
        of that string. The tree is modified in place; nothing is returned.
        """
        for elem in doc.iter():
            if isBlockLevel(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag == 'dt':
                    # header or def-term: check for attrs at end of line
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a ul or ol.
                    pos = None
                    # find the position of the first ul or ol child
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no ul or ol.
                        m = RE.search(elem[-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            # drop the attribute list from the tail
                            elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before ul or ol
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # fall back to the item's own text. This branch is
                        # reached when neither tail above was usable, e.g.
                        # when the ul/ol is the first child.
                        m = RE.search(elem.text)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if not m and elem.tag == 'td':
                        # table cell: accept an attribute list anywhere in
                        # the text, not only at the end.
                        # NOTE: the slice below then also drops any text that
                        # follows the attribute list.
                        m = re.search(self.BASE_RE, elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        # keep only what follows the attribute list
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """
        Assign attrs to element.

        `attrs` is the raw text from inside the braces; it is parsed with
        get_attrs(). A ``.name`` item is appended to the element's existing
        ``class`` value (space separated); every other item is set as an
        attribute whose name is first passed through sanitize_name().
        """
        for k, v in get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '%s %s' % (cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See http://www.w3.org/TR/REC-xml-names/#NT-NCName

        Each run of characters matched by NAME_RE is replaced with a single
        underscore; the resulting string is returned.
        """
        return self.NAME_RE.sub('_', name)


class AttrListExtension(Extension):
    """ Extension that registers the attribute-list tree processor. """

    def extendMarkdown(self, md, md_globals):
        """
        Add an AttrListTreeprocessor to `md.treeprocessors` under the name
        ``attr_list``, positioned with the ``>prettify`` location string.
        """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.add('attr_list', processor, '>prettify')


def makeExtension(*args, **kwargs):
    """ Build an AttrListExtension, forwarding all arguments unchanged. """
    extension = AttrListExtension(*args, **kwargs)
    return extension


1			"""
2			Attribute List Extension for Python-Markdown
3			============================================
4
5			Adds attribute list syntax. Inspired by
6			[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
7			feature of the same name.
8
9			See <https://pythonhosted.org/Markdown/extensions/attr_list.html>
10			for documentation.
11
12			Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
13
14			All changes Copyright 2011-2014 The Python Markdown Project
15
16			License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
17
18			"""
19
20			from __future__ import absolute_import
21			from __future__ import unicode_literals
22			from . import Extension
23			from ..treeprocessors import Treeprocessor
24			from ..util import isBlockLevel
25			import re
26
27			try:
28			Scanner = re.Scanner
29			except AttributeError: # pragma: no cover
30			# must be on Python 2.4
31			from sre import Scanner
32
33
34			def _handle_double_quote(s, t):
35			k, v = t.split('=', 1)
36			return k, v.strip('"')
37
38
39			def _handle_single_quote(s, t):
40			k, v = t.split('=', 1)
41			return k, v.strip("'")
42
43
44			def _handle_key_value(s, t):
45			return t.split('=', 1)
46
47
48			def _handle_word(s, t):
49			if t.startswith('.'):
50			return '.', t[1:]
51			if t.startswith('#'):
52			return 'id', t[1:]
53			return t, t
54
55
56			_scanner = Scanner([
57			(r'[^ =]+=".*?"', _handle_double_quote),
58			(r"[^ =]+='.*?'", _handle_single_quote),
59			(r'[^ =]+=[^ =]+', _handle_key_value),
60			(r'[^ =]+', _handle_word),
61			(r' ', None)
62			])
63
64
65			def get_attrs(str):
66			""" Parse attribute list and return a list of attribute tuples. """
67			return _scanner.scan(str)[0]
68
69
70			def isheader(elem):
71			return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
72
73
74			class AttrListTreeprocessor(Treeprocessor):
75
76			BASE_RE = r'\{\:?([^\}\n]*)\}'
77			HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
78			BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
79			INLINE_RE = re.compile(r'^%s' % BASE_RE)
80			NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
81			r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
82			r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
83			r'\uf900-\ufdcf\ufdf0-\ufffd'
84			r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
85
86			def run(self, doc):
87			for elem in doc.iter():
88			if isBlockLevel(elem.tag):
89			# Block level: check for attrs on last line of text
90			RE = self.BLOCK_RE
91			if isheader(elem) or elem.tag == 'dt':
92			# header or def-term: check for attrs at end of line
93			RE = self.HEADER_RE
94			if len(elem) and elem.tag == 'li':
95			# special case list items. children may include a ul or ol.
96			pos = None
97			# find the ul or ol position
98			for i, child in enumerate(elem):
99			if child.tag in ['ul', 'ol']:
100			pos = i
101			break
102			if pos is None and elem[-1].tail:
103			# use tail of last child. no ul or ol.
104			m = RE.search(elem[-1].tail)
105			if m:
106			self.assign_attrs(elem, m.group(1))
107			elem[-1].tail = elem[-1].tail[:m.start()]
108			elif pos is not None and pos > 0 and elem[pos-1].tail:
109			# use tail of last child before ul or ol
110			m = RE.search(elem[pos-1].tail)
111			if m:
112			self.assign_attrs(elem, m.group(1))
113			elem[pos-1].tail = elem[pos-1].tail[:m.start()]
114			elif elem.text:
115			# use text. ul is first child.
116			m = RE.search(elem.text)
117			if m:
118			self.assign_attrs(elem, m.group(1))
119			elem.text = elem.text[:m.start()]
120			elif len(elem) and elem[-1].tail:
121			# has children. Get from tail of last child
122			m = RE.search(elem[-1].tail)
123			if m:
124			self.assign_attrs(elem, m.group(1))
125			elem[-1].tail = elem[-1].tail[:m.start()]
126			if isheader(elem):
127			# clean up trailing #s
128			elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
129			elif elem.text:
130			# no children. Get from text.
131			m = RE.search(elem.text)
132			if not m and elem.tag == 'td':
133			m = re.search(self.BASE_RE, elem.text)
134			if m:
135			self.assign_attrs(elem, m.group(1))
136			elem.text = elem.text[:m.start()]
137			if isheader(elem):
138			# clean up trailing #s
139			elem.text = elem.text.rstrip('#').rstrip()
140			else:
141			# inline: check for attrs at start of tail
142			if elem.tail:
143			m = self.INLINE_RE.match(elem.tail)
144			if m:
145			self.assign_attrs(elem, m.group(1))
146			elem.tail = elem.tail[m.end():]
147
148			def assign_attrs(self, elem, attrs):
149			""" Assign attrs to element. """
150			for k, v in get_attrs(attrs):
151			if k == '.':
152			# add to class
153			cls = elem.get('class')
154			if cls:
155			elem.set('class', '%s %s' % (cls, v))
156			else:
157			elem.set('class', v)
158			else:
159			# assign attr k with v
160			elem.set(self.sanitize_name(k), v)
161
162			def sanitize_name(self, name):
163			"""
164			Sanitize name as 'an XML Name, minus the ":"'.
165			See http://www.w3.org/TR/REC-xml-names/#NT-NCName
166			"""
167			return self.NAME_RE.sub('_', name)
168
169
170			class AttrListExtension(Extension):
171			def extendMarkdown(self, md, md_globals):
172			md.treeprocessors.add(
173			'attr_list', AttrListTreeprocessor(md), '>prettify'
174			)
175
176
177			def makeExtension(*args, **kwargs):
178			return AttrListExtension(*args, **kwargs)
179

MikeCoder / markdown-preview.vim

Push — master ( 32cfa8...ec62d3 )

AttrListTreeprocessor.sanitize_name() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like