Markdown.convert() - Code Metrics - Inspection of "pythonx/markdown_parser.py" - MikeCoder/markdown-preview.vim - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 32cfa8...ec62d3 )

by Dongxin

created 2017-08-15 03:07 UTC

Markdown.convert() D

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	2
Bugs	0	Features	0

Metric	Value
cc	10
c	2
b	0
f	0
dl	0
loc	69
rs	4.2857

How to fix Long Method Complexity

"""
Python Markdown
===============

Python Markdown converts Markdown to HTML and can be used as a library or
called from the command line.

## Basic usage as a module:

    import markdown
    html = markdown.markdown(your_text_string)

See <https://pythonhosted.org/Markdown/> for more
information and instructions on how to extend the functionality of
Python Markdown.  Read that before you try modifying this file.

## Authors and License

Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
maintained  by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).

Contact: markdown@freewisdom.org

Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later)
Copyright 200? Django Software Foundation (OrderedDict implementation)
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
Copyright 2004 Manfred Stienstra (the original version)

License: BSD (see LICENSE for details).
"""

from __future__ import absolute_import
from __future__ import unicode_literals
from .__version__ import version, version_info  # noqa
import codecs
import sys
import logging
import warnings
import importlib
from . import util
from .preprocessors import build_preprocessors
from .blockprocessors import build_block_parser
from .treeprocessors import build_treeprocessors
from .inlinepatterns import build_inlinepatterns
from .postprocessors import build_postprocessors
from .extensions import Extension
from .serializers import to_html_string, to_xhtml_string

# Public names exported by ``from <package> import *``.
__all__ = ['Markdown', 'markdown', 'markdownFromFile']


# Module-level logger; extension loading reports its progress here at DEBUG.
logger = logging.getLogger('MARKDOWN')


class Markdown(object):
    """Convert Markdown to HTML.

    An instance owns the processor pipeline created by `build_parser()` and
    the extensions registered on it.  `convert()` turns a Unicode string into
    serialized (X)HTML, `convertFile()` does the same for files/streams, and
    `reset()` clears the per-document state (references, HTML stash and any
    extension state).
    """

    doc_tag = "div"     # Element used to wrap document - later removed

    # Options that may be overridden by keyword in `__init__`; each one
    # becomes an instance attribute of the same name.
    option_defaults = {
        'html_replacement_text': '[HTML_REMOVED]',
        'tab_length':            4,
        'enable_attributes':     True,
        'smart_emphasis':        True,
        'lazy_ol':               True,
    }

    # Maps each accepted `output_format` name to its serializer function.
    output_formats = {
        'html':   to_html_string,
        'html4':  to_html_string,
        'html5':  to_html_string,
        'xhtml':  to_xhtml_string,
        'xhtml1': to_xhtml_string,
        'xhtml5': to_xhtml_string,
    }

    def __init__(self, *args, **kwargs):
        """
        Creates a new Markdown instance.

        Keyword arguments:

        * extensions: A list of extensions.
           If they are of type string, the named module is imported (see
           `build_extension` for the locations that are tried).
           If they are a subclass of markdown.Extension, they will be used
           as-is.
        * extension_configs: Configuration settings for extensions.
        * output_format: Format of output. Supported formats are:
            * "xhtml1": Outputs XHTML 1.x. Default.
            * "xhtml5": Outputs XHTML style tags of HTML 5
            * "xhtml": Outputs latest supported version of XHTML
              (currently XHTML 1.1).
            * "html4": Outputs HTML 4
            * "html5": Outputs HTML style tags of HTML 5
            * "html": Outputs latest supported version of HTML
              (currently HTML 4).
            Note that it is suggested that the more specific formats ("xhtml1"
            and "html4") be used as "xhtml" or "html" may change in the future
            if it makes sense at that time.
        * safe_mode: Deprecated! Disallow raw html. One of "remove", "replace"
          or "escape".
        * html_replacement_text: Deprecated! Text used when safe_mode is set
          to "replace".
        * tab_length: Length of tabs in the source. Default: 4
        * enable_attributes: Enable the conversion of attributes. Default: True
        * smart_emphasis: Treat `_connected_words_` intelligently Default: True
        * lazy_ol: Ignore number of first item of ordered lists. Default: True

        Positional arguments are still accepted, in the order `extensions`,
        `extension_configs`, `safe_mode`, `output_format`, but emit a
        DeprecationWarning.

        """

        # For backward compatibility, map old positional args onto their
        # keyword names.  An explicitly passed keyword wins, and `zip` drops
        # any positional args beyond the four known names.
        pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format']
        for name, arg in zip(pos, args):
            if name not in kwargs:
                kwargs[name] = arg
        if args:
            warnings.warn('Positional arguments are deprecated in Markdown. '
                          'Use keyword arguments only.',
                          DeprecationWarning)

        # Loop through kwargs and assign defaults
        for option, default in self.option_defaults.items():
            setattr(self, option, kwargs.get(option, default))

        self.safeMode = kwargs.get('safe_mode', False)
        if self.safeMode and 'enable_attributes' not in kwargs:
            # Disable attributes in safeMode when not explicitly set
            self.enable_attributes = False

        if 'safe_mode' in kwargs:
            warnings.warn('"safe_mode" is deprecated in Python-Markdown. '
                          'Use an HTML sanitizer (like '
                          'Bleach https://bleach.readthedocs.io/) '
                          'if you are parsing untrusted markdown text. '
                          'See the 2.6 release notes for more info',
                          DeprecationWarning)

        if 'html_replacement_text' in kwargs:
            warnings.warn('The "html_replacement_text" keyword is '
                          'deprecated along with "safe_mode".',
                          DeprecationWarning)

        self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
                              '(', ')', '>', '#', '+', '-', '.', '!']

        self.registeredExtensions = []
        self.docType = ""
        self.stripTopLevelTags = True

        # The pipeline must exist before extensions are registered, because
        # each extension is handed this instance to modify.
        self.build_parser()

        self.references = {}
        self.htmlStash = util.HtmlStash()
        self.registerExtensions(extensions=kwargs.get('extensions', []),
                                configs=kwargs.get('extension_configs', {}))
        self.set_output_format(kwargs.get('output_format', 'xhtml1'))
        self.reset()

    def build_parser(self):
        """ Build the parser from the various parts.

        Populates `preprocessors`, `parser`, `inlinePatterns`,
        `treeprocessors` and `postprocessors`.  Returns `self`.
        """
        self.preprocessors = build_preprocessors(self)
        self.parser = build_block_parser(self)
        self.inlinePatterns = build_inlinepatterns(self)
        self.treeprocessors = build_treeprocessors(self)
        self.postprocessors = build_postprocessors(self)
        return self

    def registerExtensions(self, extensions, configs):
        """
        Register extensions with this instance of Markdown.

        Keyword arguments:

        * extensions: A list of extensions, which can either
           be strings or objects.  See the docstring on Markdown.
        * configs: A dictionary mapping module names to config options.

        Raises TypeError for an entry that is neither a string, an
        `Extension` instance nor `None` (`None` entries are skipped).
        Returns `self`.

        """
        for ext in extensions:
            if isinstance(ext, util.string_type):
                # Named extension: import it and build an instance, using
                # the config stored under that same name (if any).
                ext = self.build_extension(ext, configs.get(ext, {}))
            if isinstance(ext, Extension):
                ext.extendMarkdown(self, globals())
                logger.debug(
                    'Successfully loaded extension "%s.%s".'
                    % (ext.__class__.__module__, ext.__class__.__name__)
                )
            elif ext is not None:
                raise TypeError(
                    'Extension "%s.%s" must be of type: "markdown.Extension"'
                    % (ext.__class__.__module__, ext.__class__.__name__))

        return self

    def build_extension(self, ext_name, configs):
        """Build an extension by name and return the extension instance.

        The extension name may contain arguments as part of the string in the
        following format: "extname(key1=value1,key2=value2)" (deprecated).
        It may also name a class: "path.to.module:ClassName".

        The module is looked up as `ext_name`, then
        `markdown.extensions.<ext_name>`, then `mdx_<ext_name>`; the last two
        forms emit a DeprecationWarning.

        Keyword arguments:

        * ext_name: Name of the extension, in one of the forms above.
        * configs: Config options passed to the extension as keywords.

        Returns the result of calling the named class, or the module's
        `makeExtension()` when no class name was given.

        Raises ImportError if none of the candidate modules can be imported,
        and AttributeError if the module lacks `makeExtension()`.

        """

        # Copy so that inline "(key=value)" args do not leak into the
        # caller's dict.
        configs = dict(configs)

        # Parse extensions config params (ignore the order)
        pos = ext_name.find("(")  # find the first "("
        if pos > 0:
            ext_args = ext_name[pos+1:-1]
            ext_name = ext_name[:pos]
            # Split on the first "=" only so a value may itself contain "=".
            pairs = [x.split("=", 1) for x in ext_args.split(",")]
            configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
            warnings.warn('Setting configs in the Named Extension string is '
                          'deprecated. It is recommended that you '
                          'pass an instance of the extension class to '
                          'Markdown or use the "extension_configs" keyword. '
                          'The current behavior will raise an error in version 2.7. '
                          'See the Release Notes for Python-Markdown version '
                          '2.6 for more info.', DeprecationWarning)

        # Get class name (if provided): `path.to.module:ClassName`
        ext_name, class_name = ext_name.split(':', 1) \
            if ':' in ext_name else (ext_name, '')

        # Try loading the extension first from one place, then another
        try:
            # Assume string uses dot syntax (`path.to.some.module`)
            module = importlib.import_module(ext_name)
            logger.debug(
                'Successfuly imported extension module "%s".' % ext_name
            )
            # For backward compat (until deprecation)
            # check that this is an extension.
            if ('.' not in ext_name and not (hasattr(module, 'makeExtension') or
               (class_name and hasattr(module, class_name)))):
                # We have a name conflict
                # eg: extensions=['tables'] and PyTables is installed
                raise ImportError
        except ImportError:
            # Prepend `markdown.extensions.` to name
            module_name = '.'.join(['markdown.extensions', ext_name])
            try:
                module = importlib.import_module(module_name)
                logger.debug(
                    'Successfuly imported extension module "%s".' %
                    module_name
                )
                warnings.warn('Using short names for Markdown\'s builtin '
                              'extensions is deprecated. Use the '
                              'full path to the extension with Python\'s dot '
                              'notation (eg: "%s" instead of "%s"). The '
                              'current behavior will raise an error in version '
                              '2.7. See the Release Notes for '
                              'Python-Markdown version 2.6 for more info.' %
                              (module_name, ext_name),
                              DeprecationWarning)
            except ImportError:
                # Prepend `mdx_` to name
                module_name_old_style = '_'.join(['mdx', ext_name])
                try:
                    module = importlib.import_module(module_name_old_style)
                    logger.debug(
                        'Successfuly imported extension module "%s".' %
                        module_name_old_style)
                    warnings.warn('Markdown\'s behavior of prepending "mdx_" '
                                  'to an extension name is deprecated. '
                                  'Use the full path to the '
                                  'extension with Python\'s dot notation '
                                  '(eg: "%s" instead of "%s"). The current '
                                  'behavior will raise an error in version 2.7. '
                                  'See the Release Notes for Python-Markdown '
                                  'version 2.6 for more info.' %
                                  (module_name_old_style, ext_name),
                                  DeprecationWarning)
                except ImportError as e:
                    # Re-raise the original error with a message that lists
                    # every location that was tried.
                    message = "Failed loading extension '%s' from '%s', '%s' " \
                        "or '%s'" % (ext_name, ext_name, module_name,
                                     module_name_old_style)
                    e.args = (message,) + e.args[1:]
                    raise

        if class_name:
            # Load given class name from module.
            return getattr(module, class_name)(**configs)
        else:
            # Expect the module to provide a makeExtension() factory.
            try:
                return module.makeExtension(**configs)
            except AttributeError as e:
                message = e.args[0]
                message = "Failed to initiate extension " \
                          "'%s': %s" % (ext_name, message)
                e.args = (message,) + e.args[1:]
                raise

    def registerExtension(self, extension):
        """ This gets called by the extension.

        Records `extension` so that `reset()` can reset it.  Returns `self`.
        """
        self.registeredExtensions.append(extension)
        return self

    def reset(self):
        """
        Resets all state variables so that we can start with a new text.

        Clears the HTML stash and the references, then calls `reset()` on
        every registered extension that defines it.  Returns `self`.
        """
        self.htmlStash.reset()
        self.references.clear()

        for extension in self.registeredExtensions:
            if hasattr(extension, 'reset'):
                extension.reset()

        return self

    def set_output_format(self, format):
        """ Set the output format for the class instance.

        `format` is matched case-insensitively against the keys of
        `output_formats`.  Raises KeyError, with a message listing the valid
        names, when it is not one of them.  Returns `self`.
        """
        self.output_format = format.lower()
        try:
            self.serializer = self.output_formats[self.output_format]
        except KeyError as e:
            valid_formats = sorted(self.output_formats)
            message = 'Invalid Output Format: "%s". Use one of %s.' \
                % (self.output_format,
                   '"' + '", "'.join(valid_formats) + '"')
            e.args = (message,) + e.args[1:]
            raise
        return self

    def convert(self, source):
        """
        Convert markdown to serialized XHTML or HTML.

        Keyword arguments:

        * source: Source text as a Unicode string.

        Markdown processing takes place in five steps:

        1. A bunch of "preprocessors" munge the input text.
        2. BlockParser() parses the high-level structural elements of the
           pre-processed text into an ElementTree.
        3. A bunch of "treeprocessors" are run against the ElementTree. One
           such treeprocessor runs InlinePatterns against the ElementTree,
           detecting inline markup.
        4. Some post-processors are run against the text after the ElementTree
           has been serialized into text.
        5. The output is written to a string.

        Returns the converted text, stripped of surrounding whitespace; an
        empty string is returned for blank input.

        Raises UnicodeDecodeError if `source` cannot be converted to
        Unicode, and ValueError if the wrapping `doc_tag` element cannot be
        located in the serialized output.

        """

        # Fixup the source text
        if not source.strip():
            return ''  # a blank unicode string

        try:
            source = util.text_type(source)
        except UnicodeDecodeError as e:
            # Customise error message while maintaining original traceback
            e.reason += '. -- Note: Markdown only accepts unicode input!'
            raise

        # Split into lines and run the line preprocessors.
        self.lines = source.split("\n")
        for prep in self.preprocessors.values():
            self.lines = prep.run(self.lines)

        # Parse the high-level elements.
        root = self.parser.parseDocument(self.lines).getroot()

        # Run the tree-processors.  A treeprocessor may return a new root,
        # or None to signal that it modified the tree in place.
        for treeprocessor in self.treeprocessors.values():
            newRoot = treeprocessor.run(root)
            if newRoot is not None:
                root = newRoot

        # Serialize _properly_.  Strip top-level tags.
        output = self.serializer(root)
        if self.stripTopLevelTags:
            try:
                # Skip past the opening tag: the tag name plus "<" and ">".
                start = output.index(
                    '<%s>' % self.doc_tag) + len(self.doc_tag) + 2
                end = output.rindex('</%s>' % self.doc_tag)
                output = output[start:end].strip()
            except ValueError:  # pragma: no cover
                if output.strip().endswith('<%s />' % self.doc_tag):
                    # We have an empty document
                    output = ''
                else:
                    # We have a serious problem
                    raise ValueError('Markdown failed to strip top-level '
                                     'tags. Document=%r' % output.strip())

        # Run the text post-processors
        for pp in self.postprocessors.values():
            output = pp.run(output)

        return output.strip()

    def convertFile(self, input=None, output=None, encoding=None):
        """Converts a Markdown file and writes the HTML to a file or stream.

        Decodes the file using the provided encoding (defaults to utf-8),
        passes the file content to markdown, and outputs the html to either
        the provided stream or the file with provided name, using the same
        encoding as the source file. The 'xmlcharrefreplace' error handler is
        used when encoding the output.

        **Note:** This is the only place that decoding and encoding of Unicode
        takes place in Python-Markdown.  (All other code is Unicode-in /
        Unicode-out.)

        Keyword arguments:

        * input: File object or path. Reads from stdin if `None`.
        * output: File object or path. Writes to stdout if `None`.
        * encoding: Encoding of input and output files. Defaults to utf-8.

        Returns `self`.  An input file object is closed after reading; an
        output file object is left open for the caller.

        """

        encoding = encoding or "utf-8"

        # Read the source
        if input:
            if isinstance(input, util.string_type):
                input_file = codecs.open(input, mode="r", encoding=encoding)
            else:
                input_file = codecs.getreader(encoding)(input)
            try:
                text = input_file.read()
            finally:
                # Release the handle even when reading/decoding fails.
                input_file.close()
        else:
            text = sys.stdin.read()
            if not isinstance(text, util.text_type):
                text = text.decode(encoding)

        text = text.lstrip('\ufeff')  # remove the byte-order mark

        # Convert
        html = self.convert(text)

        # Write to file or stdout
        if output:
            if isinstance(output, util.string_type):
                output_file = codecs.open(output, "w",
                                          encoding=encoding,
                                          errors="xmlcharrefreplace")
                try:
                    output_file.write(html)
                finally:
                    # We opened this file, so close it even if the write
                    # fails.
                    output_file.close()
            else:
                writer = codecs.getwriter(encoding)
                output_file = writer(output, errors="xmlcharrefreplace")
                output_file.write(html)
                # Don't close here. User may want to write more.
        else:
            # Encode manually and write bytes to stdout.
            html = html.encode(encoding, "xmlcharrefreplace")
            try:
                # Write bytes directly to buffer (Python 3).
                sys.stdout.buffer.write(html)
            except AttributeError:
                # Probably Python 2, which works with bytes by default.
                sys.stdout.write(html)

        return self


"""
EXPORTED FUNCTIONS
=============================================================================

Those are the two functions we really mean to export: markdown() and
markdownFromFile().
"""


def markdown(text, *args, **kwargs):
    """Convert a Markdown string to HTML and return it as a Unicode string.

    Convenience wrapper for the common one-shot case: a fresh `Markdown`
    instance is built from the given arguments (which also loads any
    requested extensions) and used to convert `text` once.

    Keyword arguments:

    * text: Markdown formatted text as Unicode or ASCII string.
    * Any arguments accepted by the Markdown class.

    Returns: An HTML document as a string.

    """
    converter = Markdown(*args, **kwargs)
    html = converter.convert(text)
    return html


def markdownFromFile(*args, **kwargs):
    """Read markdown code from a file and write it to a file or a stream.

    This is a shortcut function which initializes an instance of Markdown,
    and calls the convertFile method rather than convert.

    Keyword arguments:

    * input: a file name or readable object.
    * output: a file name or writable object.
    * encoding: Encoding of input and output.
    * Any arguments accepted by the Markdown class.

    Positional arguments are still accepted, in the order `input`,
    `output`, `extensions`, `encoding`, but emit a DeprecationWarning.

    Returns None; the result is written to `output` (or stdout).

    """
    # For backward compatibility, map positional args onto their keyword
    # names.  An explicitly passed keyword wins, and `zip` drops any
    # positional args beyond the four known names.
    pos = ['input', 'output', 'extensions', 'encoding']
    for name, arg in zip(pos, args):
        if name not in kwargs:
            kwargs[name] = arg
    if args:
        warnings.warn('Positional arguments are deprecated in '
                      'Markdown and will raise an error in version 2.7. '
                      'Use keyword arguments only.',
                      DeprecationWarning)

    # The full kwargs dict (including input/output/encoding) is handed to
    # Markdown, which only reads the keys it knows about.
    md = Markdown(**kwargs)
    md.convertFile(kwargs.get('input', None),
                   kwargs.get('output', None),
                   kwargs.get('encoding', None))


1			"""
2			Python Markdown
3			===============
4
5			Python Markdown converts Markdown to HTML and can be used as a library or
6			called from the command line.
7
8			## Basic usage as a module:
9
10			import markdown
11			html = markdown.markdown(your_text_string)
12
13			See <https://pythonhosted.org/Markdown/> for more
14			information and instructions on how to extend the functionality of
15			Python Markdown. Read that before you try modifying this file.
16
17			## Authors and License
18
19			Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and
20			maintained by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
21			Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
22
23			Contact: markdown@freewisdom.org
24
25			Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later)
26			Copyright 200? Django Software Foundation (OrderedDict implementation)
27			Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
28			Copyright 2004 Manfred Stienstra (the original version)
29
30			License: BSD (see LICENSE for details).
31			"""
32
33			from __future__ import absolute_import
34			from __future__ import unicode_literals
35			from .__version__ import version, version_info # noqa
36			import codecs
37			import sys
38			import logging
39			import warnings
40			import importlib
41			from . import util
42			from .preprocessors import build_preprocessors
43			from .blockprocessors import build_block_parser
44			from .treeprocessors import build_treeprocessors
45			from .inlinepatterns import build_inlinepatterns
46			from .postprocessors import build_postprocessors
47			from .extensions import Extension
48			from .serializers import to_html_string, to_xhtml_string
49
50			__all__ = ['Markdown', 'markdown', 'markdownFromFile']
51
52
53			logger = logging.getLogger('MARKDOWN')
54
55
56			class Markdown(object):
57			"""Convert Markdown to HTML."""
58
59			doc_tag = "div" # Element used to wrap document - later removed
60
61			option_defaults = {
62			'html_replacement_text': '[HTML_REMOVED]',
63			'tab_length': 4,
64			'enable_attributes': True,
65			'smart_emphasis': True,
66			'lazy_ol': True,
67			}
68
69			output_formats = {
70			'html': to_html_string,
71			'html4': to_html_string,
72			'html5': to_html_string,
73			'xhtml': to_xhtml_string,
74			'xhtml1': to_xhtml_string,
75			'xhtml5': to_xhtml_string,
76			}
77
78			def __init__(self, *args, **kwargs):
79			"""
80			Creates a new Markdown instance.
81
82			Keyword arguments:
83
84			* extensions: A list of extensions.
85			If they are of type string, the module mdx_name.py will be loaded.
86			If they are a subclass of markdown.Extension, they will be used
87			as-is.
88			* extension_configs: Configuration settings for extensions.
89			* output_format: Format of output. Supported formats are:
90			* "xhtml1": Outputs XHTML 1.x. Default.
91			* "xhtml5": Outputs XHTML style tags of HTML 5
92			* "xhtml": Outputs latest supported version of XHTML
93			(currently XHTML 1.1).
94			* "html4": Outputs HTML 4
95			* "html5": Outputs HTML style tags of HTML 5
96			* "html": Outputs latest supported version of HTML
97			(currently HTML 4).
98			Note that it is suggested that the more specific formats ("xhtml1"
99			and "html4") be used as "xhtml" or "html" may change in the future
100			if it makes sense at that time.
101			* safe_mode: Deprecated! Disallow raw html. One of "remove", "replace"
102			or "escape".
103			* html_replacement_text: Deprecated! Text used when safe_mode is set
104			to "replace".
105			* tab_length: Length of tabs in the source. Default: 4
106			* enable_attributes: Enable the conversion of attributes. Default: True
107			* smart_emphasis: Treat `_connected_words_` intelligently Default: True
108			* lazy_ol: Ignore number of first item of ordered lists. Default: True
109
110			"""
111
112			# For backward compatibility, loop through old positional args
113			pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format']
114			for c, arg in enumerate(args):
115			if pos[c] not in kwargs:
116			kwargs[pos[c]] = arg
117			if c+1 == len(pos): # pragma: no cover
118			# ignore any additional args
119			break
120			if len(args):
121			warnings.warn('Positional arguments are deprecated in Markdown. '
122			'Use keyword arguments only.',
123			DeprecationWarning)
124
125			# Loop through kwargs and assign defaults
126			for option, default in self.option_defaults.items():
127			setattr(self, option, kwargs.get(option, default))
128
129			self.safeMode = kwargs.get('safe_mode', False)
130			if self.safeMode and 'enable_attributes' not in kwargs:
131			# Disable attributes in safeMode when not explicitly set
132			self.enable_attributes = False
133
134			if 'safe_mode' in kwargs:
135			warnings.warn('"safe_mode" is deprecated in Python-Markdown. '
136			'Use an HTML sanitizer (like '
137			'Bleach https://bleach.readthedocs.io/) '
138			'if you are parsing untrusted markdown text. '
139			'See the 2.6 release notes for more info',
140			DeprecationWarning)
141
142			if 'html_replacement_text' in kwargs:
143			warnings.warn('The "html_replacement_text" keyword is '
144			'deprecated along with "safe_mode".',
145			DeprecationWarning)
146
147			self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
148			'(', ')', '>', '#', '+', '-', '.', '!']
149
150			self.registeredExtensions = []
151			self.docType = ""
152			self.stripTopLevelTags = True
153
154			self.build_parser()
155
156			self.references = {}
157			self.htmlStash = util.HtmlStash()
158			self.registerExtensions(extensions=kwargs.get('extensions', []),
159			configs=kwargs.get('extension_configs', {}))
160			self.set_output_format(kwargs.get('output_format', 'xhtml1'))
161			self.reset()
162
163			def build_parser(self):
164			""" Build the parser from the various parts. """
165			self.preprocessors = build_preprocessors(self)
166			self.parser = build_block_parser(self)
167			self.inlinePatterns = build_inlinepatterns(self)
168			self.treeprocessors = build_treeprocessors(self)
169			self.postprocessors = build_postprocessors(self)
170			return self
171
172			def registerExtensions(self, extensions, configs):
173			"""
174			Register extensions with this instance of Markdown.
175
176			Keyword arguments:
177
178			* extensions: A list of extensions, which can either
179			be strings or objects. See the docstring on Markdown.
180			* configs: A dictionary mapping module names to config options.
181
182			"""
183			for ext in extensions:
184			if isinstance(ext, util.string_type):
185			ext = self.build_extension(ext, configs.get(ext, {}))
186			if isinstance(ext, Extension):
187			ext.extendMarkdown(self, globals())
188			logger.debug(
189			'Successfully loaded extension "%s.%s".'
190			% (ext.__class__.__module__, ext.__class__.__name__)
191			)
192			elif ext is not None:
193			raise TypeError(
194			'Extension "%s.%s" must be of type: "markdown.Extension"'
195			% (ext.__class__.__module__, ext.__class__.__name__))
196
197			return self
198
199			def build_extension(self, ext_name, configs):
200			"""Build extension by name, then return the module.
201
202			The extension name may contain arguments as part of the string in the
203			following format: "extname(key1=value1,key2=value2)"
204
205			"""
206
207			configs = dict(configs)
208
209			# Parse extensions config params (ignore the order)
210			pos = ext_name.find("(") # find the first "("
211			if pos > 0:
212			ext_args = ext_name[pos+1:-1]
213			ext_name = ext_name[:pos]
214			pairs = [x.split("=") for x in ext_args.split(",")]
215			configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
216			warnings.warn('Setting configs in the Named Extension string is '
217			'deprecated. It is recommended that you '
218			'pass an instance of the extension class to '
219			'Markdown or use the "extension_configs" keyword. '
220			'The current behavior will raise an error in version 2.7. '
221			'See the Release Notes for Python-Markdown version '
222			'2.6 for more info.', DeprecationWarning)
223
224			# Get class name (if provided): `path.to.module:ClassName`
225			ext_name, class_name = ext_name.split(':', 1) \
226			if ':' in ext_name else (ext_name, '')
227
228			# Try loading the extension first from one place, then another
229			try:
230			# Assume string uses dot syntax (`path.to.some.module`)
231			module = importlib.import_module(ext_name)
232			logger.debug(
233			'Successfuly imported extension module "%s".' % ext_name
234			)
235			# For backward compat (until deprecation)
236			# check that this is an extension.
237			if ('.' not in ext_name and not (hasattr(module, 'makeExtension') or
238			(class_name and hasattr(module, class_name)))):
239			# We have a name conflict
240			# eg: extensions=['tables'] and PyTables is installed
241			raise ImportError
242			except ImportError:
243			# Preppend `markdown.extensions.` to name
244			module_name = '.'.join(['markdown.extensions', ext_name])
245			try:
246			module = importlib.import_module(module_name)
247			logger.debug(
248			'Successfuly imported extension module "%s".' %
249			module_name
250			)
251			warnings.warn('Using short names for Markdown\'s builtin '
252			'extensions is deprecated. Use the '
253			'full path to the extension with Python\'s dot '
254			'notation (eg: "%s" instead of "%s"). The '
255			'current behavior will raise an error in version '
256			'2.7. See the Release Notes for '
257			'Python-Markdown version 2.6 for more info.' %
258			(module_name, ext_name),
259			DeprecationWarning)
260			except ImportError:
261			# Preppend `mdx_` to name
262			module_name_old_style = '_'.join(['mdx', ext_name])
263			try:
264			module = importlib.import_module(module_name_old_style)
265			logger.debug(
266			'Successfuly imported extension module "%s".' %
267			module_name_old_style)
268			warnings.warn('Markdown\'s behavior of prepending "mdx_" '
269			'to an extension name is deprecated. '
270			'Use the full path to the '
271			'extension with Python\'s dot notation '
272			'(eg: "%s" instead of "%s"). The current '
273			'behavior will raise an error in version 2.7. '
274			'See the Release Notes for Python-Markdown '
275			'version 2.6 for more info.' %
276			(module_name_old_style, ext_name),
277			DeprecationWarning)
278			except ImportError as e:
279			message = "Failed loading extension '%s' from '%s', '%s' " \
280			"or '%s'" % (ext_name, ext_name, module_name,
281			module_name_old_style)
282			e.args = (message,) + e.args[1:]
283			raise
284
285			if class_name:
286			# Load given class name from module.
287			return getattr(module, class_name)(**configs)
288			else:
289			# Expect makeExtension() function to return a class.
290			try:
291			return module.makeExtension(**configs)
292			except AttributeError as e:
293			message = e.args[0]
294			message = "Failed to initiate extension " \
295			"'%s': %s" % (ext_name, message)
296			e.args = (message,) + e.args[1:]
297			raise
298
def registerExtension(self, extension):
    """
    Record `extension` in this instance's list of registered extensions.

    This gets called by the extension itself. Returns the instance so
    that calls can be chained.
    """
    registry = self.registeredExtensions
    registry.append(extension)
    return self
303
def reset(self):
    """
    Reset all state variables so that a new text can be processed.

    Resets the HTML stash, empties the reference table and then calls
    `reset()` on every registered extension that provides one. Returns
    the instance so that calls can be chained.
    """
    self.htmlStash.reset()
    self.references.clear()

    for ext in self.registeredExtensions:
        # Extensions are not required to implement reset().
        if not hasattr(ext, 'reset'):
            continue
        ext.reset()

    return self
316
def set_output_format(self, format):
    """
    Set the output format for the class instance.

    Keyword arguments:

    * format: Name of the output format (case-insensitive). It must be a
      key of `self.output_formats`.

    Returns the instance so that calls can be chained.

    Raises `KeyError` with a message listing the valid format names when
    the name is unknown. In that case `output_format` and `serializer`
    keep their previous values.
    """
    name = format.lower()
    try:
        serializer = self.output_formats[name]
    except KeyError as e:
        valid_formats = sorted(self.output_formats.keys())
        message = 'Invalid Output Format: "%s". Use one of %s.' \
            % (name, '"' + '", "'.join(valid_formats) + '"')
        # Replace the message but keep the original exception/traceback.
        e.args = (message,) + e.args[1:]
        raise
    # Commit both attributes together, only once the lookup succeeded, so
    # a rejected name never leaves them out of step with each other.
    self.output_format = name
    self.serializer = serializer
    return self
331
def convert(self, source):
    """
    Convert markdown to serialized XHTML or HTML.

    Keyword arguments:

    * source: Source text as a Unicode string.

    Markdown processing takes place in five steps:

    1. A bunch of "preprocessors" munge the input text.
    2. BlockParser() parses the high-level structural elements of the
       pre-processed text into an ElementTree.
    3. A bunch of "treeprocessors" are run against the ElementTree. One
       such treeprocessor runs InlinePatterns against the ElementTree,
       detecting inline markup.
    4. Some post-processors are run against the text after the ElementTree
       has been serialized into text.
    5. The output is written to a string.

    Returns the converted text with surrounding whitespace stripped, or
    an empty string when `source` is blank.

    Raises `UnicodeDecodeError` (with an extended reason) when the source
    cannot be converted to text, and `ValueError` when the top-level
    document tags cannot be stripped from the serialized output.
    """

    # Fixup the source text
    if not source.strip():
        return ''  # a blank unicode string

    try:
        source = util.text_type(source)
    except UnicodeDecodeError as e:
        # Customise error message while maintaining original traceback
        e.reason += '. -- Note: Markdown only accepts unicode input!'
        raise

    # Split into lines and run the line preprocessors.
    self.lines = source.split("\n")
    for prep in self.preprocessors.values():
        self.lines = prep.run(self.lines)

    # Parse the high-level elements.
    root = self.parser.parseDocument(self.lines).getroot()

    # Run the tree-processors. A treeprocessor may return a replacement
    # root, or None to keep the current one.
    for treeprocessor in self.treeprocessors.values():
        new_root = treeprocessor.run(root)
        if new_root is not None:
            root = new_root

    # Serialize _properly_. Strip top-level tags.
    output = self.serializer(root)
    if self.stripTopLevelTags:
        output = self._strip_top_level_tags(output)

    # Run the text post-processors
    for pp in self.postprocessors.values():
        output = pp.run(output)

    return output.strip()


def _strip_top_level_tags(self, output):
    """
    Return `output` without the enclosing `<doc_tag>`...`</doc_tag>` pair.

    Returns an empty string when the document is just the self-closing
    `<doc_tag />`. Raises `ValueError` when neither form is found.
    """
    open_tag = '<%s>' % self.doc_tag
    try:
        start = output.index(open_tag) + len(open_tag)
        end = output.rindex('</%s>' % self.doc_tag)
    except ValueError:  # pragma: no cover
        if output.strip().endswith('<%s />' % self.doc_tag):
            # We have an empty document
            return ''
        # We have a serious problem
        raise ValueError('Markdown failed to strip top-level '
                         'tags. Document=%r' % output.strip())
    return output[start:end].strip()
401
def convertFile(self, input=None, output=None, encoding=None):
    """Converts a Markdown file and writes the resulting HTML.

    Decodes the file using the provided encoding (defaults to utf-8),
    passes the file content to markdown, and outputs the html to either
    the provided stream or the file with provided name, using the same
    encoding as the source file. The 'xmlcharrefreplace' error handler is
    used when encoding the output.

    Note: This is the only place that decoding and encoding of Unicode
    takes place in Python-Markdown. (All other code is Unicode-in /
    Unicode-out.)

    Keyword arguments:

    * input: File object or path. Reads from stdin if `None`.
    * output: File object or path. Writes to stdout if `None`.
    * encoding: Encoding of input and output files. Defaults to utf-8.

    Returns the instance itself (not the HTML), so calls can be chained.

    """

    encoding = encoding or "utf-8"

    # Read the source
    if input:
        if isinstance(input, util.string_type):
            input_file = codecs.open(input, mode="r", encoding=encoding)
        else:
            input_file = codecs.getreader(encoding)(input)
        # The reader is closed even when read() fails (e.g. on a decoding
        # error). As before, this also closes a caller-supplied stream.
        with input_file:
            text = input_file.read()
    else:
        text = sys.stdin.read()
        if not isinstance(text, util.text_type):
            text = text.decode(encoding)

    text = text.lstrip('\ufeff')  # remove the byte-order mark

    # Convert
    html = self.convert(text)

    # Write to file or stdout
    if output:
        if isinstance(output, util.string_type):
            # We opened this file, so we close it, even if write() fails.
            with codecs.open(output, "w",
                             encoding=encoding,
                             errors="xmlcharrefreplace") as output_file:
                output_file.write(html)
        else:
            writer = codecs.getwriter(encoding)
            output_file = writer(output, errors="xmlcharrefreplace")
            output_file.write(html)
            # Don't close here. User may want to write more.
    else:
        # Encode manually and write bytes to stdout.
        html = html.encode(encoding, "xmlcharrefreplace")
        try:
            # Write bytes directly to buffer (Python 3).
            sys.stdout.buffer.write(html)
        except AttributeError:
            # Probably Python 2, which works with bytes by default.
            sys.stdout.write(html)

    return self
467
468
"""
EXPORTED FUNCTIONS
=============================================================================

These are the two functions we really mean to export: markdown() and
markdownFromFile().
"""
476
477
def markdown(text, *args, **kwargs):
    """Convert a Markdown string to HTML and return HTML as a Unicode string.

    This is a shortcut function for `Markdown` class to cover the most
    basic use case. It initializes an instance of Markdown, loads the
    necessary extensions and runs the parser on the given text.

    Keyword arguments:

    * text: Markdown formatted text as Unicode or ASCII string.
    * Any arguments accepted by the Markdown class.

    Returns: An HTML document as a string.

    """
    # All extra positional and keyword arguments are forwarded unchanged
    # to the Markdown constructor.
    md = Markdown(*args, **kwargs)
    return md.convert(text)
495
496
def markdownFromFile(*args, **kwargs):
    """Read markdown code from a file and write it to a file or a stream.

    This is a shortcut function which initializes an instance of Markdown,
    and calls the convertFile method rather than convert.

    Keyword arguments:

    * input: a file name or readable object.
    * output: a file name or writable object.
    * encoding: Encoding of input and output.
    * Any arguments accepted by the Markdown class.

    Positional arguments are still accepted for backward compatibility
    (in the order input, output, extensions, encoding) but emit a
    `DeprecationWarning`. An explicit keyword argument wins over a
    positional one for the same name.

    """
    # For backward compatibility map positional args onto their keyword
    # names. zip() stops at the shorter sequence, so surplus positional
    # arguments are ignored; setdefault() keeps explicit keywords.
    pos = ['input', 'output', 'extensions', 'encoding']
    for name, arg in zip(pos, args):
        kwargs.setdefault(name, arg)
    if args:
        warnings.warn('Positional arguments are depreacted in '
                      'Markdown and will raise an error in version 2.7. '
                      'Use keyword arguments only.',
                      DeprecationWarning)

    md = Markdown(**kwargs)
    md.convertFile(kwargs.get('input', None),
                   kwargs.get('output', None),
                   kwargs.get('encoding', None))
530

MikeCoder / markdown-preview.vim

Push — master ( 32cfa8...ec62d3 )

Markdown.convert() D

Complexity

Size

Duplication

Importance

How to fix Long Method Complexity

Long Method

Complexity

Duplication Side-by-Side

Filter issues like