Completed
Push — master ( 32cfa8...ec62d3 )
by Dongxin
48s
created

FootnotePostprocessor   A

Complexity

Total Complexity 2

Size/Duplication

Total Lines 10
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 10
rs 10
wmc 2

2 Methods

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 2 1
A run() 0 5 1
1
"""
2
Footnotes Extension for Python-Markdown
3
=======================================
4
5
Adds footnote handling to Python-Markdown.
6
7
See <https://pythonhosted.org/Markdown/extensions/footnotes.html>
8
for documentation.
9
10
Copyright The Python Markdown Project
11
12
License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
13
14
"""
15
16
from __future__ import absolute_import
17
from __future__ import unicode_literals
18
from . import Extension
19
from ..preprocessors import Preprocessor
20
from ..inlinepatterns import Pattern
21
from ..treeprocessors import Treeprocessor
22
from ..postprocessors import Postprocessor
23
from .. import util
24
from ..odict import OrderedDict
25
import re
26
import copy
27
28
# Placeholder written as the text of each footnote back-link; it is swapped
# for the configured BACKLINK_TEXT by FootnotePostprocessor.run().
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
29
# Placeholder appended to the last paragraph of a footnote; it is swapped for
# the "&#160;" entity by FootnotePostprocessor.run().
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
30
# Footnote definition line: up to three leading spaces, "[^label]:", then the
# text.  Group 1 is the label, group 2 is the rest of the line.
DEF_RE = re.compile(r'[ ]{0,3}\[\^([^\]]*)\]:\s*(.*)')
31
# Line indented by one tab or four spaces; group 4 is the text after the
# indent.
TABBED_RE = re.compile(r'((\t)|(    ))(.*)')
32
# "fnref" followed by a number; used by FootnoteExtension.unique_ref() to
# increment the counter of a duplicated reference id.
RE_REF_ID = re.compile(r'(fnref)(\d+)')
33
34
35
class FootnoteExtension(Extension):
    """ Footnote Extension.

    Holds the footnote definitions collected for the current document and
    provides the helpers the footnote processors use to build ids and the
    footnote div.
    """

    def __init__(self, *args, **kwargs):
        """ Setup configs. """

        self.config = {
            'PLACE_MARKER':
                ["///Footnotes Go Here///",
                 "The text string that marks where the footnotes go"],
            'UNIQUE_IDS':
                [False,
                 "Avoid name collisions across "
                 "multiple calls to reset()."],
            "BACKLINK_TEXT":
                ["&#8617;",
                 "The text string that links from the footnote "
                 "to the reader's place."]
        }
        super(FootnoteExtension, self).__init__(*args, **kwargs)

        # In multiple invocations, emit links that don't get tangled.
        self.unique_prefix = 0
        self.found_refs = {}
        self.used_refs = set()

        self.reset()

    def extendMarkdown(self, md, md_globals):
        """ Add pieces to Markdown. """
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md
        # Insert a preprocessor before ReferencePreprocessor
        md.preprocessors.add(
            "footnote", FootnotePreprocessor(self), "<reference"
        )
        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.add(
            "footnote", FootnotePattern(FOOTNOTE_RE, self), "<reference"
        )
        # Insert a tree-processor that would actually add the footnote div
        # This must be before all other treeprocessors (i.e., inline and
        # codehilite) so they can run on the contents of the div.
        md.treeprocessors.add(
            "footnote", FootnoteTreeprocessor(self), "_begin"
        )

        # Insert a tree-processor that will run after inline is done.
        # In this tree-processor we want to check our duplicate footnote
        # tracker and add additional backrefs to the footnote pointing back
        # to the duplicated references.
        md.treeprocessors.add(
            "footnote-duplicate", FootnotePostTreeprocessor(self), '>inline'
        )

        # Insert a postprocessor after the amp_substitute postprocessor
        md.postprocessors.add(
            "footnote", FootnotePostprocessor(self), ">amp_substitute"
        )

    def reset(self):
        """ Clear footnotes on reset, and prepare for distinct document. """
        self.footnotes = OrderedDict()
        self.unique_prefix += 1
        self.found_refs = {}
        self.used_refs = set()

    def unique_ref(self, reference, found=False):
        """ Get a unique reference if there are duplicates.

        When `found` is false the reference is returned unchanged and nothing
        is recorded.  Otherwise the part before the separator is renamed
        (fnref -> fnref2 -> fnref3 ...) until the id is unused, the result is
        stored in `used_refs`, and the count for the original reference in
        `found_refs` is incremented.
        """
        if not found:
            return reference

        original_ref = reference
        while reference in self.used_refs:
            ref, rest = reference.split(self.get_separator(), 1)
            m = RE_REF_ID.match(ref)
            if m:
                # Already numbered: bump the number.
                reference = '%s%d%s%s' % (
                    m.group(1), int(m.group(2))+1, self.get_separator(), rest
                )
            else:
                # First duplicate: start numbering at 2.
                reference = '%s%d%s%s' % (ref, 2, self.get_separator(), rest)

        self.used_refs.add(reference)
        self.found_refs[original_ref] = (
            self.found_refs.get(original_ref, 0) + 1
        )
        return reference

    def findFootnotesPlaceholder(self, root):
        """ Return ElementTree Element that contains Footnote placeholder.

        The result is a `(child, parent, isText)` tuple, where `isText` is
        True when the marker was found in `child.text` and False when it was
        found in `child.tail`, or None when the marker is not in the tree.
        """
        marker = self.getConfig("PLACE_MARKER")

        def finder(element):
            # Depth-first search; text is checked before tail for each child.
            for child in element:
                if child.text and marker in child.text:
                    return child, element, True
                if child.tail and marker in child.tail:
                    return child, element, False
                child_res = finder(child)
                if child_res is not None:
                    return child_res
            return None

        return finder(root)

    def setFootnote(self, id, text):
        """ Store a footnote for later retrieval. """
        self.footnotes[id] = text

    def get_separator(self):
        """ Return the id separator: '-' for (x)html5 output, else ':'. """
        if self.md.output_format in ['html5', 'xhtml5']:
            return '-'
        return ':'

    def makeFootnoteId(self, id):
        """ Return footnote link id. """
        if self.getConfig("UNIQUE_IDS"):
            return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id)
        else:
            return 'fn%s%s' % (self.get_separator(), id)

    def makeFootnoteRefId(self, id, found=False):
        """ Return footnote back-link id.

        `found` is passed through to unique_ref(); when true the id is made
        unique and recorded as a used reference.
        """
        if self.getConfig("UNIQUE_IDS"):
            return self.unique_ref(
                'fnref%s%d-%s' % (
                    self.get_separator(), self.unique_prefix, id
                ),
                found
            )
        else:
            return self.unique_ref(
                'fnref%s%s' % (self.get_separator(), id), found
            )

    def makeFootnotesDiv(self, root):
        """ Return div of footnotes as et Element.

        Returns None when no footnotes have been stored.  `root` is not used
        by this method.
        """

        if not list(self.footnotes.keys()):
            return None

        div = util.etree.Element("div")
        div.set('class', 'footnote')
        util.etree.SubElement(div, "hr")
        ol = util.etree.SubElement(div, "ol")
        surrogate_parent = util.etree.Element("div")

        for id in self.footnotes.keys():
            li = util.etree.SubElement(ol, "li")
            li.set("id", self.makeFootnoteId(id))
            # Parse footnote with surrogate parent as li cannot be used.
            # List block handlers have special logic to deal with li.
            # When we are done parsing, we will copy everything over to li.
            self.parser.parseChunk(surrogate_parent, self.footnotes[id])
            for el in list(surrogate_parent):
                li.append(el)
                surrogate_parent.remove(el)
            backlink = util.etree.Element("a")
            backlink.set("href", "#" + self.makeFootnoteRefId(id))
            if self.md.output_format not in ['html5', 'xhtml5']:
                backlink.set("rev", "footnote")  # Invalid in HTML5
            backlink.set("class", "footnote-backref")
            backlink.set(
                "title",
                "Jump back to footnote %d in the text" %
                (self.footnotes.index(id)+1)
            )
            backlink.text = FN_BACKLINK_TEXT

            # len() instead of Element.getchildren(), which was removed
            # from ElementTree in Python 3.9.
            if len(li):
                node = li[-1]
                if node.tag == "p":
                    # Element.text may be None; treat that as empty text.
                    node.text = (node.text or "") + NBSP_PLACEHOLDER
                    node.append(backlink)
                else:
                    p = util.etree.SubElement(li, "p")
                    p.append(backlink)
        return div
209
210
211
class FootnotePreprocessor(Preprocessor):
    """ Find all footnote references and store for later use. """

    def __init__(self, footnotes):
        self.footnotes = footnotes

    def run(self, lines):
        """
        Loop through lines and find, set, and remove footnote definitions.

        Keywords:

        * lines: A list of lines of text

        Return: A list of lines of text with footnote definitions removed.
        An empty list of lines yields an empty list.

        """
        newlines = []
        i = 0
        # Bounding the loop by len(lines) keeps an empty input from raising
        # IndexError; for non-empty input the traversal is unchanged.
        while i < len(lines):
            m = DEF_RE.match(lines[i])
            if m:
                fn, _i = self.detectTabbed(lines[i+1:])
                fn.insert(0, m.group(2))
                i += _i-1  # skip past footnote
                self.footnotes.setFootnote(m.group(1), "\n".join(fn))
            else:
                newlines.append(lines[i])
            i += 1
        return newlines

    def detectTabbed(self, lines):
        """ Find indented text and remove indent before further processing.

        Keyword arguments:

        * lines: an array of strings

        Returns: a list of post processed items and an offset that run()
        uses to skip past the footnote (one more than the number of lines
        consumed).

        """
        items = []
        blank_line = False  # have we encountered a blank line yet?
        i = 0  # to keep track of where we are

        def detab(line):
            # Return the text after the indent, or None if not indented.
            match = TABBED_RE.match(line)
            if match:
                return match.group(4)
            return None

        for line in lines:
            if line.strip():  # Non-blank line
                detabbed_line = detab(line)
                if detabbed_line:
                    items.append(detabbed_line)
                    i += 1
                    continue
                elif not blank_line and not DEF_RE.match(line):
                    # not tabbed but still part of first par.
                    items.append(line)
                    i += 1
                    continue
                else:
                    return items, i+1

            else:  # Blank line: _maybe_ we are done.
                blank_line = True
                i += 1  # advance

                # Find the next non-blank line
                for j in range(i, len(lines)):
                    if lines[j].strip():
                        next_line = lines[j]
                        break
                else:
                    break  # There is no more text; we are done.

                # Check if the next non-blank line is tabbed
                if detab(next_line):  # Yes, more work to do.
                    items.append("")
                    continue
                else:
                    break  # No, we are done.
        else:
            # Every line was consumed without an early exit.
            i += 1

        return items, i
301
302
303
class FootnotePattern(Pattern):
    """ InlinePattern for footnote markers in a document's body text. """

    def __init__(self, pattern, footnotes):
        super(FootnotePattern, self).__init__(pattern)
        self.footnotes = footnotes

    def handleMatch(self, m):
        """ Return a <sup> link for a known footnote label, else None. """
        ext = self.footnotes
        label = m.group(2)
        if label not in ext.footnotes.keys():
            return None

        sup = util.etree.Element("sup")
        anchor = util.etree.SubElement(sup, "a")
        sup.set('id', ext.makeFootnoteRefId(label, found=True))
        anchor.set('href', '#' + ext.makeFootnoteId(label))
        if ext.md.output_format not in ['html5', 'xhtml5']:
            anchor.set('rel', 'footnote')  # invalid in HTML5
        anchor.set('class', 'footnote-ref')
        # The visible text is the footnote's 1-based position.
        position = ext.footnotes.index(label) + 1
        anchor.text = util.text_type(position)
        return sup
324
325
326
class FootnotePostTreeprocessor(Treeprocessor):
    """ Amend footnote div with duplicates. """

    def __init__(self, footnotes):
        self.footnotes = footnotes

    def add_duplicates(self, li, duplicates):
        """ Adjust current li and add the duplicates: fnref2, fnref3, etc. """
        for link in li.iter('a'):
            # Only the back-reference link gets duplicated.
            if link.attrib.get('class', '') != 'footnote-backref':
                continue

            sep = self.footnotes.get_separator()
            ref, rest = link.attrib['href'].split(sep, 1)
            # Make one copy per extra reference, each pointing at its own
            # numbered reference id.
            extra_links = []
            for index in range(2, duplicates + 1):
                clone = copy.deepcopy(link)
                clone.attrib['href'] = '%s%d%s%s' % (ref, index, sep, rest)
                extra_links.append(clone)
                self.offset += 1
            # The copies go into the last child of the li.
            last_child = list(li)[-1]
            for extra in extra_links:
                last_child.append(extra)
            break

    def get_num_duplicates(self, li):
        """ Get the number of duplicate refs of the footnote. """
        sep = self.footnotes.get_separator()
        fn, rest = li.attrib.get('id', '').split(sep, 1)
        link_id = '%sref%s%s' % (fn, sep, rest)
        return self.footnotes.found_refs.get(link_id, 0)

    def handle_duplicates(self, parent):
        """ Find duplicate footnotes and format and add the duplicates. """
        for li in list(parent):
            # Insert additional links only when a footnote is referenced
            # more than once.
            count = self.get_num_duplicates(li)
            if count > 1:
                self.add_duplicates(li, count)

    def run(self, root):
        """ Crawl the footnote div and add missing duplicate footnotes. """
        self.offset = 0
        for div in root.iter('div'):
            if div.attrib.get('class', '') != 'footnote':
                continue
            # Footnotes should be under the first ordered list under
            # the footnote div, so only that list is processed.
            first_ol = next(div.iter('ol'), None)
            if first_ol is not None:
                self.handle_duplicates(first_ol)
377
378
379
class FootnoteTreeprocessor(Treeprocessor):
    """ Build and append footnote div to end of document. """

    def __init__(self, footnotes):
        self.footnotes = footnotes

    def run(self, root):
        """ Insert the footnote div at the place marker, or append it.

        Does nothing when the extension has no footnote div to insert.
        """
        footnotesDiv = self.footnotes.makeFootnotesDiv(root)
        if footnotesDiv is not None:
            result = self.footnotes.findFootnotesPlaceholder(root)
            if result:
                child, parent, isText = result
                # list(parent) instead of Element.getchildren(), which was
                # removed from ElementTree in Python 3.9.
                ind = list(parent).index(child)
                if isText:
                    # Marker is the child's own text: replace the child.
                    parent.remove(child)
                    parent.insert(ind, footnotesDiv)
                else:
                    # Marker is in the child's tail: insert after the child
                    # and drop the tail.
                    parent.insert(ind + 1, footnotesDiv)
                    child.tail = None
            else:
                root.append(footnotesDiv)
400
401
402
class FootnotePostprocessor(Postprocessor):
    """ Replace placeholders with html entities. """

    def __init__(self, footnotes):
        self.footnotes = footnotes

    def run(self, text):
        """ Return text with both footnote placeholders substituted. """
        backlink_text = self.footnotes.getConfig("BACKLINK_TEXT")
        with_backlinks = text.replace(FN_BACKLINK_TEXT, backlink_text)
        return with_backlinks.replace(NBSP_PLACEHOLDER, "&#160;")
412
413
414
def makeExtension(*args, **kwargs):
    """ Build a FootnoteExtension from the given arguments and return it. """
    extension = FootnoteExtension(*args, **kwargs)
    return extension
417