Code Duplication    Length = 268-268 lines in 2 locations

pythonx/markdown_parser.py 1 location

@@ 160-427 (lines=268) @@
157
            r'<!--[\s\S]*?-->',
158
            r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr),
159
            r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr),
160
        )
161
    )
162
    table = re.compile(
163
        r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
164
    )
165
    nptable = re.compile(
166
        r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
167
    )
168
    text = re.compile(r'^[^\n]+')
169
170
171
class BlockLexer(object):
172
    """Block level lexer for block grammars."""
173
    grammar_class = BlockGrammar
174
175
    default_rules = [
176
        'newline', 'hrule', 'block_code', 'fences', 'heading',
177
        'nptable', 'lheading', 'block_quote',
178
        'list_block', 'block_html', 'def_links',
179
        'def_footnotes', 'table', 'paragraph', 'text'
180
    ]
181
182
    list_rules = (
183
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
184
        'block_quote', 'list_block', 'block_html', 'text',
185
    )
186
187
    footnote_rules = (
188
        'newline', 'block_code', 'fences', 'heading',
189
        'nptable', 'lheading', 'hrule', 'block_quote',
190
        'list_block', 'block_html', 'table', 'paragraph', 'text'
191
    )
192
193
    def __init__(self, rules=None, **kwargs):
194
        self.tokens = []
195
        self.def_links = {}
196
        self.def_footnotes = {}
197
198
        if not rules:
199
            rules = self.grammar_class()
200
201
        self.rules = rules
202
203
    def __call__(self, text, rules=None):
204
        return self.parse(text, rules)
205
206
    def parse(self, text, rules=None):
207
        text = text.rstrip('\n')
208
209
        if not rules:
210
            rules = self.default_rules
211
212
        def manipulate(text):
213
            for key in rules:
214
                rule = getattr(self.rules, key)
215
                m = rule.match(text)
216
                if not m:
217
                    continue
218
                getattr(self, 'parse_%s' % key)(m)
219
                return m
220
            return False  # pragma: no cover
221
222
        while text:
223
            m = manipulate(text)
224
            if m is not False:
225
                text = text[len(m.group(0)):]
226
                continue
227
            if text:  # pragma: no cover
228
                raise RuntimeError('Infinite loop at: %s' % text)
229
        return self.tokens
230
231
    def parse_newline(self, m):
232
        length = len(m.group(0))
233
        if length > 1:
234
            self.tokens.append({'type': 'newline'})
235
236
    def parse_block_code(self, m):
237
        # clean leading whitespace
238
        code = _block_code_leading_pattern.sub('', m.group(0))
239
        self.tokens.append({
240
            'type': 'code',
241
            'lang': None,
242
            'text': code,
243
        })
244
245
    def parse_fences(self, m):
246
        self.tokens.append({
247
            'type': 'code',
248
            'lang': m.group(2),
249
            'text': m.group(3),
250
        })
251
252
    def parse_heading(self, m):
253
        self.tokens.append({
254
            'type': 'heading',
255
            'level': len(m.group(1)),
256
            'text': m.group(2),
257
        })
258
259
    def parse_lheading(self, m):
260
        """Parse setext heading."""
261
        self.tokens.append({
262
            'type': 'heading',
263
            'level': 1 if m.group(2) == '=' else 2,
264
            'text': m.group(1),
265
        })
266
267
    def parse_hrule(self, m):
268
        self.tokens.append({'type': 'hrule'})
269
270
    def parse_list_block(self, m):
271
        bull = m.group(2)
272
        self.tokens.append({
273
            'type': 'list_start',
274
            'ordered': '.' in bull,
275
        })
276
        cap = m.group(0)
277
        self._process_list_item(cap, bull)
278
        self.tokens.append({'type': 'list_end'})
279
280
    def _process_list_item(self, cap, bull):
281
        cap = self.rules.list_item.findall(cap)
282
283
        _next = False
284
        length = len(cap)
285
286
        for i in range(length):
287
            item = cap[i][0]
288
289
            # remove the bullet
290
            space = len(item)
291
            item = self.rules.list_bullet.sub('', item)
292
293
            # outdent
294
            if '\n ' in item:
295
                space = space - len(item)
296
                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
297
                item = pattern.sub('', item)
298
299
            # determine whether item is loose or not
300
            loose = _next
301
            if not loose and re.search(r'\n\n(?!\s*$)', item):
302
                loose = True
303
304
            rest = len(item)
305
            if i != length - 1 and rest:
306
                _next = item[rest-1] == '\n'
307
                if not loose:
308
                    loose = _next
309
310
            if loose:
311
                t = 'loose_item_start'
312
            else:
313
                t = 'list_item_start'
314
315
            self.tokens.append({'type': t})
316
            # recurse
317
            self.parse(item, self.list_rules)
318
            self.tokens.append({'type': 'list_item_end'})
319
320
    def parse_block_quote(self, m):
321
        self.tokens.append({'type': 'block_quote_start'})
322
        # clean leading >
323
        cap = _block_quote_leading_pattern.sub('', m.group(0))
324
        self.parse(cap)
325
        self.tokens.append({'type': 'block_quote_end'})
326
327
    def parse_def_links(self, m):
328
        key = _keyify(m.group(1))
329
        self.def_links[key] = {
330
            'link': m.group(2),
331
            'title': m.group(3),
332
        }
333
334
    def parse_def_footnotes(self, m):
335
        key = _keyify(m.group(1))
336
        if key in self.def_footnotes:
337
            # footnote is already defined
338
            return
339
340
        self.def_footnotes[key] = 0
341
342
        self.tokens.append({
343
            'type': 'footnote_start',
344
            'key': key,
345
        })
346
347
        text = m.group(2)
348
349
        if '\n' in text:
350
            lines = text.split('\n')
351
            whitespace = None
352
            for line in lines[1:]:
353
                space = len(line) - len(line.lstrip())
354
                if space and (not whitespace or space < whitespace):
355
                    whitespace = space
356
            newlines = [lines[0]]
357
            for line in lines[1:]:
358
                newlines.append(line[whitespace:])
359
            text = '\n'.join(newlines)
360
361
        self.parse(text, self.footnote_rules)
362
363
        self.tokens.append({
364
            'type': 'footnote_end',
365
            'key': key,
366
        })
367
368
    def parse_table(self, m):
369
        item = self._process_table(m)
370
371
        cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
372
        cells = cells.split('\n')
373
        for i, v in enumerate(cells):
374
            v = re.sub(r'^ *\| *| *\| *$', '', v)
375
            cells[i] = re.split(r' *\| *', v)
376
377
        item['cells'] = cells
378
        self.tokens.append(item)
379
380
    def parse_nptable(self, m):
381
        item = self._process_table(m)
382
383
        cells = re.sub(r'\n$', '', m.group(3))
384
        cells = cells.split('\n')
385
        for i, v in enumerate(cells):
386
            cells[i] = re.split(r' *\| *', v)
387
388
        item['cells'] = cells
389
        self.tokens.append(item)
390
391
    def _process_table(self, m):
392
        header = re.sub(r'^ *| *\| *$', '', m.group(1))
393
        header = re.split(r' *\| *', header)
394
        align = re.sub(r' *|\| *$', '', m.group(2))
395
        align = re.split(r' *\| *', align)
396
397
        for i, v in enumerate(align):
398
            if re.search(r'^ *-+: *$', v):
399
                align[i] = 'right'
400
            elif re.search(r'^ *:-+: *$', v):
401
                align[i] = 'center'
402
            elif re.search(r'^ *:-+ *$', v):
403
                align[i] = 'left'
404
            else:
405
                align[i] = None
406
407
        item = {
408
            'type': 'table',
409
            'header': header,
410
            'align': align,
411
        }
412
        return item
413
414
    def parse_block_html(self, m):
415
        tag = m.group(1)
416
        if not tag:
417
            text = m.group(0)
418
            self.tokens.append({
419
                'type': 'close_html',
420
                'text': text
421
            })
422
        else:
423
            attr = m.group(2)
424
            text = m.group(3)
425
            self.tokens.append({
426
                'type': 'open_html',
427
                'tag': tag,
428
                'extra': attr,
429
                'text': text
430
            })

pythonx/tests/markdown_parser.py 1 location

@@ 160-427 (lines=268) @@
157
    text = re.compile(r'^[^\n]+')
158
159
160
class BlockLexer(object):
161
    """Block level lexer for block grammars."""
162
    grammar_class = BlockGrammar
163
164
    default_rules = [
165
        'newline', 'hrule', 'block_code', 'fences', 'heading',
166
        'nptable', 'lheading', 'block_quote',
167
        'list_block', 'block_html', 'def_links',
168
        'def_footnotes', 'table', 'paragraph', 'text'
169
    ]
170
171
    list_rules = (
172
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
173
        'block_quote', 'list_block', 'block_html', 'text',
174
    )
175
176
    footnote_rules = (
177
        'newline', 'block_code', 'fences', 'heading',
178
        'nptable', 'lheading', 'hrule', 'block_quote',
179
        'list_block', 'block_html', 'table', 'paragraph', 'text'
180
    )
181
182
    def __init__(self, rules=None, **kwargs):
183
        self.tokens = []
184
        self.def_links = {}
185
        self.def_footnotes = {}
186
187
        if not rules:
188
            rules = self.grammar_class()
189
190
        self.rules = rules
191
192
    def __call__(self, text, rules=None):
193
        return self.parse(text, rules)
194
195
    def parse(self, text, rules=None):
196
        text = text.rstrip('\n')
197
198
        if not rules:
199
            rules = self.default_rules
200
201
        def manipulate(text):
202
            for key in rules:
203
                rule = getattr(self.rules, key)
204
                m = rule.match(text)
205
                if not m:
206
                    continue
207
                getattr(self, 'parse_%s' % key)(m)
208
                return m
209
            return False  # pragma: no cover
210
211
        while text:
212
            m = manipulate(text)
213
            if m is not False:
214
                text = text[len(m.group(0)):]
215
                continue
216
            if text:  # pragma: no cover
217
                raise RuntimeError('Infinite loop at: %s' % text)
218
        return self.tokens
219
220
    def parse_newline(self, m):
221
        length = len(m.group(0))
222
        if length > 1:
223
            self.tokens.append({'type': 'newline'})
224
225
    def parse_block_code(self, m):
226
        # clean leading whitespace
227
        code = _block_code_leadning_pattern.sub('', m.group(0))
228
        self.tokens.append({
229
            'type': 'code',
230
            'lang': None,
231
            'text': code,
232
        })
233
234
    def parse_fences(self, m):
235
        self.tokens.append({
236
            'type': 'code',
237
            'lang': m.group(2),
238
            'text': m.group(3),
239
        })
240
241
    def parse_heading(self, m):
242
        self.tokens.append({
243
            'type': 'heading',
244
            'level': len(m.group(1)),
245
            'text': m.group(2),
246
        })
247
248
    def parse_lheading(self, m):
249
        """Parse setext heading."""
250
        self.tokens.append({
251
            'type': 'heading',
252
            'level': 1 if m.group(2) == '=' else 2,
253
            'text': m.group(1),
254
        })
255
256
    def parse_hrule(self, m):
257
        self.tokens.append({'type': 'hrule'})
258
259
    def parse_list_block(self, m):
260
        bull = m.group(2)
261
        self.tokens.append({
262
            'type': 'list_start',
263
            'ordered': '.' in bull,
264
        })
265
        cap = m.group(0)
266
        self._process_list_item(cap, bull)
267
        self.tokens.append({'type': 'list_end'})
268
269
    def _process_list_item(self, cap, bull):
270
        cap = self.rules.list_item.findall(cap)
271
272
        _next = False
273
        length = len(cap)
274
275
        for i in range(length):
276
            item = cap[i][0]
277
278
            # remove the bullet
279
            space = len(item)
280
            item = self.rules.list_bullet.sub('', item)
281
282
            # outdent
283
            if '\n ' in item:
284
                space = space - len(item)
285
                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
286
                item = pattern.sub('', item)
287
288
            # determine whether item is loose or not
289
            loose = _next
290
            if not loose and re.search(r'\n\n(?!\s*$)', item):
291
                loose = True
292
293
            rest = len(item)
294
            if i != length - 1 and rest:
295
                _next = item[rest-1] == '\n'
296
                if not loose:
297
                    loose = _next
298
299
            if loose:
300
                t = 'loose_item_start'
301
            else:
302
                t = 'list_item_start'
303
304
            self.tokens.append({'type': t})
305
            # recurse
306
            self.parse(item, self.list_rules)
307
            self.tokens.append({'type': 'list_item_end'})
308
309
    def parse_block_quote(self, m):
310
        self.tokens.append({'type': 'block_quote_start'})
311
        # clean leading >
312
        cap = _block_quote_leading_pattern.sub('', m.group(0))
313
        self.parse(cap)
314
        self.tokens.append({'type': 'block_quote_end'})
315
316
    def parse_def_links(self, m):
317
        key = _keyify(m.group(1))
318
        self.def_links[key] = {
319
            'link': m.group(2),
320
            'title': m.group(3),
321
        }
322
323
    def parse_def_footnotes(self, m):
324
        key = _keyify(m.group(1))
325
        if key in self.def_footnotes:
326
            # footnote is already defined
327
            return
328
329
        self.def_footnotes[key] = 0
330
331
        self.tokens.append({
332
            'type': 'footnote_start',
333
            'key': key,
334
        })
335
336
        text = m.group(2)
337
338
        if '\n' in text:
339
            lines = text.split('\n')
340
            whitespace = None
341
            for line in lines[1:]:
342
                space = len(line) - len(line.lstrip())
343
                if space and (not whitespace or space < whitespace):
344
                    whitespace = space
345
            newlines = [lines[0]]
346
            for line in lines[1:]:
347
                newlines.append(line[whitespace:])
348
            text = '\n'.join(newlines)
349
350
        self.parse(text, self.footnote_rules)
351
352
        self.tokens.append({
353
            'type': 'footnote_end',
354
            'key': key,
355
        })
356
357
    def parse_table(self, m):
358
        item = self._process_table(m)
359
360
        cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
361
        cells = cells.split('\n')
362
        for i, v in enumerate(cells):
363
            v = re.sub(r'^ *\| *| *\| *$', '', v)
364
            cells[i] = re.split(r' *\| *', v)
365
366
        item['cells'] = cells
367
        self.tokens.append(item)
368
369
    def parse_nptable(self, m):
370
        item = self._process_table(m)
371
372
        cells = re.sub(r'\n$', '', m.group(3))
373
        cells = cells.split('\n')
374
        for i, v in enumerate(cells):
375
            cells[i] = re.split(r' *\| *', v)
376
377
        item['cells'] = cells
378
        self.tokens.append(item)
379
380
    def _process_table(self, m):
381
        header = re.sub(r'^ *| *\| *$', '', m.group(1))
382
        header = re.split(r' *\| *', header)
383
        align = re.sub(r' *|\| *$', '', m.group(2))
384
        align = re.split(r' *\| *', align)
385
386
        for i, v in enumerate(align):
387
            if re.search(r'^ *-+: *$', v):
388
                align[i] = 'right'
389
            elif re.search(r'^ *:-+: *$', v):
390
                align[i] = 'center'
391
            elif re.search(r'^ *:-+ *$', v):
392
                align[i] = 'left'
393
            else:
394
                align[i] = None
395
396
        item = {
397
            'type': 'table',
398
            'header': header,
399
            'align': align,
400
        }
401
        return item
402
403
    def parse_block_html(self, m):
404
        tag = m.group(1)
405
        if not tag:
406
            text = m.group(0)
407
            self.tokens.append({
408
                'type': 'close_html',
409
                'text': text
410
            })
411
        else:
412
            attr = m.group(2)
413
            text = m.group(3)
414
            self.tokens.append({
415
                'type': 'open_html',
416
                'tag': tag,
417
                'extra': attr,
418
                'text': text
419
            })
420
421
    def parse_paragraph(self, m):
422
        text = m.group(1).rstrip('\n')
423
        self.tokens.append({'type': 'paragraph', 'text': text})
424
425
    def parse_text(self, m):
426
        text = m.group(0)
427
        self.tokens.append({'type': 'text', 'text': text})
428
429
430
class InlineGrammar(object):