@@ 160-427 (lines=268) @@ | ||
157 | r'<!--[\s\S]*?-->', |
|
158 | r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr), |
|
159 | r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr), |
|
160 | ) |
|
161 | ) |
|
162 | table = re.compile( |
|
163 | r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*' |
|
164 | ) |
|
165 | nptable = re.compile( |
|
166 | r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*' |
|
167 | ) |
|
168 | text = re.compile(r'^[^\n]+') |
|
169 | ||
170 | ||
171 | class BlockLexer(object): |
|
172 | """Block level lexer for block grammars.""" |
|
173 | grammar_class = BlockGrammar |
|
174 | ||
175 | default_rules = [ |
|
176 | 'newline', 'hrule', 'block_code', 'fences', 'heading', |
|
177 | 'nptable', 'lheading', 'block_quote', |
|
178 | 'list_block', 'block_html', 'def_links', |
|
179 | 'def_footnotes', 'table', 'paragraph', 'text' |
|
180 | ] |
|
181 | ||
182 | list_rules = ( |
|
183 | 'newline', 'block_code', 'fences', 'lheading', 'hrule', |
|
184 | 'block_quote', 'list_block', 'block_html', 'text', |
|
185 | ) |
|
186 | ||
187 | footnote_rules = ( |
|
188 | 'newline', 'block_code', 'fences', 'heading', |
|
189 | 'nptable', 'lheading', 'hrule', 'block_quote', |
|
190 | 'list_block', 'block_html', 'table', 'paragraph', 'text' |
|
191 | ) |
|
192 | ||
193 | def __init__(self, rules=None, **kwargs): |
|
194 | self.tokens = [] |
|
195 | self.def_links = {} |
|
196 | self.def_footnotes = {} |
|
197 | ||
198 | if not rules: |
|
199 | rules = self.grammar_class() |
|
200 | ||
201 | self.rules = rules |
|
202 | ||
203 | def __call__(self, text, rules=None): |
|
204 | return self.parse(text, rules) |
|
205 | ||
206 | def parse(self, text, rules=None): |
|
207 | text = text.rstrip('\n') |
|
208 | ||
209 | if not rules: |
|
210 | rules = self.default_rules |
|
211 | ||
212 | def manipulate(text): |
|
213 | for key in rules: |
|
214 | rule = getattr(self.rules, key) |
|
215 | m = rule.match(text) |
|
216 | if not m: |
|
217 | continue |
|
218 | getattr(self, 'parse_%s' % key)(m) |
|
219 | return m |
|
220 | return False # pragma: no cover |
|
221 | ||
222 | while text: |
|
223 | m = manipulate(text) |
|
224 | if m is not False: |
|
225 | text = text[len(m.group(0)):] |
|
226 | continue |
|
227 | if text: # pragma: no cover |
|
228 | raise RuntimeError('Infinite loop at: %s' % text) |
|
229 | return self.tokens |
|
230 | ||
231 | def parse_newline(self, m): |
|
232 | length = len(m.group(0)) |
|
233 | if length > 1: |
|
234 | self.tokens.append({'type': 'newline'}) |
|
235 | ||
236 | def parse_block_code(self, m): |
|
237 | # clean leading whitespace |
|
238 | code = _block_code_leading_pattern.sub('', m.group(0)) |
|
239 | self.tokens.append({ |
|
240 | 'type': 'code', |
|
241 | 'lang': None, |
|
242 | 'text': code, |
|
243 | }) |
|
244 | ||
245 | def parse_fences(self, m): |
|
246 | self.tokens.append({ |
|
247 | 'type': 'code', |
|
248 | 'lang': m.group(2), |
|
249 | 'text': m.group(3), |
|
250 | }) |
|
251 | ||
252 | def parse_heading(self, m): |
|
253 | self.tokens.append({ |
|
254 | 'type': 'heading', |
|
255 | 'level': len(m.group(1)), |
|
256 | 'text': m.group(2), |
|
257 | }) |
|
258 | ||
259 | def parse_lheading(self, m): |
|
260 | """Parse setext heading.""" |
|
261 | self.tokens.append({ |
|
262 | 'type': 'heading', |
|
263 | 'level': 1 if m.group(2) == '=' else 2, |
|
264 | 'text': m.group(1), |
|
265 | }) |
|
266 | ||
267 | def parse_hrule(self, m): |
|
268 | self.tokens.append({'type': 'hrule'}) |
|
269 | ||
270 | def parse_list_block(self, m): |
|
271 | bull = m.group(2) |
|
272 | self.tokens.append({ |
|
273 | 'type': 'list_start', |
|
274 | 'ordered': '.' in bull, |
|
275 | }) |
|
276 | cap = m.group(0) |
|
277 | self._process_list_item(cap, bull) |
|
278 | self.tokens.append({'type': 'list_end'}) |
|
279 | ||
280 | def _process_list_item(self, cap, bull): |
|
281 | cap = self.rules.list_item.findall(cap) |
|
282 | ||
283 | _next = False |
|
284 | length = len(cap) |
|
285 | ||
286 | for i in range(length): |
|
287 | item = cap[i][0] |
|
288 | ||
289 | # remove the bullet |
|
290 | space = len(item) |
|
291 | item = self.rules.list_bullet.sub('', item) |
|
292 | ||
293 | # outdent |
|
294 | if '\n ' in item: |
|
295 | space = space - len(item) |
|
296 | pattern = re.compile(r'^ {1,%d}' % space, flags=re.M) |
|
297 | item = pattern.sub('', item) |
|
298 | ||
299 | # determine whether item is loose or not |
|
300 | loose = _next |
|
301 | if not loose and re.search(r'\n\n(?!\s*$)', item): |
|
302 | loose = True |
|
303 | ||
304 | rest = len(item) |
|
305 | if i != length - 1 and rest: |
|
306 | _next = item[rest-1] == '\n' |
|
307 | if not loose: |
|
308 | loose = _next |
|
309 | ||
310 | if loose: |
|
311 | t = 'loose_item_start' |
|
312 | else: |
|
313 | t = 'list_item_start' |
|
314 | ||
315 | self.tokens.append({'type': t}) |
|
316 | # recurse |
|
317 | self.parse(item, self.list_rules) |
|
318 | self.tokens.append({'type': 'list_item_end'}) |
|
319 | ||
320 | def parse_block_quote(self, m): |
|
321 | self.tokens.append({'type': 'block_quote_start'}) |
|
322 | # clean leading > |
|
323 | cap = _block_quote_leading_pattern.sub('', m.group(0)) |
|
324 | self.parse(cap) |
|
325 | self.tokens.append({'type': 'block_quote_end'}) |
|
326 | ||
327 | def parse_def_links(self, m): |
|
328 | key = _keyify(m.group(1)) |
|
329 | self.def_links[key] = { |
|
330 | 'link': m.group(2), |
|
331 | 'title': m.group(3), |
|
332 | } |
|
333 | ||
334 | def parse_def_footnotes(self, m): |
|
335 | key = _keyify(m.group(1)) |
|
336 | if key in self.def_footnotes: |
|
337 | # footnote is already defined |
|
338 | return |
|
339 | ||
340 | self.def_footnotes[key] = 0 |
|
341 | ||
342 | self.tokens.append({ |
|
343 | 'type': 'footnote_start', |
|
344 | 'key': key, |
|
345 | }) |
|
346 | ||
347 | text = m.group(2) |
|
348 | ||
349 | if '\n' in text: |
|
350 | lines = text.split('\n') |
|
351 | whitespace = None |
|
352 | for line in lines[1:]: |
|
353 | space = len(line) - len(line.lstrip()) |
|
354 | if space and (not whitespace or space < whitespace): |
|
355 | whitespace = space |
|
356 | newlines = [lines[0]] |
|
357 | for line in lines[1:]: |
|
358 | newlines.append(line[whitespace:]) |
|
359 | text = '\n'.join(newlines) |
|
360 | ||
361 | self.parse(text, self.footnote_rules) |
|
362 | ||
363 | self.tokens.append({ |
|
364 | 'type': 'footnote_end', |
|
365 | 'key': key, |
|
366 | }) |
|
367 | ||
368 | def parse_table(self, m): |
|
369 | item = self._process_table(m) |
|
370 | ||
371 | cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3)) |
|
372 | cells = cells.split('\n') |
|
373 | for i, v in enumerate(cells): |
|
374 | v = re.sub(r'^ *\| *| *\| *$', '', v) |
|
375 | cells[i] = re.split(r' *\| *', v) |
|
376 | ||
377 | item['cells'] = cells |
|
378 | self.tokens.append(item) |
|
379 | ||
380 | def parse_nptable(self, m): |
|
381 | item = self._process_table(m) |
|
382 | ||
383 | cells = re.sub(r'\n$', '', m.group(3)) |
|
384 | cells = cells.split('\n') |
|
385 | for i, v in enumerate(cells): |
|
386 | cells[i] = re.split(r' *\| *', v) |
|
387 | ||
388 | item['cells'] = cells |
|
389 | self.tokens.append(item) |
|
390 | ||
391 | def _process_table(self, m): |
|
392 | header = re.sub(r'^ *| *\| *$', '', m.group(1)) |
|
393 | header = re.split(r' *\| *', header) |
|
394 | align = re.sub(r' *|\| *$', '', m.group(2)) |
|
395 | align = re.split(r' *\| *', align) |
|
396 | ||
397 | for i, v in enumerate(align): |
|
398 | if re.search(r'^ *-+: *$', v): |
|
399 | align[i] = 'right' |
|
400 | elif re.search(r'^ *:-+: *$', v): |
|
401 | align[i] = 'center' |
|
402 | elif re.search(r'^ *:-+ *$', v): |
|
403 | align[i] = 'left' |
|
404 | else: |
|
405 | align[i] = None |
|
406 | ||
407 | item = { |
|
408 | 'type': 'table', |
|
409 | 'header': header, |
|
410 | 'align': align, |
|
411 | } |
|
412 | return item |
|
413 | ||
414 | def parse_block_html(self, m): |
|
415 | tag = m.group(1) |
|
416 | if not tag: |
|
417 | text = m.group(0) |
|
418 | self.tokens.append({ |
|
419 | 'type': 'close_html', |
|
420 | 'text': text |
|
421 | }) |
|
422 | else: |
|
423 | attr = m.group(2) |
|
424 | text = m.group(3) |
|
425 | self.tokens.append({ |
|
426 | 'type': 'open_html', |
|
427 | 'tag': tag, |
|
428 | 'extra': attr, |
|
429 | 'text': text |
|
430 | }) |
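
Both hunks in this report duplicate the same `BlockLexer` tokenizer. As a rough illustration of what that class produces (a sketch, assuming the surrounding mistune-style module, with its `BlockGrammar` and helper patterns, is importable), calling the lexer yields a flat token stream:

```python
# Illustrative sketch only: BlockLexer comes from the module excerpted above.
lexer = BlockLexer()
tokens = lexer('# Title\n\n- one\n- two\n')
# Expected shape of the stream (values shown for orientation):
# {'type': 'heading', 'level': 1, 'text': 'Title'}
# {'type': 'list_start', 'ordered': False}
# {'type': 'list_item_start'}
# {'type': 'text', 'text': 'one'}
# {'type': 'list_item_end'}
# ...
# {'type': 'list_end'}
```
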
@@ 160-427 (lines=268) @@ | ||
157 | text = re.compile(r'^[^\n]+') |
|
158 | ||
159 | ||
160 | class BlockLexer(object): |
|
161 | """Block level lexer for block grammars.""" |
|
162 | grammar_class = BlockGrammar |
|
163 | ||
164 | default_rules = [ |
|
165 | 'newline', 'hrule', 'block_code', 'fences', 'heading', |
|
166 | 'nptable', 'lheading', 'block_quote', |
|
167 | 'list_block', 'block_html', 'def_links', |
|
168 | 'def_footnotes', 'table', 'paragraph', 'text' |
|
169 | ] |
|
170 | ||
171 | list_rules = ( |
|
172 | 'newline', 'block_code', 'fences', 'lheading', 'hrule', |
|
173 | 'block_quote', 'list_block', 'block_html', 'text', |
|
174 | ) |
|
175 | ||
176 | footnote_rules = ( |
|
177 | 'newline', 'block_code', 'fences', 'heading', |
|
178 | 'nptable', 'lheading', 'hrule', 'block_quote', |
|
179 | 'list_block', 'block_html', 'table', 'paragraph', 'text' |
|
180 | ) |
|
181 | ||
182 | def __init__(self, rules=None, **kwargs): |
|
183 | self.tokens = [] |
|
184 | self.def_links = {} |
|
185 | self.def_footnotes = {} |
|
186 | ||
187 | if not rules: |
|
188 | rules = self.grammar_class() |
|
189 | ||
190 | self.rules = rules |
|
191 | ||
192 | def __call__(self, text, rules=None): |
|
193 | return self.parse(text, rules) |
|
194 | ||
195 | def parse(self, text, rules=None): |
|
196 | text = text.rstrip('\n') |
|
197 | ||
198 | if not rules: |
|
199 | rules = self.default_rules |
|
200 | ||
201 | def manipulate(text): |
|
202 | for key in rules: |
|
203 | rule = getattr(self.rules, key) |
|
204 | m = rule.match(text) |
|
205 | if not m: |
|
206 | continue |
|
207 | getattr(self, 'parse_%s' % key)(m) |
|
208 | return m |
|
209 | return False # pragma: no cover |
|
210 | ||
211 | while text: |
|
212 | m = manipulate(text) |
|
213 | if m is not False: |
|
214 | text = text[len(m.group(0)):] |
|
215 | continue |
|
216 | if text: # pragma: no cover |
|
217 | raise RuntimeError('Infinite loop at: %s' % text) |
|
218 | return self.tokens |
|
219 | ||
220 | def parse_newline(self, m): |
|
221 | length = len(m.group(0)) |
|
222 | if length > 1: |
|
223 | self.tokens.append({'type': 'newline'}) |
|
224 | ||
225 | def parse_block_code(self, m): |
|
226 | # clean leading whitespace |
|
227 | code = _block_code_leading_pattern.sub('', m.group(0)) |
|
228 | self.tokens.append({ |
|
229 | 'type': 'code', |
|
230 | 'lang': None, |
|
231 | 'text': code, |
|
232 | }) |
|
233 | ||
234 | def parse_fences(self, m): |
|
235 | self.tokens.append({ |
|
236 | 'type': 'code', |
|
237 | 'lang': m.group(2), |
|
238 | 'text': m.group(3), |
|
239 | }) |
|
240 | ||
241 | def parse_heading(self, m): |
|
242 | self.tokens.append({ |
|
243 | 'type': 'heading', |
|
244 | 'level': len(m.group(1)), |
|
245 | 'text': m.group(2), |
|
246 | }) |
|
247 | ||
248 | def parse_lheading(self, m): |
|
249 | """Parse setext heading.""" |
|
250 | self.tokens.append({ |
|
251 | 'type': 'heading', |
|
252 | 'level': 1 if m.group(2) == '=' else 2, |
|
253 | 'text': m.group(1), |
|
254 | }) |
|
255 | ||
256 | def parse_hrule(self, m): |
|
257 | self.tokens.append({'type': 'hrule'}) |
|
258 | ||
259 | def parse_list_block(self, m): |
|
260 | bull = m.group(2) |
|
261 | self.tokens.append({ |
|
262 | 'type': 'list_start', |
|
263 | 'ordered': '.' in bull, |
|
264 | }) |
|
265 | cap = m.group(0) |
|
266 | self._process_list_item(cap, bull) |
|
267 | self.tokens.append({'type': 'list_end'}) |
|
268 | ||
269 | def _process_list_item(self, cap, bull): |
|
270 | cap = self.rules.list_item.findall(cap) |
|
271 | ||
272 | _next = False |
|
273 | length = len(cap) |
|
274 | ||
275 | for i in range(length): |
|
276 | item = cap[i][0] |
|
277 | ||
278 | # remove the bullet |
|
279 | space = len(item) |
|
280 | item = self.rules.list_bullet.sub('', item) |
|
281 | ||
282 | # outdent |
|
283 | if '\n ' in item: |
|
284 | space = space - len(item) |
|
285 | pattern = re.compile(r'^ {1,%d}' % space, flags=re.M) |
|
286 | item = pattern.sub('', item) |
|
287 | ||
288 | # determine whether item is loose or not |
|
289 | loose = _next |
|
290 | if not loose and re.search(r'\n\n(?!\s*$)', item): |
|
291 | loose = True |
|
292 | ||
293 | rest = len(item) |
|
294 | if i != length - 1 and rest: |
|
295 | _next = item[rest-1] == '\n' |
|
296 | if not loose: |
|
297 | loose = _next |
|
298 | ||
299 | if loose: |
|
300 | t = 'loose_item_start' |
|
301 | else: |
|
302 | t = 'list_item_start' |
|
303 | ||
304 | self.tokens.append({'type': t}) |
|
305 | # recurse |
|
306 | self.parse(item, self.list_rules) |
|
307 | self.tokens.append({'type': 'list_item_end'}) |
|
308 | ||
309 | def parse_block_quote(self, m): |
|
310 | self.tokens.append({'type': 'block_quote_start'}) |
|
311 | # clean leading > |
|
312 | cap = _block_quote_leading_pattern.sub('', m.group(0)) |
|
313 | self.parse(cap) |
|
314 | self.tokens.append({'type': 'block_quote_end'}) |
|
315 | ||
316 | def parse_def_links(self, m): |
|
317 | key = _keyify(m.group(1)) |
|
318 | self.def_links[key] = { |
|
319 | 'link': m.group(2), |
|
320 | 'title': m.group(3), |
|
321 | } |
|
322 | ||
323 | def parse_def_footnotes(self, m): |
|
324 | key = _keyify(m.group(1)) |
|
325 | if key in self.def_footnotes: |
|
326 | # footnote is already defined |
|
327 | return |
|
328 | ||
329 | self.def_footnotes[key] = 0 |
|
330 | ||
331 | self.tokens.append({ |
|
332 | 'type': 'footnote_start', |
|
333 | 'key': key, |
|
334 | }) |
|
335 | ||
336 | text = m.group(2) |
|
337 | ||
338 | if '\n' in text: |
|
339 | lines = text.split('\n') |
|
340 | whitespace = None |
|
341 | for line in lines[1:]: |
|
342 | space = len(line) - len(line.lstrip()) |
|
343 | if space and (not whitespace or space < whitespace): |
|
344 | whitespace = space |
|
345 | newlines = [lines[0]] |
|
346 | for line in lines[1:]: |
|
347 | newlines.append(line[whitespace:]) |
|
348 | text = '\n'.join(newlines) |
|
349 | ||
350 | self.parse(text, self.footnote_rules) |
|
351 | ||
352 | self.tokens.append({ |
|
353 | 'type': 'footnote_end', |
|
354 | 'key': key, |
|
355 | }) |
|
356 | ||
357 | def parse_table(self, m): |
|
358 | item = self._process_table(m) |
|
359 | ||
360 | cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3)) |
|
361 | cells = cells.split('\n') |
|
362 | for i, v in enumerate(cells): |
|
363 | v = re.sub(r'^ *\| *| *\| *$', '', v) |
|
364 | cells[i] = re.split(r' *\| *', v) |
|
365 | ||
366 | item['cells'] = cells |
|
367 | self.tokens.append(item) |
|
368 | ||
369 | def parse_nptable(self, m): |
|
370 | item = self._process_table(m) |
|
371 | ||
372 | cells = re.sub(r'\n$', '', m.group(3)) |
|
373 | cells = cells.split('\n') |
|
374 | for i, v in enumerate(cells): |
|
375 | cells[i] = re.split(r' *\| *', v) |
|
376 | ||
377 | item['cells'] = cells |
|
378 | self.tokens.append(item) |
|
379 | ||
380 | def _process_table(self, m): |
|
381 | header = re.sub(r'^ *| *\| *$', '', m.group(1)) |
|
382 | header = re.split(r' *\| *', header) |
|
383 | align = re.sub(r' *|\| *$', '', m.group(2)) |
|
384 | align = re.split(r' *\| *', align) |
|
385 | ||
386 | for i, v in enumerate(align): |
|
387 | if re.search(r'^ *-+: *$', v): |
|
388 | align[i] = 'right' |
|
389 | elif re.search(r'^ *:-+: *$', v): |
|
390 | align[i] = 'center' |
|
391 | elif re.search(r'^ *:-+ *$', v): |
|
392 | align[i] = 'left' |
|
393 | else: |
|
394 | align[i] = None |
|
395 | ||
396 | item = { |
|
397 | 'type': 'table', |
|
398 | 'header': header, |
|
399 | 'align': align, |
|
400 | } |
|
401 | return item |
|
402 | ||
403 | def parse_block_html(self, m): |
|
404 | tag = m.group(1) |
|
405 | if not tag: |
|
406 | text = m.group(0) |
|
407 | self.tokens.append({ |
|
408 | 'type': 'close_html', |
|
409 | 'text': text |
|
410 | }) |
|
411 | else: |
|
412 | attr = m.group(2) |
|
413 | text = m.group(3) |
|
414 | self.tokens.append({ |
|
415 | 'type': 'open_html', |
|
416 | 'tag': tag, |
|
417 | 'extra': attr, |
|
418 | 'text': text |
|
419 | }) |
|
420 | ||
421 | def parse_paragraph(self, m): |
|
422 | text = m.group(1).rstrip('\n') |
|
423 | self.tokens.append({'type': 'paragraph', 'text': text}) |
|
424 | ||
425 | def parse_text(self, m): |
|
426 | text = m.group(0) |
|
427 | self.tokens.append({'type': 'text', 'text': text}) |
|
428 | ||
429 | ||
430 | class InlineGrammar(object): |
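
For orientation, the alignment detection in `_process_table` maps each delimiter-row cell to a column alignment. A standalone sketch of that mapping (the `_align` helper name is ours, not the module's):

```python
import re

def _align(cell):
    # Mirrors the branch order used in _process_table above.
    if re.search(r'^ *-+: *$', cell):
        return 'right'
    if re.search(r'^ *:-+: *$', cell):
        return 'center'
    if re.search(r'^ *:-+ *$', cell):
        return 'left'
    return None

print([_align(c) for c in ['---:', ':---:', ':---', '---']])
# -> ['right', 'center', 'left', None]
```
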