| @@ 160-427 (lines=268) @@ | ||
| 157 | r'<!--[\s\S]*?-->', |
|
| 158 | r'<(%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_block_tag, _valid_attr), |
|
| 159 | r'<%s(?:%s)*?\s*\/?>' % (_block_tag, _valid_attr), |
|
| 160 | ) |
|
| 161 | ) |
|
| 162 | table = re.compile( |
|
| 163 | r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*' |
|
| 164 | ) |
|
| 165 | nptable = re.compile( |
|
| 166 | r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*' |
|
| 167 | ) |
|
| 168 | text = re.compile(r'^[^\n]+') |
|
| 169 | ||
| 170 | ||
| 171 | class BlockLexer(object): |
|
| 172 | """Block level lexer for block grammars.""" |
|
| 173 | grammar_class = BlockGrammar |
|
| 174 | ||
| 175 | default_rules = [ |
|
| 176 | 'newline', 'hrule', 'block_code', 'fences', 'heading', |
|
| 177 | 'nptable', 'lheading', 'block_quote', |
|
| 178 | 'list_block', 'block_html', 'def_links', |
|
| 179 | 'def_footnotes', 'table', 'paragraph', 'text' |
|
| 180 | ] |
|
| 181 | ||
| 182 | list_rules = ( |
|
| 183 | 'newline', 'block_code', 'fences', 'lheading', 'hrule', |
|
| 184 | 'block_quote', 'list_block', 'block_html', 'text', |
|
| 185 | ) |
|
| 186 | ||
| 187 | footnote_rules = ( |
|
| 188 | 'newline', 'block_code', 'fences', 'heading', |
|
| 189 | 'nptable', 'lheading', 'hrule', 'block_quote', |
|
| 190 | 'list_block', 'block_html', 'table', 'paragraph', 'text' |
|
| 191 | ) |
|
| 192 | ||
| 193 | def __init__(self, rules=None, **kwargs): |
|
| 194 | self.tokens = [] |
|
| 195 | self.def_links = {} |
|
| 196 | self.def_footnotes = {} |
|
| 197 | ||
| 198 | if not rules: |
|
| 199 | rules = self.grammar_class() |
|
| 200 | ||
| 201 | self.rules = rules |
|
| 202 | ||
| 203 | def __call__(self, text, rules=None): |
|
| 204 | return self.parse(text, rules) |
|
| 205 | ||
| 206 | def parse(self, text, rules=None): |
|
| 207 | text = text.rstrip('\n') |
|
| 208 | ||
| 209 | if not rules: |
|
| 210 | rules = self.default_rules |
|
| 211 | ||
| 212 | def manipulate(text): |
|
| 213 | for key in rules: |
|
| 214 | rule = getattr(self.rules, key) |
|
| 215 | m = rule.match(text) |
|
| 216 | if not m: |
|
| 217 | continue |
|
| 218 | getattr(self, 'parse_%s' % key)(m) |
|
| 219 | return m |
|
| 220 | return False # pragma: no cover |
|
| 221 | ||
| 222 | while text: |
|
| 223 | m = manipulate(text) |
|
| 224 | if m is not False: |
|
| 225 | text = text[len(m.group(0)):] |
|
| 226 | continue |
|
| 227 | if text: # pragma: no cover |
|
| 228 | raise RuntimeError('Infinite loop at: %s' % text) |
|
| 229 | return self.tokens |
|
| 230 | ||
| 231 | def parse_newline(self, m): |
|
| 232 | length = len(m.group(0)) |
|
| 233 | if length > 1: |
|
| 234 | self.tokens.append({'type': 'newline'}) |
|
| 235 | ||
| 236 | def parse_block_code(self, m): |
|
| 237 | # clean leading whitespace |
|
| 238 | code = _block_code_leading_pattern.sub('', m.group(0)) |
|
| 239 | self.tokens.append({ |
|
| 240 | 'type': 'code', |
|
| 241 | 'lang': None, |
|
| 242 | 'text': code, |
|
| 243 | }) |
|
| 244 | ||
| 245 | def parse_fences(self, m): |
|
| 246 | self.tokens.append({ |
|
| 247 | 'type': 'code', |
|
| 248 | 'lang': m.group(2), |
|
| 249 | 'text': m.group(3), |
|
| 250 | }) |
|
| 251 | ||
| 252 | def parse_heading(self, m): |
|
| 253 | self.tokens.append({ |
|
| 254 | 'type': 'heading', |
|
| 255 | 'level': len(m.group(1)), |
|
| 256 | 'text': m.group(2), |
|
| 257 | }) |
|
| 258 | ||
| 259 | def parse_lheading(self, m): |
|
| 260 | """Parse setext heading.""" |
|
| 261 | self.tokens.append({ |
|
| 262 | 'type': 'heading', |
|
| 263 | 'level': 1 if m.group(2) == '=' else 2, |
|
| 264 | 'text': m.group(1), |
|
| 265 | }) |
|
| 266 | ||
| 267 | def parse_hrule(self, m): |
|
| 268 | self.tokens.append({'type': 'hrule'}) |
|
| 269 | ||
| 270 | def parse_list_block(self, m): |
|
| 271 | bull = m.group(2) |
|
| 272 | self.tokens.append({ |
|
| 273 | 'type': 'list_start', |
|
| 274 | 'ordered': '.' in bull, |
|
| 275 | }) |
|
| 276 | cap = m.group(0) |
|
| 277 | self._process_list_item(cap, bull) |
|
| 278 | self.tokens.append({'type': 'list_end'}) |
|
| 279 | ||
| 280 | def _process_list_item(self, cap, bull): |
|
| 281 | cap = self.rules.list_item.findall(cap) |
|
| 282 | ||
| 283 | _next = False |
|
| 284 | length = len(cap) |
|
| 285 | ||
| 286 | for i in range(length): |
|
| 287 | item = cap[i][0] |
|
| 288 | ||
| 289 | # remove the bullet |
|
| 290 | space = len(item) |
|
| 291 | item = self.rules.list_bullet.sub('', item) |
|
| 292 | ||
| 293 | # outdent |
|
| 294 | if '\n ' in item: |
|
| 295 | space = space - len(item) |
|
| 296 | pattern = re.compile(r'^ {1,%d}' % space, flags=re.M) |
|
| 297 | item = pattern.sub('', item) |
|
| 298 | ||
| 299 | # determine whether item is loose or not |
|
| 300 | loose = _next |
|
| 301 | if not loose and re.search(r'\n\n(?!\s*$)', item): |
|
| 302 | loose = True |
|
| 303 | ||
| 304 | rest = len(item) |
|
| 305 | if i != length - 1 and rest: |
|
| 306 | _next = item[rest-1] == '\n' |
|
| 307 | if not loose: |
|
| 308 | loose = _next |
|
| 309 | ||
| 310 | if loose: |
|
| 311 | t = 'loose_item_start' |
|
| 312 | else: |
|
| 313 | t = 'list_item_start' |
|
| 314 | ||
| 315 | self.tokens.append({'type': t}) |
|
| 316 | # recurse |
|
| 317 | self.parse(item, self.list_rules) |
|
| 318 | self.tokens.append({'type': 'list_item_end'}) |
|
| 319 | ||
| 320 | def parse_block_quote(self, m): |
|
| 321 | self.tokens.append({'type': 'block_quote_start'}) |
|
| 322 | # clean leading > |
|
| 323 | cap = _block_quote_leading_pattern.sub('', m.group(0)) |
|
| 324 | self.parse(cap) |
|
| 325 | self.tokens.append({'type': 'block_quote_end'}) |
|
| 326 | ||
| 327 | def parse_def_links(self, m): |
|
| 328 | key = _keyify(m.group(1)) |
|
| 329 | self.def_links[key] = { |
|
| 330 | 'link': m.group(2), |
|
| 331 | 'title': m.group(3), |
|
| 332 | } |
|
| 333 | ||
| 334 | def parse_def_footnotes(self, m): |
|
| 335 | key = _keyify(m.group(1)) |
|
| 336 | if key in self.def_footnotes: |
|
| 337 | # footnote is already defined |
|
| 338 | return |
|
| 339 | ||
| 340 | self.def_footnotes[key] = 0 |
|
| 341 | ||
| 342 | self.tokens.append({ |
|
| 343 | 'type': 'footnote_start', |
|
| 344 | 'key': key, |
|
| 345 | }) |
|
| 346 | ||
| 347 | text = m.group(2) |
|
| 348 | ||
| 349 | if '\n' in text: |
|
| 350 | lines = text.split('\n') |
|
| 351 | whitespace = None |
|
| 352 | for line in lines[1:]: |
|
| 353 | space = len(line) - len(line.lstrip()) |
|
| 354 | if space and (not whitespace or space < whitespace): |
|
| 355 | whitespace = space |
|
| 356 | newlines = [lines[0]] |
|
| 357 | for line in lines[1:]: |
|
| 358 | newlines.append(line[whitespace:]) |
|
| 359 | text = '\n'.join(newlines) |
|
| 360 | ||
| 361 | self.parse(text, self.footnote_rules) |
|
| 362 | ||
| 363 | self.tokens.append({ |
|
| 364 | 'type': 'footnote_end', |
|
| 365 | 'key': key, |
|
| 366 | }) |
|
| 367 | ||
| 368 | def parse_table(self, m): |
|
| 369 | item = self._process_table(m) |
|
| 370 | ||
| 371 | cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3)) |
|
| 372 | cells = cells.split('\n') |
|
| 373 | for i, v in enumerate(cells): |
|
| 374 | v = re.sub(r'^ *\| *| *\| *$', '', v) |
|
| 375 | cells[i] = re.split(r' *\| *', v) |
|
| 376 | ||
| 377 | item['cells'] = cells |
|
| 378 | self.tokens.append(item) |
|
| 379 | ||
| 380 | def parse_nptable(self, m): |
|
| 381 | item = self._process_table(m) |
|
| 382 | ||
| 383 | cells = re.sub(r'\n$', '', m.group(3)) |
|
| 384 | cells = cells.split('\n') |
|
| 385 | for i, v in enumerate(cells): |
|
| 386 | cells[i] = re.split(r' *\| *', v) |
|
| 387 | ||
| 388 | item['cells'] = cells |
|
| 389 | self.tokens.append(item) |
|
| 390 | ||
| 391 | def _process_table(self, m): |
|
| 392 | header = re.sub(r'^ *| *\| *$', '', m.group(1)) |
|
| 393 | header = re.split(r' *\| *', header) |
|
| 394 | align = re.sub(r' *|\| *$', '', m.group(2)) |
|
| 395 | align = re.split(r' *\| *', align) |
|
| 396 | ||
| 397 | for i, v in enumerate(align): |
|
| 398 | if re.search(r'^ *-+: *$', v): |
|
| 399 | align[i] = 'right' |
|
| 400 | elif re.search(r'^ *:-+: *$', v): |
|
| 401 | align[i] = 'center' |
|
| 402 | elif re.search(r'^ *:-+ *$', v): |
|
| 403 | align[i] = 'left' |
|
| 404 | else: |
|
| 405 | align[i] = None |
|
| 406 | ||
| 407 | item = { |
|
| 408 | 'type': 'table', |
|
| 409 | 'header': header, |
|
| 410 | 'align': align, |
|
| 411 | } |
|
| 412 | return item |
|
| 413 | ||
| 414 | def parse_block_html(self, m): |
|
| 415 | tag = m.group(1) |
|
| 416 | if not tag: |
|
| 417 | text = m.group(0) |
|
| 418 | self.tokens.append({ |
|
| 419 | 'type': 'close_html', |
|
| 420 | 'text': text |
|
| 421 | }) |
|
| 422 | else: |
|
| 423 | attr = m.group(2) |
|
| 424 | text = m.group(3) |
|
| 425 | self.tokens.append({ |
|
| 426 | 'type': 'open_html', |
|
| 427 | 'tag': tag, |
|
| 428 | 'extra': attr, |
|
| 429 | 'text': text |
|
| 430 | }) |
|
| @@ 160-427 (lines=268) @@ | ||
| 157 | text = re.compile(r'^[^\n]+') |
|
| 158 | ||
| 159 | ||
| 160 | class BlockLexer(object): |
|
| 161 | """Block level lexer for block grammars.""" |
|
| 162 | grammar_class = BlockGrammar |
|
| 163 | ||
| 164 | default_rules = [ |
|
| 165 | 'newline', 'hrule', 'block_code', 'fences', 'heading', |
|
| 166 | 'nptable', 'lheading', 'block_quote', |
|
| 167 | 'list_block', 'block_html', 'def_links', |
|
| 168 | 'def_footnotes', 'table', 'paragraph', 'text' |
|
| 169 | ] |
|
| 170 | ||
| 171 | list_rules = ( |
|
| 172 | 'newline', 'block_code', 'fences', 'lheading', 'hrule', |
|
| 173 | 'block_quote', 'list_block', 'block_html', 'text', |
|
| 174 | ) |
|
| 175 | ||
| 176 | footnote_rules = ( |
|
| 177 | 'newline', 'block_code', 'fences', 'heading', |
|
| 178 | 'nptable', 'lheading', 'hrule', 'block_quote', |
|
| 179 | 'list_block', 'block_html', 'table', 'paragraph', 'text' |
|
| 180 | ) |
|
| 181 | ||
| 182 | def __init__(self, rules=None, **kwargs): |
|
| 183 | self.tokens = [] |
|
| 184 | self.def_links = {} |
|
| 185 | self.def_footnotes = {} |
|
| 186 | ||
| 187 | if not rules: |
|
| 188 | rules = self.grammar_class() |
|
| 189 | ||
| 190 | self.rules = rules |
|
| 191 | ||
| 192 | def __call__(self, text, rules=None): |
|
| 193 | return self.parse(text, rules) |
|
| 194 | ||
| 195 | def parse(self, text, rules=None): |
|
| 196 | text = text.rstrip('\n') |
|
| 197 | ||
| 198 | if not rules: |
|
| 199 | rules = self.default_rules |
|
| 200 | ||
| 201 | def manipulate(text): |
|
| 202 | for key in rules: |
|
| 203 | rule = getattr(self.rules, key) |
|
| 204 | m = rule.match(text) |
|
| 205 | if not m: |
|
| 206 | continue |
|
| 207 | getattr(self, 'parse_%s' % key)(m) |
|
| 208 | return m |
|
| 209 | return False # pragma: no cover |
|
| 210 | ||
| 211 | while text: |
|
| 212 | m = manipulate(text) |
|
| 213 | if m is not False: |
|
| 214 | text = text[len(m.group(0)):] |
|
| 215 | continue |
|
| 216 | if text: # pragma: no cover |
|
| 217 | raise RuntimeError('Infinite loop at: %s' % text) |
|
| 218 | return self.tokens |
|
| 219 | ||
| 220 | def parse_newline(self, m): |
|
| 221 | length = len(m.group(0)) |
|
| 222 | if length > 1: |
|
| 223 | self.tokens.append({'type': 'newline'}) |
|
| 224 | ||
| 225 | def parse_block_code(self, m): |
|
| 226 | # clean leading whitespace |
|
| 227 | code = _block_code_leading_pattern.sub('', m.group(0)) |
|
| 228 | self.tokens.append({ |
|
| 229 | 'type': 'code', |
|
| 230 | 'lang': None, |
|
| 231 | 'text': code, |
|
| 232 | }) |
|
| 233 | ||
| 234 | def parse_fences(self, m): |
|
| 235 | self.tokens.append({ |
|
| 236 | 'type': 'code', |
|
| 237 | 'lang': m.group(2), |
|
| 238 | 'text': m.group(3), |
|
| 239 | }) |
|
| 240 | ||
| 241 | def parse_heading(self, m): |
|
| 242 | self.tokens.append({ |
|
| 243 | 'type': 'heading', |
|
| 244 | 'level': len(m.group(1)), |
|
| 245 | 'text': m.group(2), |
|
| 246 | }) |
|
| 247 | ||
| 248 | def parse_lheading(self, m): |
|
| 249 | """Parse setext heading.""" |
|
| 250 | self.tokens.append({ |
|
| 251 | 'type': 'heading', |
|
| 252 | 'level': 1 if m.group(2) == '=' else 2, |
|
| 253 | 'text': m.group(1), |
|
| 254 | }) |
|
| 255 | ||
| 256 | def parse_hrule(self, m): |
|
| 257 | self.tokens.append({'type': 'hrule'}) |
|
| 258 | ||
| 259 | def parse_list_block(self, m): |
|
| 260 | bull = m.group(2) |
|
| 261 | self.tokens.append({ |
|
| 262 | 'type': 'list_start', |
|
| 263 | 'ordered': '.' in bull, |
|
| 264 | }) |
|
| 265 | cap = m.group(0) |
|
| 266 | self._process_list_item(cap, bull) |
|
| 267 | self.tokens.append({'type': 'list_end'}) |
|
| 268 | ||
| 269 | def _process_list_item(self, cap, bull): |
|
| 270 | cap = self.rules.list_item.findall(cap) |
|
| 271 | ||
| 272 | _next = False |
|
| 273 | length = len(cap) |
|
| 274 | ||
| 275 | for i in range(length): |
|
| 276 | item = cap[i][0] |
|
| 277 | ||
| 278 | # remove the bullet |
|
| 279 | space = len(item) |
|
| 280 | item = self.rules.list_bullet.sub('', item) |
|
| 281 | ||
| 282 | # outdent |
|
| 283 | if '\n ' in item: |
|
| 284 | space = space - len(item) |
|
| 285 | pattern = re.compile(r'^ {1,%d}' % space, flags=re.M) |
|
| 286 | item = pattern.sub('', item) |
|
| 287 | ||
| 288 | # determine whether item is loose or not |
|
| 289 | loose = _next |
|
| 290 | if not loose and re.search(r'\n\n(?!\s*$)', item): |
|
| 291 | loose = True |
|
| 292 | ||
| 293 | rest = len(item) |
|
| 294 | if i != length - 1 and rest: |
|
| 295 | _next = item[rest-1] == '\n' |
|
| 296 | if not loose: |
|
| 297 | loose = _next |
|
| 298 | ||
| 299 | if loose: |
|
| 300 | t = 'loose_item_start' |
|
| 301 | else: |
|
| 302 | t = 'list_item_start' |
|
| 303 | ||
| 304 | self.tokens.append({'type': t}) |
|
| 305 | # recurse |
|
| 306 | self.parse(item, self.list_rules) |
|
| 307 | self.tokens.append({'type': 'list_item_end'}) |
|
| 308 | ||
| 309 | def parse_block_quote(self, m): |
|
| 310 | self.tokens.append({'type': 'block_quote_start'}) |
|
| 311 | # clean leading > |
|
| 312 | cap = _block_quote_leading_pattern.sub('', m.group(0)) |
|
| 313 | self.parse(cap) |
|
| 314 | self.tokens.append({'type': 'block_quote_end'}) |
|
| 315 | ||
| 316 | def parse_def_links(self, m): |
|
| 317 | key = _keyify(m.group(1)) |
|
| 318 | self.def_links[key] = { |
|
| 319 | 'link': m.group(2), |
|
| 320 | 'title': m.group(3), |
|
| 321 | } |
|
| 322 | ||
| 323 | def parse_def_footnotes(self, m): |
|
| 324 | key = _keyify(m.group(1)) |
|
| 325 | if key in self.def_footnotes: |
|
| 326 | # footnote is already defined |
|
| 327 | return |
|
| 328 | ||
| 329 | self.def_footnotes[key] = 0 |
|
| 330 | ||
| 331 | self.tokens.append({ |
|
| 332 | 'type': 'footnote_start', |
|
| 333 | 'key': key, |
|
| 334 | }) |
|
| 335 | ||
| 336 | text = m.group(2) |
|
| 337 | ||
| 338 | if '\n' in text: |
|
| 339 | lines = text.split('\n') |
|
| 340 | whitespace = None |
|
| 341 | for line in lines[1:]: |
|
| 342 | space = len(line) - len(line.lstrip()) |
|
| 343 | if space and (not whitespace or space < whitespace): |
|
| 344 | whitespace = space |
|
| 345 | newlines = [lines[0]] |
|
| 346 | for line in lines[1:]: |
|
| 347 | newlines.append(line[whitespace:]) |
|
| 348 | text = '\n'.join(newlines) |
|
| 349 | ||
| 350 | self.parse(text, self.footnote_rules) |
|
| 351 | ||
| 352 | self.tokens.append({ |
|
| 353 | 'type': 'footnote_end', |
|
| 354 | 'key': key, |
|
| 355 | }) |
|
| 356 | ||
| 357 | def parse_table(self, m): |
|
| 358 | item = self._process_table(m) |
|
| 359 | ||
| 360 | cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3)) |
|
| 361 | cells = cells.split('\n') |
|
| 362 | for i, v in enumerate(cells): |
|
| 363 | v = re.sub(r'^ *\| *| *\| *$', '', v) |
|
| 364 | cells[i] = re.split(r' *\| *', v) |
|
| 365 | ||
| 366 | item['cells'] = cells |
|
| 367 | self.tokens.append(item) |
|
| 368 | ||
| 369 | def parse_nptable(self, m): |
|
| 370 | item = self._process_table(m) |
|
| 371 | ||
| 372 | cells = re.sub(r'\n$', '', m.group(3)) |
|
| 373 | cells = cells.split('\n') |
|
| 374 | for i, v in enumerate(cells): |
|
| 375 | cells[i] = re.split(r' *\| *', v) |
|
| 376 | ||
| 377 | item['cells'] = cells |
|
| 378 | self.tokens.append(item) |
|
| 379 | ||
| 380 | def _process_table(self, m): |
|
| 381 | header = re.sub(r'^ *| *\| *$', '', m.group(1)) |
|
| 382 | header = re.split(r' *\| *', header) |
|
| 383 | align = re.sub(r' *|\| *$', '', m.group(2)) |
|
| 384 | align = re.split(r' *\| *', align) |
|
| 385 | ||
| 386 | for i, v in enumerate(align): |
|
| 387 | if re.search(r'^ *-+: *$', v): |
|
| 388 | align[i] = 'right' |
|
| 389 | elif re.search(r'^ *:-+: *$', v): |
|
| 390 | align[i] = 'center' |
|
| 391 | elif re.search(r'^ *:-+ *$', v): |
|
| 392 | align[i] = 'left' |
|
| 393 | else: |
|
| 394 | align[i] = None |
|
| 395 | ||
| 396 | item = { |
|
| 397 | 'type': 'table', |
|
| 398 | 'header': header, |
|
| 399 | 'align': align, |
|
| 400 | } |
|
| 401 | return item |
|
| 402 | ||
| 403 | def parse_block_html(self, m): |
|
| 404 | tag = m.group(1) |
|
| 405 | if not tag: |
|
| 406 | text = m.group(0) |
|
| 407 | self.tokens.append({ |
|
| 408 | 'type': 'close_html', |
|
| 409 | 'text': text |
|
| 410 | }) |
|
| 411 | else: |
|
| 412 | attr = m.group(2) |
|
| 413 | text = m.group(3) |
|
| 414 | self.tokens.append({ |
|
| 415 | 'type': 'open_html', |
|
| 416 | 'tag': tag, |
|
| 417 | 'extra': attr, |
|
| 418 | 'text': text |
|
| 419 | }) |
|
| 420 | ||
| 421 | def parse_paragraph(self, m): |
|
| 422 | text = m.group(1).rstrip('\n') |
|
| 423 | self.tokens.append({'type': 'paragraph', 'text': text}) |
|
| 424 | ||
| 425 | def parse_text(self, m): |
|
| 426 | text = m.group(0) |
|
| 427 | self.tokens.append({'type': 'text', 'text': text}) |
|
| 428 | ||
| 429 | ||
| 430 | class InlineGrammar(object): |
|