# coding: utf-8
"""
mistune
~~~~~~~
The fastest markdown parser in pure Python with renderer feature.
:copyright: (c) 2014 - 2015 by Hsiaoming Yang.
"""
import re
import inspect
__version__ = '0.7.1'
__author__ = 'Hsiaoming Yang <me@lepture.com>'
__all__ = [
'BlockGrammar', 'BlockLexer',
'InlineGrammar', 'InlineLexer',
'Renderer', 'Markdown',
'markdown', 'escape',
]
_key_pattern = re.compile(r'\s+')
_escape_pattern = re.compile(r'&(?!#?\w+;)')
_newline_pattern = re.compile(r'\r\n|\r')
_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
_block_code_leadning_pattern = re.compile(r'^ {4}', re.M)
_inline_tags = [
'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
'img', 'font',
]
_pre_tags = ['pre', 'script', 'style']
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
_valid_attr = r'''"[^"]*"|'[^']*'|[^'">]'''
_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
def _pure_pattern(regex):
pattern = regex.pattern
if pattern.startswith('^'):
pattern = pattern[1:]
return pattern
def _keyify(key):
return _key_pattern.sub(' ', key.lower())
def escape(text, quote=False, smart_amp=True):
"""Replace special characters "&", "<" and ">" to HTML-safe sequences.
The original cgi.escape will always escape "&", but you can control
this one for a smart escape amp.
:param quote: if set to True, " and ' will be escaped.
:param smart_amp: if set to False, & will always be escaped.
"""
if smart_amp:
text = _escape_pattern.sub('&', text)
else:
text = text.replace('&', '&')
text = text.replace('<', '<')
text = text.replace('>', '>')
if quote:
text = text.replace('"', '"')
text = text.replace("'", ''')
return text
def preprocessing(text, tab=4):
text = _newline_pattern.sub('\n', text)
text = text.replace('\t', ' ' * tab)
text = text.replace('\u00a0', ' ')
text = text.replace('\u2424', '\n')
pattern = re.compile(r'^ +$', re.M)
return pattern.sub('', text)
class BlockGrammar(object):
"""Grammars for block level tokens."""
def_links = re.compile(
r'^ *\[([^^\]]+)\]: *' # [key]:
r'<?([^\s>]+)>?' # <link> or link
r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
)
def_footnotes = re.compile(
r'^\[\^([^\]]+)\]: *('
r'[^\n]*(?:\n+|$)' # [^key]:
r'(?: {1,}[^\n]*(?:\n+|$))*'
r')'
)
newline = re.compile(r'^\n+')
block_code = re.compile(r'^( {4}[^\n]+\n*)+')
fences = re.compile(
r'^ *(`{3,}|~{3,}) *(\S+)? *\n' # ```lang
r'([\s\S]+?)\s*'
r'\1 *(?:\n+|$)' # ```
)
hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
list_block = re.compile(
r'^( *)([*+-]|\d+\.) [\s\S]+?'
r'(?:'
r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))' # hrule
r'|\n+(?=%s)' # def links
r'|\n+(?=%s)' # def footnotes
r'|\n{2,}'
r'(?! )'
r'(?!\1(?:[*+-]|\d+\.) )\n*'
r'|'
r'\s*$)' % (
_pure_pattern(def_links),
_pure_pattern(def_footnotes),
)
)
list_item = re.compile(
r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
flags=re.M
)
list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
paragraph = re.compile(
r'^((?:[^\n]+\n?(?!'
r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
r'))+)\n*' % (
_pure_pattern(fences).replace(r'\1', r'\2'),
_pure_pattern(list_block).replace(r'\1', r'\3'),
_pure_pattern(hrule),
_pure_pattern(heading),
_pure_pattern(lheading),
_pure_pattern(block_quote),
_pure_pattern(def_links),
_pure_pattern(def_footnotes),
'<' + _block_tag,
)
)
block_html = re.compile(
r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
r'<!--[\s\S]*?-->',
r'<(%s)((?:%s)*?)>([\s\S]+?)<\/\1>' % (_block_tag, _valid_attr),
r'<%s(?:%s)*?>' % (_block_tag, _valid_attr),
)
)
table = re.compile(
r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
)
nptable = re.compile(
r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
)
text = re.compile(r'^[^\n]+')
class BlockLexer(object):
"""Block level lexer for block grammars."""
grammar_class = BlockGrammar
default_rules = [
'newline', 'hrule', 'block_code', 'fences', 'heading',
'nptable', 'lheading', 'block_quote',
'list_block', 'block_html', 'def_links',
|