diff options
author | FreeArtMan <=> | 2015-09-17 17:46:06 +0100 |
---|---|---|
committer | FreeArtMan <=> | 2015-09-17 17:46:06 +0100 |
commit | 816ac021a1965e2e04d4a612b71b1236cbd025ac (patch) | |
tree | 6f53dbba0aebf9a20679e5a960ab1d08af745c5c /src/mistune.py | |
download | md-site-816ac021a1965e2e04d4a612b71b1236cbd025ac.tar.gz md-site-816ac021a1965e2e04d4a612b71b1236cbd025ac.zip |
Initial
Diffstat (limited to 'src/mistune.py')
-rw-r--r-- | src/mistune.py | 1143 |
1 files changed, 1143 insertions, 0 deletions
# coding: utf-8
"""
    mistune
    ~~~~~~~

    The fastest markdown parser in pure Python with renderer feature.

    :copyright: (c) 2014 - 2015 by Hsiaoming Yang.
"""

import re
import inspect

__version__ = '0.7.1'
__author__ = 'Hsiaoming Yang <me@lepture.com>'
__all__ = [
    'BlockGrammar', 'BlockLexer',
    'InlineGrammar', 'InlineLexer',
    'Renderer', 'Markdown',
    'markdown', 'escape',
]


_key_pattern = re.compile(r'\s+')
_escape_pattern = re.compile(r'&(?!#?\w+;)')
_newline_pattern = re.compile(r'\r\n|\r')
_blank_line_pattern = re.compile(r'^ +$', re.M)
_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
_block_code_leading_pattern = re.compile(r'^ {4}', re.M)
# Backward-compatible alias for the historical (misspelled) name.
_block_code_leadning_pattern = _block_code_leading_pattern
_inline_tags = [
    'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
    'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
    'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
    'img', 'font',
]
_pre_tags = ['pre', 'script', 'style']
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
_valid_attr = r'''"[^"]*"|'[^']*'|[^'">]'''
_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
# URL schemes that must never end up in an ``href``/``src`` attribute.
_scheme_blacklist = ('javascript:', 'vbscript:')


def _pure_pattern(regex):
    """Return the pattern source of ``regex`` without a leading ``^``."""
    pattern = regex.pattern
    if pattern.startswith('^'):
        pattern = pattern[1:]
    return pattern


def _keyify(key):
    """Normalize a link/footnote key: lower-case, whitespace runs -> space."""
    return _key_pattern.sub(' ', key.lower())


def escape(text, quote=False, smart_amp=True):
    """Replace special characters "&", "<" and ">" to HTML-safe sequences.

    The original cgi.escape will always escape "&", but you can control
    this one for a smart escape amp.

    :param text: the text to escape.
    :param quote: if set to True, " and ' will be escaped.
    :param smart_amp: if set to False, & will always be escaped; otherwise
                      an ampersand that already starts an entity is kept.
    :return: the escaped text.
    """
    if smart_amp:
        text = _escape_pattern.sub('&amp;', text)
    else:
        text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    if quote:
        text = text.replace('"', '&quot;')
        text = text.replace("'", '&#39;')
    return text


def _safe_link(url):
    """Return ``url`` made safe for use inside a quoted HTML attribute.

    URLs with a blacklisted scheme (checked case-insensitively, ignoring
    leading whitespace/control characters) collapse to an empty string.
    Everything else is HTML-escaped, quotes included, so the value cannot
    terminate the attribute it is placed in.
    """
    lowered = url.lower().strip('\x00\x1a \n\r\t')
    if lowered.startswith(_scheme_blacklist):
        return ''
    return escape(url, quote=True)


def preprocessing(text, tab=4):
    """Normalize raw input before block lexing.

    Converts newlines to ``\\n``, expands tabs to ``tab`` spaces and blanks
    out lines that consist of spaces only.

    :param text: raw markdown text.
    :param tab: number of spaces a tab expands to.
    """
    text = _newline_pattern.sub('\n', text)
    text = text.replace('\t', ' ' * tab)
    text = text.replace('\u00a0', ' ')
    text = text.replace('\u2424', '\n')
    return _blank_line_pattern.sub('', text)


class BlockGrammar(object):
    """Grammars for block level tokens."""

    def_links = re.compile(
        r'^ *\[([^^\]]+)\]: *'  # [key]:
        r'<?([^\s>]+)>?'  # <link> or link
        r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
    )
    def_footnotes = re.compile(
        r'^\[\^([^\]]+)\]: *('
        r'[^\n]*(?:\n+|$)'  # [^key]:
        r'(?: {1,}[^\n]*(?:\n+|$))*'
        r')'
    )

    newline = re.compile(r'^\n+')
    block_code = re.compile(r'^( {4}[^\n]+\n*)+')
    fences = re.compile(
        r'^ *(`{3,}|~{3,}) *(\S+)? *\n'  # ```lang
        r'([\s\S]+?)\s*'
        r'\1 *(?:\n+|$)'  # ```
    )
    hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
    heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
    lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
    block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
    list_block = re.compile(
        r'^( *)([*+-]|\d+\.) [\s\S]+?'
        r'(?:'
        r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))'  # hrule
        r'|\n+(?=%s)'  # def links
        r'|\n+(?=%s)'  # def footnotes
        r'|\n{2,}'
        r'(?! )'
        r'(?!\1(?:[*+-]|\d+\.) )\n*'
        r'|'
        r'\s*$)' % (
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
        )
    )
    list_item = re.compile(
        r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
        r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
        flags=re.M
    )
    list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
    # The back-references of the embedded patterns are renumbered because
    # the paragraph pattern wraps everything in one extra capturing group.
    paragraph = re.compile(
        r'^((?:[^\n]+\n?(?!'
        r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
        r'))+)\n*' % (
            _pure_pattern(fences).replace(r'\1', r'\2'),
            _pure_pattern(list_block).replace(r'\1', r'\3'),
            _pure_pattern(hrule),
            _pure_pattern(heading),
            _pure_pattern(lheading),
            _pure_pattern(block_quote),
            _pure_pattern(def_links),
            _pure_pattern(def_footnotes),
            '<' + _block_tag,
        )
    )
    block_html = re.compile(
        r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
            r'<!--[\s\S]*?-->',
            r'<(%s)((?:%s)*?)>([\s\S]+?)<\/\1>' % (_block_tag, _valid_attr),
            r'<%s(?:%s)*?>' % (_block_tag, _valid_attr),
        )
    )
    table = re.compile(
        r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
    )
    nptable = re.compile(
        r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
    )
    text = re.compile(r'^[^\n]+')


class BlockLexer(object):
    """Block level lexer for block grammars.

    Turns markdown text into a flat list of token dicts (``self.tokens``)
    and collects link and footnote definitions along the way.
    """
    grammar_class = BlockGrammar

    default_rules = [
        'newline', 'hrule', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'block_quote',
        'list_block', 'block_html', 'def_links',
        'def_footnotes', 'table', 'paragraph', 'text'
    ]

    list_rules = (
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
        'block_quote', 'list_block', 'block_html', 'text',
    )

    footnote_rules = (
        'newline', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'hrule', 'block_quote',
        'list_block', 'block_html', 'table', 'paragraph', 'text'
    )

    def __init__(self, rules=None, **kwargs):
        """
        :param rules: a grammar instance; defaults to ``grammar_class()``.
        """
        self.tokens = []
        self.def_links = {}
        self.def_footnotes = {}

        if not rules:
            rules = self.grammar_class()

        self.rules = rules

    def __call__(self, text, rules=None):
        return self.parse(text, rules)

    def parse(self, text, rules=None):
        """Tokenize ``text`` and return the (shared) token list.

        :param text: markdown text.
        :param rules: names of the grammar rules to try, in order;
                      defaults to ``default_rules``.
        :raises RuntimeError: if no rule matches the remaining text.
        """
        text = text.rstrip('\n')

        if not rules:
            rules = self.default_rules

        def manipulate(text):
            # Try every rule in order; the first match is handed to the
            # matching ``parse_<rule>`` method.
            for key in rules:
                rule = getattr(self.rules, key)
                m = rule.match(text)
                if not m:
                    continue
                getattr(self, 'parse_%s' % key)(m)
                return m
            return False  # pragma: no cover

        while text:
            m = manipulate(text)
            if m is not False:
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)
        return self.tokens

    def parse_newline(self, m):
        """Emit a ``newline`` token for runs of two or more newlines."""
        length = len(m.group(0))
        if length > 1:
            self.tokens.append({'type': 'newline'})

    def parse_block_code(self, m):
        """Emit a ``code`` token for an indented code block."""
        # clean leading whitespace
        code = _block_code_leading_pattern.sub('', m.group(0))
        self.tokens.append({
            'type': 'code',
            'lang': None,
            'text': code,
        })

    def parse_fences(self, m):
        """Emit a ``code`` token for a fenced code block."""
        self.tokens.append({
            'type': 'code',
            'lang': m.group(2),
            'text': m.group(3),
        })

    def parse_heading(self, m):
        """Emit a ``heading`` token for an ATX (``#``) heading."""
        self.tokens.append({
            'type': 'heading',
            'level': len(m.group(1)),
            'text': m.group(2),
        })

    def parse_lheading(self, m):
        """Parse setext heading."""
        self.tokens.append({
            'type': 'heading',
            'level': 1 if m.group(2) == '=' else 2,
            'text': m.group(1),
        })

    def parse_hrule(self, m):
        """Emit an ``hrule`` token."""
        self.tokens.append({'type': 'hrule'})

    def parse_list_block(self, m):
        """Emit ``list_start`` ... ``list_end`` around the list's items."""
        bull = m.group(2)
        self.tokens.append({
            'type': 'list_start',
            'ordered': '.' in bull,
        })
        cap = m.group(0)
        self._process_list_item(cap, bull)
        self.tokens.append({'type': 'list_end'})

    def _process_list_item(self, cap, bull):
        """Split a list block into items and tokenize each one."""
        items = self.rules.list_item.findall(cap)

        _next = False
        last = len(items) - 1

        for i, groups in enumerate(items):
            item = groups[0]

            # remove the bullet
            space = len(item)
            item = self.rules.list_bullet.sub('', item)

            # outdent continuation lines by at most the bullet width
            if '\n ' in item:
                space = space - len(item)
                pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
                item = pattern.sub('', item)

            # determine whether item is loose or not
            loose = _next
            if not loose and re.search(r'\n\n(?!\s*$)', item):
                loose = True

            rest = len(item)
            if i != last and rest:
                _next = item[rest - 1] == '\n'
                if not loose:
                    loose = _next

            if loose:
                t = 'loose_item_start'
            else:
                t = 'list_item_start'

            self.tokens.append({'type': t})
            # recurse
            self.parse(item, self.list_rules)
            self.tokens.append({'type': 'list_item_end'})

    def parse_block_quote(self, m):
        """Emit ``block_quote_start`` ... ``block_quote_end`` tokens."""
        self.tokens.append({'type': 'block_quote_start'})
        # clean leading >
        cap = _block_quote_leading_pattern.sub('', m.group(0))
        self.parse(cap)
        self.tokens.append({'type': 'block_quote_end'})

    def parse_def_links(self, m):
        """Record a ``[key]: link "title"`` definition."""
        key = _keyify(m.group(1))
        self.def_links[key] = {
            'link': m.group(2),
            'title': m.group(3),
        }

    def parse_def_footnotes(self, m):
        """Record a footnote definition and tokenize its body."""
        key = _keyify(m.group(1))
        if key in self.def_footnotes:
            # footnote is already defined
            return

        # 0 means "defined but not referenced yet".
        self.def_footnotes[key] = 0

        self.tokens.append({
            'type': 'footnote_start',
            'key': key,
        })

        text = m.group(2)

        if '\n' in text:
            lines = text.split('\n')
            # smallest non-zero indent among the continuation lines
            whitespace = None
            for line in lines[1:]:
                space = len(line) - len(line.lstrip())
                if space and (not whitespace or space < whitespace):
                    whitespace = space
            newlines = [lines[0]]
            for line in lines[1:]:
                newlines.append(line[whitespace:])
            text = '\n'.join(newlines)

        self.parse(text, self.footnote_rules)

        self.tokens.append({
            'type': 'footnote_end',
            'key': key,
        })

    def parse_table(self, m):
        """Emit a ``table`` token for a pipe-delimited table."""
        item = self._process_table(m)

        cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            v = re.sub(r'^ *\| *| *\| *$', '', v)
            cells[i] = re.split(r' *\| *', v)

        item['cells'] = cells
        self.tokens.append(item)

    def parse_nptable(self, m):
        """Emit a ``table`` token for a table without leading pipes."""
        item = self._process_table(m)

        cells = re.sub(r'\n$', '', m.group(3))
        cells = cells.split('\n')
        for i, v in enumerate(cells):
            cells[i] = re.split(r' *\| *', v)

        item['cells'] = cells
        self.tokens.append(item)

    def _process_table(self, m):
        """Build the ``table`` token (header and alignment) from a match."""
        header = re.sub(r'^ *| *\| *$', '', m.group(1))
        header = re.split(r' *\| *', header)
        align = re.sub(r' *|\| *$', '', m.group(2))
        align = re.split(r' *\| *', align)

        for i, v in enumerate(align):
            if re.search(r'^ *-+: *$', v):
                align[i] = 'right'
            elif re.search(r'^ *:-+: *$', v):
                align[i] = 'center'
            elif re.search(r'^ *:-+ *$', v):
                align[i] = 'left'
            else:
                align[i] = None

        item = {
            'type': 'table',
            'header': header,
            'align': align,
        }
        return item

    def parse_block_html(self, m):
        """Emit ``open_html`` (paired tag) or ``close_html`` (anything else)."""
        tag = m.group(1)
        if not tag:
            text = m.group(0)
            self.tokens.append({
                'type': 'close_html',
                'text': text
            })
        else:
            attr = m.group(2)
            text = m.group(3)
            self.tokens.append({
                'type': 'open_html',
                'tag': tag,
                'extra': attr,
                'text': text
            })

    def parse_paragraph(self, m):
        """Emit a ``paragraph`` token."""
        text = m.group(1).rstrip('\n')
        self.tokens.append({'type': 'paragraph', 'text': text})

    def parse_text(self, m):
        """Emit a ``text`` token for a single line."""
        text = m.group(0)
        self.tokens.append({'type': 'text', 'text': text})


class InlineGrammar(object):
    """Grammars for inline level tokens."""

    escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])')  # \* \+ \! ....
    inline_html = re.compile(
        r'^(?:%s|%s|%s)' % (
            r'<!--[\s\S]*?-->',
            r'<(\w+%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_valid_end, _valid_attr),
            r'<\w+%s(?:%s)*?>' % (_valid_end, _valid_attr),
        )
    )
    autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
    link = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\('
        r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
        r'\)'
    )
    reflink = re.compile(
        r'^!?\[('
        r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
        r')\]\s*\[([^^\]]*)\]'
    )
    nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
    url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
    double_emphasis = re.compile(
        r'^_{2}([\s\S]+?)_{2}(?!_)'  # __word__
        r'|'
        r'^\*{2}([\s\S]+?)\*{2}(?!\*)'  # **word**
    )
    emphasis = re.compile(
        r'^\b_((?:__|[\s\S])+?)_\b'  # _word_
        r'|'
        r'^\*((?:\*\*|[\s\S])+?)\*(?!\*)'  # *word*
    )
    code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)')  # `code`
    linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
    strikethrough = re.compile(r'^~~(?=\S)([\s\S]+?\S)~~')  # ~~word~~
    footnote = re.compile(r'^\[\^([^\]]+)\]')
    text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')

    def hard_wrap(self):
        """Grammar for hard wrap linebreak. You don't need to add two
        spaces at the end of a line.
        """
        self.linebreak = re.compile(r'^ *\n(?!\s*$)')
        self.text = re.compile(
            r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
        )


class InlineLexer(object):
    """Inline level lexer for inline grammars.

    Unlike the block lexer this one renders as it goes: every matched rule
    is handed straight to the renderer and the results are concatenated.
    """
    grammar_class = InlineGrammar

    default_rules = [
        'escape', 'inline_html', 'autolink', 'url',
        'footnote', 'link', 'reflink', 'nolink',
        'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]
    inline_html_rules = [
        'escape', 'autolink', 'url', 'link', 'reflink',
        'nolink', 'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]

    def __init__(self, renderer, rules=None, **kwargs):
        """
        :param renderer: renderer instance; its ``options`` dict is merged
                         over ``kwargs``.
        :param rules: a grammar instance; defaults to ``grammar_class()``.
        """
        self.renderer = renderer
        self.links = {}
        self.footnotes = {}
        self.footnote_index = 0

        if not rules:
            rules = self.grammar_class()

        self.rules = rules

        self._in_link = False
        self._in_footnote = False

        kwargs.update(self.renderer.options)
        self._parse_inline_html = kwargs.get('parse_inline_html')

    def __call__(self, text, rules=None):
        return self.output(text, rules)

    def setup(self, links, footnotes):
        """Install the link/footnote definitions found by the block lexer."""
        self.footnote_index = 0
        self.links = links or {}
        self.footnotes = footnotes or {}

    def output(self, text, rules=None):
        """Render inline markdown ``text``.

        :param text: inline markdown text.
        :param rules: names of the grammar rules to try, in order;
                      defaults to ``default_rules``. Never mutated.
        :raises RuntimeError: if no rule matches the remaining text.
        """
        text = text.rstrip('\n')
        # Always work on a private copy: the footnote rule may be removed
        # below and the caller's (possibly class-level) list must survive.
        rules = list(rules or self.default_rules)

        if self._in_footnote and 'footnote' in rules:
            rules.remove('footnote')

        output = self.renderer.placeholder()

        def manipulate(text):
            # First rule whose handler returns something other than None
            # wins; a None result means "matched, but not applicable".
            for key in rules:
                pattern = getattr(self.rules, key)
                m = pattern.match(text)
                if not m:
                    continue
                self.line_match = m
                out = getattr(self, 'output_%s' % key)(m)
                if out is not None:
                    return m, out
            return False  # pragma: no cover

        self.line_started = False
        while text:
            ret = manipulate(text)
            self.line_started = True
            if ret is not False:
                m, out = ret
                output += out
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)

        return output

    def output_escape(self, m):
        """Return the character following the backslash."""
        return m.group(1)

    def output_autolink(self, m):
        """Render ``<scheme:...>`` / ``<user@host>`` autolinks."""
        link = m.group(1)
        is_email = m.group(2) == '@'
        return self.renderer.autolink(link, is_email)

    def output_url(self, m):
        """Render a bare URL; plain text when already inside a link."""
        link = m.group(1)
        if self._in_link:
            return self.renderer.text(link)
        return self.renderer.autolink(link, False)

    def output_inline_html(self, m):
        """Render inline HTML, optionally parsing markdown inside it."""
        tag = m.group(1)
        if self._parse_inline_html and tag in _inline_tags:
            text = m.group(3)
            if tag == 'a':
                self._in_link = True
                text = self.output(text, rules=self.inline_html_rules)
                self._in_link = False
            else:
                text = self.output(text, rules=self.inline_html_rules)
            extra = m.group(2) or ''
            html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        else:
            html = m.group(0)
        return self.renderer.inline_html(html)

    def output_footnote(self, m):
        """Render a footnote reference, or None if undefined/already used."""
        key = _keyify(m.group(1))
        if key not in self.footnotes:
            return None
        if self.footnotes[key]:
            return None
        self.footnote_index += 1
        self.footnotes[key] = self.footnote_index
        return self.renderer.footnote_ref(key, self.footnote_index)

    def output_link(self, m):
        """Render an inline ``[text](link "title")`` link or image."""
        return self._process_link(m, m.group(3), m.group(4))

    def output_reflink(self, m):
        """Render ``[text][key]``, or None if the key is undefined."""
        key = _keyify(m.group(2) or m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def output_nolink(self, m):
        """Render ``[key]``, or None if the key is undefined."""
        key = _keyify(m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def _process_link(self, m, link, title=None):
        """Dispatch a matched link to the renderer as an image or a link."""
        line = m.group(0)
        text = m.group(1)
        if line[0] == '!':
            return self.renderer.image(link, title, text)

        self._in_link = True
        text = self.output(text)
        self._in_link = False
        return self.renderer.link(link, title, text)

    def output_double_emphasis(self, m):
        """Render ``**strong**`` / ``__strong__``."""
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.double_emphasis(text)

    def output_emphasis(self, m):
        """Render ``*em*`` / ``_em_``."""
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.emphasis(text)

    def output_code(self, m):
        """Render an inline code span."""
        text = m.group(2)
        return self.renderer.codespan(text)

    def output_linebreak(self, m):
        """Render a hard line break."""
        return self.renderer.linebreak()

    def output_strikethrough(self, m):
        """Render ``~~strikethrough~~``."""
        text = self.output(m.group(1))
        return self.renderer.strikethrough(text)

    def output_text(self, m):
        """Render plain text."""
        text = m.group(0)
        return self.renderer.text(text)


class Renderer(object):
    """The default HTML renderer for rendering Markdown.
    """

    def __init__(self, **kwargs):
        self.options = kwargs

    def placeholder(self):
        """Returns the default, empty output value for the renderer.

        All renderer methods use the '+=' operator to append to this value.
        Default is a string so rendering HTML can build up a result string with
        the rendered Markdown.

        Can be overridden by Renderer subclasses to be types like an empty
        list, allowing the renderer to create a tree-like structure to
        represent the document (which can then be reprocessed later into a
        separate format like docx or pdf).
        """
        return ''

    def block_code(self, code, lang=None):
        """Rendering block level code. ``pre > code``.

        :param code: text content of the code block.
        :param lang: language of the given code.
        """
        code = code.rstrip('\n')
        if not lang:
            code = escape(code, smart_amp=False)
            return '<pre><code>%s\n</code></pre>\n' % code
        code = escape(code, quote=True, smart_amp=False)
        # The language comes straight from the document, so it has to be
        # escaped before it is placed inside the class attribute. The class
        # is the bare language name (no ``lang-`` prefix).
        lang = escape(lang, quote=True, smart_amp=False)
        return '<pre><code class="%s">%s\n</code></pre>\n' % (lang, code)

    def block_quote(self, text):
        """Rendering <blockquote> with the given text.

        :param text: text content of the blockquote.
        """
        return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')

    def block_html(self, html):
        """Rendering block level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('skip_style') and \
           html.lower().startswith('<style'):
            return ''
        if self.options.get('escape'):
            return escape(html)
        return html

    def header(self, text, level, raw=None):
        """Rendering header/heading tags like ``<h1>`` ``<h2>``.

        :param text: rendered text content for the header.
        :param level: a number for the header level, for example: 1.
        :param raw: raw text content of the header.
        """
        return '<h%d>%s</h%d>\n' % (level, text, level)

    def hrule(self):
        """Rendering method for ``<hr>`` tag."""
        if self.options.get('use_xhtml'):
            return '<hr />\n'
        return '<hr>\n'

    def list(self, body, ordered=True):
        """Rendering list tags like ``<ul>`` and ``<ol>``.

        :param body: body contents of the list.
        :param ordered: whether this list is ordered or not.
        """
        tag = 'ul'
        if ordered:
            tag = 'ol'
        return '<%s>\n%s</%s>\n' % (tag, body, tag)

    def list_item(self, text):
        """Rendering list item snippet. Like ``<li>``."""
        return '<li>%s</li>\n' % text

    def paragraph(self, text):
        """Rendering paragraph tags. Like ``<p>``."""
        return '<p>%s</p>\n' % text.strip(' ')

    def table(self, header, body):
        """Rendering table element. Wrap header and body in it.

        :param header: header part of the table.
        :param body: body part of the table.
        """
        return (
            '<table>\n<thead>%s</thead>\n'
            '<tbody>\n%s</tbody>\n</table>\n'
        ) % (header, body)

    def table_row(self, content):
        """Rendering a table row. Like ``<tr>``.

        :param content: content of current table row.
        """
        return '<tr>\n%s</tr>\n' % content

    def table_cell(self, content, **flags):
        """Rendering a table cell. Like ``<th>`` ``<td>``.

        :param content: content of current table cell.
        :param header: whether this is header or not.
        :param align: align of current table cell.
        """
        tag = 'th' if flags.get('header') else 'td'
        align = flags.get('align')
        if not align:
            return '<%s>%s</%s>\n' % (tag, content, tag)
        return '<%s style="text-align:%s">%s</%s>\n' % (
            tag, align, content, tag
        )

    def double_emphasis(self, text):
        """Rendering **strong** text.

        :param text: text content for emphasis.
        """
        return '<strong>%s</strong>' % text

    def emphasis(self, text):
        """Rendering *emphasis* text.

        :param text: text content for emphasis.
        """
        return '<em>%s</em>' % text

    def codespan(self, text):
        """Rendering inline `code` text.

        :param text: text content for inline code.
        """
        text = escape(text.rstrip(), smart_amp=False)
        return '<code>%s</code>' % text

    def linebreak(self):
        """Rendering line break like ``<br>``."""
        if self.options.get('use_xhtml'):
            return '<br />\n'
        return '<br>\n'

    def strikethrough(self, text):
        """Rendering ~~strikethrough~~ text.

        :param text: text content for strikethrough.
        """
        return '<del>%s</del>' % text

    def text(self, text):
        """Rendering unformatted text.

        :param text: text content.
        """
        return escape(text)

    def autolink(self, link, is_email=False):
        """Rendering a given link or email address.

        :param link: link content or email address.
        :param is_email: whether this is an email or not.
        """
        text = escape(link)
        if is_email:
            href = 'mailto:%s' % escape(link, quote=True)
        else:
            # Blocks script schemes and keeps quotes from closing the href.
            href = _safe_link(link)
        return '<a href="%s">%s</a>' % (href, text)

    def link(self, link, title, text):
        """Rendering a given link with content and title.

        :param link: href link for ``<a>`` tag.
        :param title: title content for `title` attribute.
        :param text: text content for description.
        """
        link = _safe_link(link)
        if not title:
            return '<a href="%s">%s</a>' % (link, text)
        title = escape(title, quote=True)
        return '<a href="%s" title="%s">%s</a>' % (link, title, text)

    def image(self, src, title, text):
        """Rendering a image with title and text.

        :param src: source link of the image.
        :param title: title text of the image.
        :param text: alt text of the image.
        """
        src = _safe_link(src)
        text = escape(text, quote=True)
        if title:
            title = escape(title, quote=True)
            html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
        else:
            html = '<img src="%s" alt="%s"' % (src, text)
        if self.options.get('use_xhtml'):
            return '%s />' % html
        return '%s>' % html

    def inline_html(self, html):
        """Rendering span level pure html content.

        :param html: text content of the html snippet.
        """
        if self.options.get('escape'):
            return escape(html)
        return html

    def newline(self):
        """Rendering newline element."""
        return ''

    def footnote_ref(self, key, index):
        """Rendering the ref anchor of a footnote.

        :param key: identity key for the footnote.
        :param index: the index count of current footnote.
        """
        html = (
            '<sup class="footnote-ref" id="fnref-%s">'
            '<a href="#fn-%s" rel="footnote">%d</a></sup>'
        ) % (escape(key), escape(key), index)
        return html

    def footnote_item(self, key, text):
        """Rendering a footnote item.

        :param key: identity key for the footnote.
        :param text: text content of the footnote.
        """
        back = (
            '<a href="#fnref-%s" rev="footnote">&#8617;</a>'
        ) % escape(key)
        text = text.rstrip()
        if text.endswith('</p>'):
            # Plain slicing instead of ``re.sub``: the key may contain
            # backslashes, which a regex replacement string would interpret.
            text = '%s%s</p>' % (text[:-len('</p>')], back)
        else:
            text = '%s<p>%s</p>' % (text, back)
        html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
        return html

    def footnotes(self, text):
        """Wrapper for all footnotes.

        :param text: contents of all footnotes.
        """
        html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
        return html % (self.hrule(), text)


class Markdown(object):
    """The Markdown parser.

    :param renderer: An instance of ``Renderer``.
    :param inline: An inline lexer class or instance.
    :param block: A block lexer class or instance.
    """
    def __init__(self, renderer=None, inline=None, block=None, **kwargs):
        if not renderer:
            renderer = Renderer(**kwargs)

        self.renderer = renderer

        if inline and inspect.isclass(inline):
            inline = inline(renderer, **kwargs)
        if block and inspect.isclass(block):
            block = block(**kwargs)

        if inline:
            self.inline = inline
        else:
            rules = InlineGrammar()
            if kwargs.get('hard_wrap'):
                rules.hard_wrap()
            self.inline = InlineLexer(renderer, rules=rules)

        self.block = block or BlockLexer(BlockGrammar())
        self.options = kwargs
        self.footnotes = []
        self.tokens = []

        # detect if it should parse text in block html
        self._parse_block_html = kwargs.get('parse_block_html')

    def __call__(self, text):
        return self.parse(text)

    def render(self, text):
        """Render the Markdown text.

        :param text: markdown formatted text content.
        """
        return self.parse(text)

    def parse(self, text):
        """Render ``text`` and append the collected footnotes, if any."""
        out = self.output(preprocessing(text))

        # key -> reference index (0 when the footnote was never referenced)
        keys = self.block.def_footnotes

        # reset block
        self.block.def_links = {}
        self.block.def_footnotes = {}

        # reset inline
        self.inline.links = {}
        self.inline.footnotes = {}

        if not self.footnotes:
            return out

        # Keep referenced footnotes only, ordered so that ``pop()`` yields
        # them by ascending reference index.
        referenced = [o for o in self.footnotes if keys.get(o['key'])]
        self.footnotes = sorted(
            referenced, key=lambda o: keys.get(o['key']), reverse=True
        )

        body = self.renderer.placeholder()
        while self.footnotes:
            note = self.footnotes.pop()
            body += self.renderer.footnote_item(
                note['key'], note['text']
            )

        out += self.renderer.footnotes(body)
        return out

    def pop(self):
        """Pop the next token into ``self.token``; None when exhausted."""
        if not self.tokens:
            return None
        self.token = self.tokens.pop()
        return self.token

    def peek(self):
        """Return the next token without consuming it, or None."""
        if self.tokens:
            return self.tokens[-1]
        return None  # pragma: no cover

    def output(self, text, rules=None):
        """Tokenize ``text`` with the block lexer and render every token."""
        self.tokens = self.block(text, rules)
        # Reverse so that ``pop()`` hands tokens out in document order.
        self.tokens.reverse()

        self.inline.setup(self.block.def_links, self.block.def_footnotes)

        out = self.renderer.placeholder()
        while self.pop():
            out += self.tok()
        return out

    def tok(self):
        """Render the current token through its ``output_<type>`` method."""
        t = self.token['type']

        # special cases
        if t.endswith('_start'):
            t = t[:-6]

        return getattr(self, 'output_%s' % t)()

    def tok_text(self):
        """Merge the current and all directly following text tokens."""
        text = self.token['text']
        upcoming = self.peek()
        # ``peek`` returns None once the stream is exhausted, which happens
        # when a text token is the very last token.
        while upcoming is not None and upcoming['type'] == 'text':
            text += '\n' + self.pop()['text']
            upcoming = self.peek()
        return self.inline(text)

    def output_newline(self):
        return self.renderer.newline()

    def output_hrule(self):
        return self.renderer.hrule()

    def output_heading(self):
        return self.renderer.header(
            self.inline(self.token['text']),
            self.token['level'],
            self.token['text'],
        )

    def output_code(self):
        return self.renderer.block_code(
            self.token['text'], self.token['lang']
        )

    def output_table(self):
        aligns = self.token['align']
        aligns_length = len(aligns)
        cell = self.renderer.placeholder()

        # header part
        header = self.renderer.placeholder()
        for i, value in enumerate(self.token['header']):
            align = aligns[i] if i < aligns_length else None
            flags = {'header': True, 'align': align}
            cell += self.renderer.table_cell(self.inline(value), **flags)

        header += self.renderer.table_row(cell)

        # body part
        body = self.renderer.placeholder()
        for row in self.token['cells']:
            cell = self.renderer.placeholder()
            for j, value in enumerate(row):
                align = aligns[j] if j < aligns_length else None
                flags = {'header': False, 'align': align}
                cell += self.renderer.table_cell(self.inline(value), **flags)
            body += self.renderer.table_row(cell)

        return self.renderer.table(header, body)

    def output_block_quote(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'block_quote_end':
            body += self.tok()
        return self.renderer.block_quote(body)

    def output_list(self):
        ordered = self.token['ordered']
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_end':
            body += self.tok()
        return self.renderer.list(body, ordered)

    def output_list_item(self):
        # Tight items: text tokens are rendered inline, not as paragraphs.
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            if self.token['type'] == 'text':
                body += self.tok_text()
            else:
                body += self.tok()

        return self.renderer.list_item(body)

    def output_loose_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            body += self.tok()
        return self.renderer.list_item(body)

    def output_footnote(self):
        """Render a footnote body and stash it; emits nothing in place."""
        self.inline._in_footnote = True
        try:
            body = self.renderer.placeholder()
            key = self.token['key']
            while self.pop()['type'] != 'footnote_end':
                body += self.tok()
            self.footnotes.append({'key': key, 'text': body})
        finally:
            # Never leave the inline lexer stuck in footnote mode.
            self.inline._in_footnote = False
        return self.renderer.placeholder()

    def output_close_html(self):
        text = self.token['text']
        return self.renderer.block_html(text)

    def output_open_html(self):
        text = self.token['text']
        tag = self.token['tag']
        if self._parse_block_html and tag not in _pre_tags:
            text = self.inline(text, rules=self.inline.inline_html_rules)
        extra = self.token.get('extra') or ''
        html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        return self.renderer.block_html(html)

    def output_paragraph(self):
        return self.renderer.paragraph(self.inline(self.token['text']))

    def output_text(self):
        return self.renderer.paragraph(self.tok_text())


def markdown(text, escape=True, **kwargs):
    """Render markdown formatted text to html.

    :param text: markdown formatted text content.
    :param escape: if set to False, all html tags will not be escaped.
    :param use_xhtml: output with xhtml tags.
    :param hard_wrap: if set to True, it will has GFM line breaks feature.
    :param parse_block_html: parse text only in block level html.
    :param parse_inline_html: parse text only in inline level html.
    """
    return Markdown(escape=escape, **kwargs)(text)