summaryrefslogtreecommitdiff
path: root/src/mistune.py
diff options
context:
space:
mode:
authorFreeArtMan <=>2015-09-17 17:46:06 +0100
committerFreeArtMan <=>2015-09-17 17:46:06 +0100
commit816ac021a1965e2e04d4a612b71b1236cbd025ac (patch)
tree6f53dbba0aebf9a20679e5a960ab1d08af745c5c /src/mistune.py
downloadmd-site-816ac021a1965e2e04d4a612b71b1236cbd025ac.tar.gz
md-site-816ac021a1965e2e04d4a612b71b1236cbd025ac.zip
Initial
Diffstat (limited to 'src/mistune.py')
-rw-r--r--src/mistune.py1143
1 files changed, 1143 insertions, 0 deletions
diff --git a/src/mistune.py b/src/mistune.py
new file mode 100644
index 0000000..4c711b2
--- /dev/null
+++ b/src/mistune.py
@@ -0,0 +1,1143 @@
+# coding: utf-8
+"""
+ mistune
+ ~~~~~~~
+
+ The fastest markdown parser in pure Python with renderer feature.
+
+ :copyright: (c) 2014 - 2015 by Hsiaoming Yang.
+"""
+
+import re
+import inspect
+
+__version__ = '0.7.1'
+__author__ = 'Hsiaoming Yang <me@lepture.com>'
+__all__ = [
+ 'BlockGrammar', 'BlockLexer',
+ 'InlineGrammar', 'InlineLexer',
+ 'Renderer', 'Markdown',
+ 'markdown', 'escape',
+]
+
+
+_key_pattern = re.compile(r'\s+')  # whitespace runs; collapsed to one space by _keyify()
+_escape_pattern = re.compile(r'&(?!#?\w+;)')  # "&" that is not followed by an entity-like "name;" / "#name;"
+_newline_pattern = re.compile(r'\r\n|\r')  # CRLF or lone CR; preprocessing() rewrites both to "\n"
+_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)  # leading ">" marker of every line
+_block_code_leadning_pattern = re.compile(r'^ {4}', re.M)  # four-space indent of every line ("leadning" is a typo, but the name is referenced by BlockLexer.parse_block_code)
+_inline_tags = [  # tag names excluded from _block_tag and accepted by InlineLexer.output_inline_html
+ 'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
+ 'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
+ 'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
+ 'img', 'font',
+]
+_pre_tags = ['pre', 'script', 'style']  # Markdown.output_open_html never inline-parses the body of these tags
+_valid_end = r'(?!:/|[^\w\s@]*@)\b'  # after a tag name: word boundary, not followed by ":/" or by "...@" (presumably keeps URLs/e-mails from matching as tags - confirm)
+_valid_attr = r'''"[^"]*"|'[^']*'|[^'">]'''  # one attribute chunk: quoted string, or any char except quotes and ">"
+_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)  # a tag name that is not one of _inline_tags
+
+
+def _pure_pattern(regex):
+ pattern = regex.pattern
+ if pattern.startswith('^'):
+ pattern = pattern[1:]
+ return pattern
+
+
def _keyify(key):
    """Normalise a link/footnote key: lower-case it, then collapse every
    whitespace run into a single space."""
    lowered = key.lower()
    return _key_pattern.sub(' ', lowered)
+
+
def escape(text, quote=False, smart_amp=True):
    """Replace special characters "&", "<" and ">" to HTML-safe sequences.

    The original cgi.escape will always escape "&", but you can control
    this one for a smart escape amp.

    :param text: the string to escape.
    :param quote: if set to True, " and ' will be escaped.
    :param smart_amp: if set to False, & will always be escaped; otherwise
                      an "&" that already starts an entity is left alone.
    :returns: the escaped string.
    """
    # "&" has to be handled first, otherwise the entities produced below
    # would be escaped a second time.
    if not smart_amp:
        text = text.replace('&', '&amp;')
    else:
        text = _escape_pattern.sub('&amp;', text)

    replacements = [('<', '&lt;'), ('>', '&gt;')]
    if quote:
        replacements += [('"', '&quot;'), ("'", '&#39;')]
    for char, entity in replacements:
        text = text.replace(char, entity)
    return text
+
+
def preprocessing(text, tab=4):
    """Normalise raw input before it is handed to the block lexer.

    :param text: markdown source.
    :param tab: number of spaces each tab character is expanded to.
    :returns: the text with "\\r\\n"/"\\r" turned into "\\n", tabs expanded,
              the two special characters below replaced, and lines that
              consist only of spaces emptied.
    """
    text = _newline_pattern.sub('\n', text)
    text = (
        text.replace('\t', ' ' * tab)
        .replace('\u00a0', ' ')
        .replace('\u2424', '\n')
    )
    blank_line = re.compile(r'^ +$', re.M)
    return blank_line.sub('', text)
+
+
+class BlockGrammar(object):
+ """Grammars for block level tokens.
+
+ Every attribute is a compiled regular expression anchored at the start
+ of the text; ``BlockLexer.parse`` looks the rules up by attribute name
+ and applies them with ``match``.
+ """
+
+ def_links = re.compile(  # link definition: [key]: <url> "title"
+ r'^ *\[([^^\]]+)\]: *' # [key]:
+ r'<?([^\s>]+)>?' # <link> or link
+ r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'  # optional title in quotes or parentheses
+ )
+ def_footnotes = re.compile(  # footnote definition; group 2 is the text incl. indented continuation lines
+ r'^\[\^([^\]]+)\]: *('
+ r'[^\n]*(?:\n+|$)' # [^key]:
+ r'(?: {1,}[^\n]*(?:\n+|$))*'
+ r')'
+ )
+
+ newline = re.compile(r'^\n+')  # one or more newlines
+ block_code = re.compile(r'^( {4}[^\n]+\n*)+')  # lines indented by four spaces
+ fences = re.compile(  # fenced code: group 1 fence, group 2 optional language, group 3 body
+ r'^ *(`{3,}|~{3,}) *(\S+)? *\n' # ```lang
+ r'([\s\S]+?)\s*'
+ r'\1 *(?:\n+|$)' # ```
+ )
+ hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')  # three or more of - * _ on one line
+ heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')  # "#"-style heading: group 1 hashes, group 2 title
+ lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')  # title line underlined with "=" or "-"
+ block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')  # ">" line plus the non-blank lines following it
+ list_block = re.compile(  # a whole list: group 1 indent, group 2 first bullet; ends at one of the alternatives below
+ r'^( *)([*+-]|\d+\.) [\s\S]+?'
+ r'(?:'
+ r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))' # hrule
+ r'|\n+(?=%s)' # def links
+ r'|\n+(?=%s)' # def footnotes
+ r'|\n{2,}'
+ r'(?! )'
+ r'(?!\1(?:[*+-]|\d+\.) )\n*'
+ r'|'
+ r'\s*$)' % (
+ _pure_pattern(def_links),
+ _pure_pattern(def_footnotes),
+ )
+ )
+ list_item = re.compile(  # one item: bullet line plus following lines up to the next bullet at the same indent
+ r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
+ r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
+ flags=re.M
+ )
+ list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')  # the bullet marker with the spaces around it
+ paragraph = re.compile(  # consecutive lines, stopping before a line that starts one of the rules below
+ r'^((?:[^\n]+\n?(?!'
+ r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
+ r'))+)\n*' % (
+ _pure_pattern(fences).replace(r'\1', r'\2'),  # fence group becomes group 2 once nested in paragraph's group 1
+ _pure_pattern(list_block).replace(r'\1', r'\3'),  # NOTE(review): once embedded, group 3 looks like fences' language group, not list_block's indent - confirm intended
+ _pure_pattern(hrule),
+ _pure_pattern(heading),
+ _pure_pattern(lheading),
+ _pure_pattern(block_quote),
+ _pure_pattern(def_links),
+ _pure_pattern(def_footnotes),
+ '<' + _block_tag,
+ )
+ )
+ block_html = re.compile(  # HTML comment, <tag attrs>body</tag> (groups 1-3), or a lone <tag attrs>
+ r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
+ r'<!--[\s\S]*?-->',
+ r'<(%s)((?:%s)*?)>([\s\S]+?)<\/\1>' % (_block_tag, _valid_attr),
+ r'<%s(?:%s)*?>' % (_block_tag, _valid_attr),
+ )
+ )
+ table = re.compile(  # table whose rows start with "|": group 1 header, group 2 delimiter row, group 3 body
+ r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
+ )
+ nptable = re.compile(  # table without leading pipes; same three groups as ``table``
+ r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
+ )
+ text = re.compile(r'^[^\n]+')  # the rest of the current line
+
+
class BlockLexer(object):
    """Block level lexer for block grammars.

    Converts Markdown source into a flat list of token dicts stored in
    ``tokens`` and records link / footnote definitions in ``def_links``
    and ``def_footnotes``.
    """
    grammar_class = BlockGrammar

    default_rules = [
        'newline', 'hrule', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'block_quote',
        'list_block', 'block_html', 'def_links',
        'def_footnotes', 'table', 'paragraph', 'text'
    ]

    list_rules = (
        'newline', 'block_code', 'fences', 'lheading', 'hrule',
        'block_quote', 'list_block', 'block_html', 'text',
    )

    footnote_rules = (
        'newline', 'block_code', 'fences', 'heading',
        'nptable', 'lheading', 'hrule', 'block_quote',
        'list_block', 'block_html', 'table', 'paragraph', 'text'
    )

    def __init__(self, rules=None, **kwargs):
        """Create a lexer.

        :param rules: grammar object holding the compiled patterns; an
                      instance of ``grammar_class`` is created when omitted.
        """
        self.tokens = []
        self.def_links = {}
        self.def_footnotes = {}
        self.rules = rules or self.grammar_class()

    def __call__(self, text, rules=None):
        return self.parse(text, rules)

    def parse(self, text, rules=None):
        """Tokenise *text* and return ``self.tokens``.

        :param text: markdown source (trailing newlines are ignored).
        :param rules: names of the grammar rules to try, in order;
                      ``default_rules`` when omitted.
        :raises RuntimeError: when no rule matches the remaining text.
        """
        text = text.rstrip('\n')
        rule_names = rules or self.default_rules

        while text:
            for name in rule_names:
                match = getattr(self.rules, name).match(text)
                if match:
                    # the first matching rule wins and emits its tokens
                    getattr(self, 'parse_%s' % name)(match)
                    break
            else:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)
            text = text[len(match.group(0)):]
        return self.tokens

    def parse_newline(self, m):
        # a single newline is just a line ending; only a gap emits a token
        if len(m.group(0)) > 1:
            self.tokens.append({'type': 'newline'})

    def parse_block_code(self, m):
        # strip the four-space indentation from every line
        source = _block_code_leadning_pattern.sub('', m.group(0))
        self.tokens.append({'type': 'code', 'lang': None, 'text': source})

    def parse_fences(self, m):
        lang, body = m.group(2), m.group(3)
        self.tokens.append({'type': 'code', 'lang': lang, 'text': body})

    def parse_heading(self, m):
        hashes, title = m.group(1), m.group(2)
        self.tokens.append({
            'type': 'heading',
            'level': len(hashes),
            'text': title,
        })

    def parse_lheading(self, m):
        """Parse setext heading."""
        if m.group(2) == '=':
            level = 1
        else:
            level = 2
        self.tokens.append({
            'type': 'heading',
            'level': level,
            'text': m.group(1),
        })

    def parse_hrule(self, m):
        self.tokens.append({'type': 'hrule'})

    def parse_list_block(self, m):
        bullet = m.group(2)
        # ordered bullets look like "1."; unordered ones never contain "."
        self.tokens.append({'type': 'list_start', 'ordered': '.' in bullet})
        self._process_list_item(m.group(0), bullet)
        self.tokens.append({'type': 'list_end'})

    def _process_list_item(self, cap, bull):
        """Split a list block into items and tokenise each of them."""
        items = self.rules.list_item.findall(cap)
        last_index = len(items) - 1
        next_is_loose = False

        for index, groups in enumerate(items):
            item = groups[0]

            # remove the bullet
            full_length = len(item)
            item = self.rules.list_bullet.sub('', item)

            # outdent continuation lines by at most the bullet's width
            if '\n ' in item:
                indent = full_length - len(item)
                outdent = re.compile(r'^ {1,%d}' % indent, flags=re.M)
                item = outdent.sub('', item)

            # an item is loose when the previous item ended with a blank
            # line or when it contains a blank line itself
            loose = next_is_loose
            if not loose and re.search(r'\n\n(?!\s*$)', item):
                loose = True

            if index != last_index and item:
                next_is_loose = item[-1] == '\n'
                if not loose:
                    loose = next_is_loose

            start_type = 'loose_item_start' if loose else 'list_item_start'
            self.tokens.append({'type': start_type})
            # recurse
            self.parse(item, self.list_rules)
            self.tokens.append({'type': 'list_item_end'})

    def parse_block_quote(self, m):
        self.tokens.append({'type': 'block_quote_start'})
        # clean leading >
        inner = _block_quote_leading_pattern.sub('', m.group(0))
        self.parse(inner)
        self.tokens.append({'type': 'block_quote_end'})

    def parse_def_links(self, m):
        self.def_links[_keyify(m.group(1))] = {
            'link': m.group(2),
            'title': m.group(3),
        }

    def parse_def_footnotes(self, m):
        key = _keyify(m.group(1))
        if key in self.def_footnotes:
            # footnote is already defined
            return

        # 0 means "defined but not referenced yet"
        self.def_footnotes[key] = 0

        self.tokens.append({'type': 'footnote_start', 'key': key})

        text = m.group(2)
        if '\n' in text:
            lines = text.split('\n')
            first, rest = lines[0], lines[1:]
            # smallest non-zero indentation among the continuation lines
            indents = [len(line) - len(line.lstrip()) for line in rest]
            indents = [width for width in indents if width]
            whitespace = min(indents) if indents else None
            text = '\n'.join([first] + [line[whitespace:] for line in rest])

        self.parse(text, self.footnote_rules)

        self.tokens.append({'type': 'footnote_end', 'key': key})

    def parse_table(self, m):
        item = self._process_table(m)

        body = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
        item['cells'] = [
            re.split(r' *\| *', re.sub(r'^ *\| *| *\| *$', '', row))
            for row in body.split('\n')
        ]
        self.tokens.append(item)

    def parse_nptable(self, m):
        item = self._process_table(m)

        body = re.sub(r'\n$', '', m.group(3))
        item['cells'] = [
            re.split(r' *\| *', row) for row in body.split('\n')
        ]
        self.tokens.append(item)

    def _process_table(self, m):
        """Build the table token (header cells and column alignments)."""
        header = re.split(r' *\| *', re.sub(r'^ *| *\| *$', '', m.group(1)))
        specs = re.split(r' *\| *', re.sub(r' *|\| *$', '', m.group(2)))

        alignments = (
            (r'^ *-+: *$', 'right'),
            (r'^ *:-+: *$', 'center'),
            (r'^ *:-+ *$', 'left'),
        )

        def classify(spec):
            for pattern, name in alignments:
                if re.search(pattern, spec):
                    return name
            return None

        return {
            'type': 'table',
            'header': header,
            'align': [classify(spec) for spec in specs],
        }

    def parse_block_html(self, m):
        tag = m.group(1)
        if tag:
            token = {
                'type': 'open_html',
                'tag': tag,
                'extra': m.group(2),
                'text': m.group(3),
            }
        else:
            # comment or lone tag: the whole match is passed through
            token = {'type': 'close_html', 'text': m.group(0)}
        self.tokens.append(token)

    def parse_paragraph(self, m):
        content = m.group(1).rstrip('\n')
        self.tokens.append({'type': 'paragraph', 'text': content})

    def parse_text(self, m):
        self.tokens.append({'type': 'text', 'text': m.group(0)})
+
+
+class InlineGrammar(object):
+ """Grammars for inline level tokens.
+
+ Every attribute is a compiled regular expression anchored at the start
+ of the text; ``InlineLexer.output`` looks the rules up by attribute name.
+ """
+
+ escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])') # \* \+ \! ....
+ inline_html = re.compile(  # HTML comment, <tag attrs>body</tag> (groups 1-3), or a lone <tag attrs>
+ r'^(?:%s|%s|%s)' % (
+ r'<!--[\s\S]*?-->',
+ r'<(\w+%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_valid_end, _valid_attr),
+ r'<\w+%s(?:%s)*?>' % (_valid_end, _valid_attr),
+ )
+ )
+ autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')  # <address>; group 2 is "@" or ":"
+ link = re.compile(  # [text](url "title"), optionally prefixed with "!"; groups: 1 text, 3 url, 4 title
+ r'^!?\[('
+ r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
+ r')\]\('
+ r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
+ r'\)'
+ )
+ reflink = re.compile(  # [text][key], optionally prefixed with "!"; group 2 may be empty
+ r'^!?\[('
+ r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
+ r')\]\s*\[([^^\]]*)\]'
+ )
+ nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')  # [key] on its own
+ url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')  # bare http(s) URL, not ending in punctuation
+ double_emphasis = re.compile(
+ r'^_{2}([\s\S]+?)_{2}(?!_)' # __word__
+ r'|'
+ r'^\*{2}([\s\S]+?)\*{2}(?!\*)' # **word**
+ )
+ emphasis = re.compile(
+ r'^\b_((?:__|[\s\S])+?)_\b' # _word_
+ r'|'
+ r'^\*((?:\*\*|[\s\S])+?)\*(?!\*)' # *word*
+ )
+ code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)') # `code`
+ linebreak = re.compile(r'^ {2,}\n(?!\s*$)')  # two or more spaces before a newline, unless only whitespace follows
+ strikethrough = re.compile(r'^~~(?=\S)([\s\S]+?\S)~~') # ~~word~~
+ footnote = re.compile(r'^\[\^([^\]]+)\]')  # footnote reference [^key]
+ text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')  # plain text up to a character that may start another rule
+
+ def hard_wrap(self):
+ """Grammar for hard wrap linebreak. You don't need to add two
+ spaces at the end of a line.
+
+ Replaces ``linebreak`` and ``text`` on this instance only; the class
+ attributes are left untouched.
+ """
+ self.linebreak = re.compile(r'^ *\n(?!\s*$)')  # any newline, with or without preceding spaces
+ self.text = re.compile(  # same as the class-level ``text`` but also stops before every newline
+ r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
+ )
+
+
class InlineLexer(object):
    """Inline level lexer for inline grammars.

    Renders span-level Markdown by trying the grammar rules in order and
    delegating the HTML generation to ``renderer``.
    """
    grammar_class = InlineGrammar

    default_rules = [
        'escape', 'inline_html', 'autolink', 'url',
        'footnote', 'link', 'reflink', 'nolink',
        'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]
    inline_html_rules = [
        'escape', 'autolink', 'url', 'link', 'reflink',
        'nolink', 'double_emphasis', 'emphasis', 'code',
        'linebreak', 'strikethrough', 'text',
    ]

    def __init__(self, renderer, rules=None, **kwargs):
        """Create a lexer.

        :param renderer: object providing the rendering methods and an
                         ``options`` dict.
        :param rules: grammar object holding the compiled patterns; an
                      instance of ``grammar_class`` is created when omitted.
        :param kwargs: extra options; ``parse_inline_html`` is read from
                       them after they have been updated with
                       ``renderer.options``.
        """
        self.renderer = renderer
        self.links = {}
        self.footnotes = {}
        self.footnote_index = 0

        if not rules:
            rules = self.grammar_class()

        self.rules = rules

        self._in_link = False
        self._in_footnote = False

        # set by output(); initialised here so they always exist
        self.line_match = None
        self.line_started = False

        kwargs.update(self.renderer.options)
        self._parse_inline_html = kwargs.get('parse_inline_html')

    def __call__(self, text, rules=None):
        return self.output(text, rules)

    def setup(self, links, footnotes):
        """Install the link and footnote definitions for the next render."""
        self.footnote_index = 0
        self.links = links or {}
        self.footnotes = footnotes or {}

    def output(self, text, rules=None):
        """Render *text* and return the accumulated renderer output.

        :param text: inline markdown (trailing newlines are ignored).
        :param rules: names of the grammar rules to try, in order;
                      ``default_rules`` when omitted.
        :raises RuntimeError: when no rule produces output for the
                              remaining text.
        """
        text = text.rstrip('\n')
        # Always work on a private copy: 'footnote' may be removed below and
        # that must not leak into the caller's list or a class-level list.
        rules = list(rules or self.default_rules)

        if self._in_footnote and 'footnote' in rules:
            rules.remove('footnote')

        output = self.renderer.placeholder()

        def manipulate(text):
            for key in rules:
                pattern = getattr(self.rules, key)
                m = pattern.match(text)
                if not m:
                    continue
                self.line_match = m
                out = getattr(self, 'output_%s' % key)(m)
                # a handler returning None declines the match
                if out is not None:
                    return m, out
            return False  # pragma: no cover

        self.line_started = False
        while text:
            ret = manipulate(text)
            self.line_started = True
            if ret is not False:
                m, out = ret
                output += out
                text = text[len(m.group(0)):]
                continue
            if text:  # pragma: no cover
                raise RuntimeError('Infinite loop at: %s' % text)

        return output

    def _output_in_link(self, text, rules=None):
        """Render *text* with ``_in_link`` set, then restore the old value.

        Restoring (instead of resetting to False) keeps the flag correct
        when link-like constructs are nested, and ``finally`` keeps it
        correct when rendering raises.
        """
        previous = self._in_link
        self._in_link = True
        try:
            return self.output(text, rules)
        finally:
            self._in_link = previous

    def output_escape(self, m):
        return m.group(1)

    def output_autolink(self, m):
        link = m.group(1)
        is_email = m.group(2) == '@'
        return self.renderer.autolink(link, is_email)

    def output_url(self, m):
        link = m.group(1)
        # no anchors inside anchors: a bare URL in a link is plain text
        if self._in_link:
            return self.renderer.text(link)
        return self.renderer.autolink(link, False)

    def output_inline_html(self, m):
        tag = m.group(1)
        if self._parse_inline_html and tag in _inline_tags:
            text = m.group(3)
            if tag == 'a':
                text = self._output_in_link(text, self.inline_html_rules)
            else:
                text = self.output(text, rules=self.inline_html_rules)
            extra = m.group(2) or ''
            html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        else:
            html = m.group(0)
        return self.renderer.inline_html(html)

    def output_footnote(self, m):
        key = _keyify(m.group(1))
        if key not in self.footnotes:
            return None
        # a non-zero value means this footnote was already referenced
        if self.footnotes[key]:
            return None
        self.footnote_index += 1
        self.footnotes[key] = self.footnote_index
        return self.renderer.footnote_ref(key, self.footnote_index)

    def output_link(self, m):
        return self._process_link(m, m.group(3), m.group(4))

    def output_reflink(self, m):
        # an empty second bracket pair means "use the text as the key"
        key = _keyify(m.group(2) or m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def output_nolink(self, m):
        key = _keyify(m.group(1))
        if key not in self.links:
            return None
        ret = self.links[key]
        return self._process_link(m, ret['link'], ret['title'])

    def _process_link(self, m, link, title=None):
        """Render a link, or an image when the match starts with "!"."""
        line = m.group(0)
        text = m.group(1)
        if line[0] == '!':
            return self.renderer.image(link, title, text)

        text = self._output_in_link(text)
        return self.renderer.link(link, title, text)

    def output_double_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.double_emphasis(text)

    def output_emphasis(self, m):
        text = m.group(2) or m.group(1)
        text = self.output(text)
        return self.renderer.emphasis(text)

    def output_code(self, m):
        text = m.group(2)
        return self.renderer.codespan(text)

    def output_linebreak(self, m):
        return self.renderer.linebreak()

    def output_strikethrough(self, m):
        text = self.output(m.group(1))
        return self.renderer.strikethrough(text)

    def output_text(self, m):
        text = m.group(0)
        return self.renderer.text(text)
+
+
+class Renderer(object):
+ """The default HTML renderer for rendering Markdown.
+ """
+
+ def __init__(self, **kwargs):
+ self.options = kwargs
+
+ def placeholder(self):
+ """Returns the default, empty output value for the renderer.
+
+ All renderer methods use the '+=' operator to append to this value.
+ Default is a string so rendering HTML can build up a result string with
+ the rendered Markdown.
+
+ Can be overridden by Renderer subclasses to be types like an empty
+ list, allowing the renderer to create a tree-like structure to
+ represent the document (which can then be reprocessed later into a
+ separate format like docx or pdf).
+ """
+ return ''
+
+ def block_code(self, code, lang=None):
+ """Rendering block level code. ``pre > code``.
+
+ :param code: text content of the code block.
+ :param lang: language of the given code.
+ """
+ code = code.rstrip('\n')
+ if not lang:
+ code = escape(code, smart_amp=False)
+ return '<pre><code>%s\n</code></pre>\n' % code
+ code = escape(code, quote=True, smart_amp=False)
+ #return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)
+ return '<pre><code class="%s">%s\n</code></pre>\n' % (lang, code)
+
+
+ def block_quote(self, text):
+ """Rendering <blockquote> with the given text.
+
+ :param text: text content of the blockquote.
+ """
+ return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')
+
+ def block_html(self, html):
+ """Rendering block level pure html content.
+
+ :param html: text content of the html snippet.
+ """
+ if self.options.get('skip_style') and \
+ html.lower().startswith('<style'):
+ return ''
+ if self.options.get('escape'):
+ return escape(html)
+ return html
+
+ def header(self, text, level, raw=None):
+ """Rendering header/heading tags like ``<h1>`` ``<h2>``.
+
+ :param text: rendered text content for the header.
+ :param level: a number for the header level, for example: 1.
+ :param raw: raw text content of the header.
+ """
+ return '<h%d>%s</h%d>\n' % (level, text, level)
+
+ def hrule(self):
+ """Rendering method for ``<hr>`` tag."""
+ if self.options.get('use_xhtml'):
+ return '<hr />\n'
+ return '<hr>\n'
+
+ def list(self, body, ordered=True):
+ """Rendering list tags like ``<ul>`` and ``<ol>``.
+
+ :param body: body contents of the list.
+ :param ordered: whether this list is ordered or not.
+ """
+ tag = 'ul'
+ if ordered:
+ tag = 'ol'
+ return '<%s>\n%s</%s>\n' % (tag, body, tag)
+
+ def list_item(self, text):
+ """Rendering list item snippet. Like ``<li>``."""
+ return '<li>%s</li>\n' % text
+
+ def paragraph(self, text):
+ """Rendering paragraph tags. Like ``<p>``."""
+ return '<p>%s</p>\n' % text.strip(' ')
+
+ def table(self, header, body):
+ """Rendering table element. Wrap header and body in it.
+
+ :param header: header part of the table.
+ :param body: body part of the table.
+ """
+ return (
+ '<table>\n<thead>%s</thead>\n'
+ '<tbody>\n%s</tbody>\n</table>\n'
+ ) % (header, body)
+
+ def table_row(self, content):
+ """Rendering a table row. Like ``<tr>``.
+
+ :param content: content of current table row.
+ """
+ return '<tr>\n%s</tr>\n' % content
+
+ def table_cell(self, content, **flags):
+ """Rendering a table cell. Like ``<th>`` ``<td>``.
+
+ :param content: content of current table cell.
+ :param header: whether this is header or not.
+ :param align: align of current table cell.
+ """
+ if flags['header']:
+ tag = 'th'
+ else:
+ tag = 'td'
+ align = flags['align']
+ if not align:
+ return '<%s>%s</%s>\n' % (tag, content, tag)
+ return '<%s style="text-align:%s">%s</%s>\n' % (
+ tag, align, content, tag
+ )
+
+ def double_emphasis(self, text):
+ """Rendering **strong** text.
+
+ :param text: text content for emphasis.
+ """
+ return '<strong>%s</strong>' % text
+
+ def emphasis(self, text):
+ """Rendering *emphasis* text.
+
+ :param text: text content for emphasis.
+ """
+ return '<em>%s</em>' % text
+
+ def codespan(self, text):
+ """Rendering inline `code` text.
+
+ :param text: text content for inline code.
+ """
+ text = escape(text.rstrip(), smart_amp=False)
+ return '<code>%s</code>' % text
+
+ def linebreak(self):
+ """Rendering line break like ``<br>``."""
+ if self.options.get('use_xhtml'):
+ return '<br />\n'
+ return '<br>\n'
+
+ def strikethrough(self, text):
+ """Rendering ~~strikethrough~~ text.
+
+ :param text: text content for strikethrough.
+ """
+ return '<del>%s</del>' % text
+
+ def text(self, text):
+ """Rendering unformatted text.
+
+ :param text: text content.
+ """
+ return escape(text)
+
+ def autolink(self, link, is_email=False):
+ """Rendering a given link or email address.
+
+ :param link: link content or email address.
+ :param is_email: whether this is an email or not.
+ """
+ text = link = escape(link)
+ if is_email:
+ link = 'mailto:%s' % link
+ return '<a href="%s">%s</a>' % (link, text)
+
+ def link(self, link, title, text):
+ """Rendering a given link with content and title.
+
+ :param link: href link for ``<a>`` tag.
+ :param title: title content for `title` attribute.
+ :param text: text content for description.
+ """
+ if link.startswith('javascript:'):
+ link = ''
+ if not title:
+ return '<a href="%s">%s</a>' % (link, text)
+ title = escape(title, quote=True)
+ return '<a href="%s" title="%s">%s</a>' % (link, title, text)
+
+ def image(self, src, title, text):
+ """Rendering a image with title and text.
+
+ :param src: source link of the image.
+ :param title: title text of the image.
+ :param text: alt text of the image.
+ """
+ if src.startswith('javascript:'):
+ src = ''
+ text = escape(text, quote=True)
+ if title:
+ title = escape(title, quote=True)
+ html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
+ else:
+ html = '<img src="%s" alt="%s"' % (src, text)
+ if self.options.get('use_xhtml'):
+ return '%s />' % html
+ return '%s>' % html
+
+ def inline_html(self, html):
+ """Rendering span level pure html content.
+
+ :param html: text content of the html snippet.
+ """
+ if self.options.get('escape'):
+ return escape(html)
+ return html
+
+ def newline(self):
+ """Rendering newline element."""
+ return ''
+
+ def footnote_ref(self, key, index):
+ """Rendering the ref anchor of a footnote.
+
+ :param key: identity key for the footnote.
+ :param index: the index count of current footnote.
+ """
+ html = (
+ '<sup class="footnote-ref" id="fnref-%s">'
+ '<a href="#fn-%s" rel="footnote">%d</a></sup>'
+ ) % (escape(key), escape(key), index)
+ return html
+
+ def footnote_item(self, key, text):
+ """Rendering a footnote item.
+
+ :param key: identity key for the footnote.
+ :param text: text content of the footnote.
+ """
+ back = (
+ '<a href="#fnref-%s" rev="footnote">&#8617;</a>'
+ ) % escape(key)
+ text = text.rstrip()
+ if text.endswith('</p>'):
+ text = re.sub(r'<\/p>$', r'%s</p>' % back, text)
+ else:
+ text = '%s<p>%s</p>' % (text, back)
+ html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
+ return html
+
+ def footnotes(self, text):
+ """Wrapper for all footnotes.
+
+ :param text: contents of all footnotes.
+ """
+ html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
+ return html % (self.hrule(), text)
+
+
class Markdown(object):
    """The Markdown parser.

    :param renderer: An instance of ``Renderer``.
    :param inline: An inline lexer class or instance.
    :param block: A block lexer class or instance.
    :param kwargs: options; they are passed to the default renderer and to
                   lexers that are created here.  ``hard_wrap`` and
                   ``parse_block_html`` are read by this class itself.
    """
    def __init__(self, renderer=None, inline=None, block=None, **kwargs):
        if not renderer:
            renderer = Renderer(**kwargs)

        self.renderer = renderer

        if inline and inspect.isclass(inline):
            inline = inline(renderer, **kwargs)
        if block and inspect.isclass(block):
            block = block(**kwargs)

        if inline:
            self.inline = inline
        else:
            rules = InlineGrammar()
            if kwargs.get('hard_wrap'):
                rules.hard_wrap()
            # Forward the options as well, so that e.g. parse_inline_html
            # is honoured even when a ready-made renderer was passed in.
            options = dict(kwargs)
            options.pop('rules', None)
            self.inline = InlineLexer(renderer, rules=rules, **options)

        self.block = block or BlockLexer(BlockGrammar())
        self.options = kwargs
        self.footnotes = []
        self.tokens = []

        # detect if it should parse text in block html
        self._parse_block_html = kwargs.get('parse_block_html')

    def __call__(self, text):
        return self.parse(text)

    def render(self, text):
        """Render the Markdown text.

        :param text: markdown formatted text content.
        """
        return self.parse(text)

    def parse(self, text):
        """Render *text* and append the footnotes section, if any.

        The link/footnote definitions collected by the lexers are reset
        afterwards, so the instance can be reused for another document.
        """
        out = self.output(preprocessing(text))

        keys = self.block.def_footnotes

        # reset block
        self.block.def_links = {}
        self.block.def_footnotes = {}

        # reset inline
        self.inline.links = {}
        self.inline.footnotes = {}

        if not self.footnotes:
            return out

        # keep only footnotes that were referenced (non-zero index) and
        # emit them in the order of their first reference
        referenced = [o for o in self.footnotes if keys.get(o['key'])]
        referenced.sort(key=lambda o: keys[o['key']])
        self.footnotes = []

        body = self.renderer.placeholder()
        for note in referenced:
            body += self.renderer.footnote_item(note['key'], note['text'])

        out += self.renderer.footnotes(body)
        return out

    def pop(self):
        """Remove and return the next token (also stored in ``token``),
        or None when no tokens are left."""
        if not self.tokens:
            return None
        self.token = self.tokens.pop()
        return self.token

    def peek(self):
        """Return the next token without consuming it, or None."""
        if self.tokens:
            return self.tokens[-1]
        return None  # pragma: no cover

    def output(self, text, rules=None):
        """Tokenise *text* with the block lexer and render every token."""
        self.tokens = self.block(text, rules)
        # tokens are consumed with list.pop(), i.e. from the end
        self.tokens.reverse()

        self.inline.setup(self.block.def_links, self.block.def_footnotes)

        out = self.renderer.placeholder()
        while self.pop():
            out += self.tok()
        return out

    def tok(self):
        """Dispatch the current token to its ``output_*`` method."""
        t = self.token['type']

        # special cases: "<name>_start" tokens are handled by output_<name>
        if t.endswith('_start'):
            t = t[:-6]

        return getattr(self, 'output_%s' % t)()

    def tok_text(self):
        """Merge the current and all directly following ``text`` tokens and
        render the result inline."""
        text = self.token['text']
        while True:
            upcoming = self.peek()
            # peek() returns None once the token stack is exhausted
            if upcoming is None or upcoming['type'] != 'text':
                break
            text += '\n' + self.pop()['text']
        return self.inline(text)

    def output_newline(self):
        return self.renderer.newline()

    def output_hrule(self):
        return self.renderer.hrule()

    def output_heading(self):
        return self.renderer.header(
            self.inline(self.token['text']),
            self.token['level'],
            self.token['text'],
        )

    def output_code(self):
        return self.renderer.block_code(
            self.token['text'], self.token['lang']
        )

    def output_table(self):
        aligns = self.token['align']
        aligns_length = len(aligns)
        cell = self.renderer.placeholder()

        # header part
        header = self.renderer.placeholder()
        for i, value in enumerate(self.token['header']):
            # rows may have more cells than the delimiter row has columns
            align = aligns[i] if i < aligns_length else None
            flags = {'header': True, 'align': align}
            cell += self.renderer.table_cell(self.inline(value), **flags)

        header += self.renderer.table_row(cell)

        # body part
        body = self.renderer.placeholder()
        for row in self.token['cells']:
            cell = self.renderer.placeholder()
            for j, value in enumerate(row):
                align = aligns[j] if j < aligns_length else None
                flags = {'header': False, 'align': align}
                cell += self.renderer.table_cell(self.inline(value), **flags)
            body += self.renderer.table_row(cell)

        return self.renderer.table(header, body)

    def output_block_quote(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'block_quote_end':
            body += self.tok()
        return self.renderer.block_quote(body)

    def output_list(self):
        ordered = self.token['ordered']
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_end':
            body += self.tok()
        return self.renderer.list(body, ordered)

    def output_list_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            # tight items: text is rendered inline, without a <p> wrapper
            if self.token['type'] == 'text':
                body += self.tok_text()
            else:
                body += self.tok()

        return self.renderer.list_item(body)

    def output_loose_item(self):
        body = self.renderer.placeholder()
        while self.pop()['type'] != 'list_item_end':
            body += self.tok()
        return self.renderer.list_item(body)

    def output_footnote(self):
        # footnote bodies are collected here and emitted by parse()
        self.inline._in_footnote = True
        body = self.renderer.placeholder()
        key = self.token['key']
        while self.pop()['type'] != 'footnote_end':
            body += self.tok()
        self.footnotes.append({'key': key, 'text': body})
        self.inline._in_footnote = False
        return self.renderer.placeholder()

    def output_close_html(self):
        text = self.token['text']
        return self.renderer.block_html(text)

    def output_open_html(self):
        text = self.token['text']
        tag = self.token['tag']
        if self._parse_block_html and tag not in _pre_tags:
            text = self.inline(text, rules=self.inline.inline_html_rules)
        extra = self.token.get('extra') or ''
        html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
        return self.renderer.block_html(html)

    def output_paragraph(self):
        return self.renderer.paragraph(self.inline(self.token['text']))

    def output_text(self):
        return self.renderer.paragraph(self.tok_text())
+
+
def markdown(text, escape=True, **kwargs):
    """Render markdown formatted text to html.

    A ``Markdown`` parser is created for this single call.

    :param text: markdown formatted text content.
    :param escape: if set to False, all html tags will not be escaped.
    :param use_xhtml: output with xhtml tags.
    :param hard_wrap: if set to True, it will has GFM line breaks feature.
    :param parse_block_html: parse text only in block level html.
    :param parse_inline_html: parse text only in inline level html.
    :returns: the rendered html.
    """
    parser = Markdown(escape=escape, **kwargs)
    return parser(text)