diff options
Diffstat (limited to 'src/mistune.py')
-rw-r--r-- | src/mistune.py | 1143 |
1 files changed, 0 insertions, 1143 deletions
diff --git a/src/mistune.py b/src/mistune.py deleted file mode 100644 index 4c711b2..0000000 --- a/src/mistune.py +++ /dev/null @@ -1,1143 +0,0 @@ -# coding: utf-8 -""" - mistune - ~~~~~~~ - - The fastest markdown parser in pure Python with renderer feature. - - :copyright: (c) 2014 - 2015 by Hsiaoming Yang. -""" - -import re -import inspect - -__version__ = '0.7.1' -__author__ = 'Hsiaoming Yang <me@lepture.com>' -__all__ = [ - 'BlockGrammar', 'BlockLexer', - 'InlineGrammar', 'InlineLexer', - 'Renderer', 'Markdown', - 'markdown', 'escape', -] - - -_key_pattern = re.compile(r'\s+') -_escape_pattern = re.compile(r'&(?!#?\w+;)') -_newline_pattern = re.compile(r'\r\n|\r') -_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M) -_block_code_leadning_pattern = re.compile(r'^ {4}', re.M) -_inline_tags = [ - 'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data', - 'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark', - 'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del', - 'img', 'font', -] -_pre_tags = ['pre', 'script', 'style'] -_valid_end = r'(?!:/|[^\w\s@]*@)\b' -_valid_attr = r'''"[^"]*"|'[^']*'|[^'">]''' -_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end) - - -def _pure_pattern(regex): - pattern = regex.pattern - if pattern.startswith('^'): - pattern = pattern[1:] - return pattern - - -def _keyify(key): - return _key_pattern.sub(' ', key.lower()) - - -def escape(text, quote=False, smart_amp=True): - """Replace special characters "&", "<" and ">" to HTML-safe sequences. - - The original cgi.escape will always escape "&", but you can control - this one for a smart escape amp. - - :param quote: if set to True, " and ' will be escaped. - :param smart_amp: if set to False, & will always be escaped. - """ - if smart_amp: - text = _escape_pattern.sub('&', text) - else: - text = text.replace('&', '&') - text = text.replace('<', '<') - text = text.replace('>', '>') - if quote: - text = text.replace('"', '"') - text = text.replace("'", ''') - return text - - -def preprocessing(text, tab=4): - text = _newline_pattern.sub('\n', text) - text = text.replace('\t', ' ' * tab) - text = text.replace('\u00a0', ' ') - text = text.replace('\u2424', '\n') - pattern = re.compile(r'^ +$', re.M) - return pattern.sub('', text) - - -class BlockGrammar(object): - """Grammars for block level tokens.""" - - def_links = re.compile( - r'^ *\[([^^\]]+)\]: *' # [key]: - r'<?([^\s>]+)>?' # <link> or link - r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)' - ) - def_footnotes = re.compile( - r'^\[\^([^\]]+)\]: *(' - r'[^\n]*(?:\n+|$)' # [^key]: - r'(?: {1,}[^\n]*(?:\n+|$))*' - r')' - ) - - newline = re.compile(r'^\n+') - block_code = re.compile(r'^( {4}[^\n]+\n*)+') - fences = re.compile( - r'^ *(`{3,}|~{3,}) *(\S+)? *\n' # ```lang - r'([\s\S]+?)\s*' - r'\1 *(?:\n+|$)' # ``` - ) - hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)') - heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)') - lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)') - block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+') - list_block = re.compile( - r'^( *)([*+-]|\d+\.) [\s\S]+?' - r'(?:' - r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))' # hrule - r'|\n+(?=%s)' # def links - r'|\n+(?=%s)' # def footnotes - r'|\n{2,}' - r'(?! )' - r'(?!\1(?:[*+-]|\d+\.) )\n*' - r'|' - r'\s*$)' % ( - _pure_pattern(def_links), - _pure_pattern(def_footnotes), - ) - ) - list_item = re.compile( - r'^(( *)(?:[*+-]|\d+\.) [^\n]*' - r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)', - flags=re.M - ) - list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +') - paragraph = re.compile( - r'^((?:[^\n]+\n?(?!' - r'%s|%s|%s|%s|%s|%s|%s|%s|%s' - r'))+)\n*' % ( - _pure_pattern(fences).replace(r'\1', r'\2'), - _pure_pattern(list_block).replace(r'\1', r'\3'), - _pure_pattern(hrule), - _pure_pattern(heading), - _pure_pattern(lheading), - _pure_pattern(block_quote), - _pure_pattern(def_links), - _pure_pattern(def_footnotes), - '<' + _block_tag, - ) - ) - block_html = re.compile( - r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % ( - r'<!--[\s\S]*?-->', - r'<(%s)((?:%s)*?)>([\s\S]+?)<\/\1>' % (_block_tag, _valid_attr), - r'<%s(?:%s)*?>' % (_block_tag, _valid_attr), - ) - ) - table = re.compile( - r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*' - ) - nptable = re.compile( - r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*' - ) - text = re.compile(r'^[^\n]+') - - -class BlockLexer(object): - """Block level lexer for block grammars.""" - grammar_class = BlockGrammar - - default_rules = [ - 'newline', 'hrule', 'block_code', 'fences', 'heading', - 'nptable', 'lheading', 'block_quote', - 'list_block', 'block_html', 'def_links', - 'def_footnotes', 'table', 'paragraph', 'text' - ] - - list_rules = ( - 'newline', 'block_code', 'fences', 'lheading', 'hrule', - 'block_quote', 'list_block', 'block_html', 'text', - ) - - footnote_rules = ( - 'newline', 'block_code', 'fences', 'heading', - 'nptable', 'lheading', 'hrule', 'block_quote', - 'list_block', 'block_html', 'table', 'paragraph', 'text' - ) - - def __init__(self, rules=None, **kwargs): - self.tokens = [] - self.def_links = {} - self.def_footnotes = {} - - if not rules: - rules = self.grammar_class() - - self.rules = rules - - def __call__(self, text, rules=None): - return self.parse(text, rules) - - def parse(self, text, rules=None): - text = text.rstrip('\n') - - if not rules: - rules = self.default_rules - - def manipulate(text): - for key in rules: - rule = getattr(self.rules, key) - m = rule.match(text) - if not m: - continue - getattr(self, 'parse_%s' % key)(m) - return m - return False # pragma: no cover - - while text: - m = manipulate(text) - if m is not False: - text = text[len(m.group(0)):] - continue - if text: # pragma: no cover - raise RuntimeError('Infinite loop at: %s' % text) - return self.tokens - - def parse_newline(self, m): - length = len(m.group(0)) - if length > 1: - self.tokens.append({'type': 'newline'}) - - def parse_block_code(self, m): - # clean leading whitespace - code = _block_code_leadning_pattern.sub('', m.group(0)) - self.tokens.append({ - 'type': 'code', - 'lang': None, - 'text': code, - }) - - def parse_fences(self, m): - self.tokens.append({ - 'type': 'code', - 'lang': m.group(2), - 'text': m.group(3), - }) - - def parse_heading(self, m): - self.tokens.append({ - 'type': 'heading', - 'level': len(m.group(1)), - 'text': m.group(2), - }) - - def parse_lheading(self, m): - """Parse setext heading.""" - self.tokens.append({ - 'type': 'heading', - 'level': 1 if m.group(2) == '=' else 2, - 'text': m.group(1), - }) - - def parse_hrule(self, m): - self.tokens.append({'type': 'hrule'}) - - def parse_list_block(self, m): - bull = m.group(2) - self.tokens.append({ - 'type': 'list_start', - 'ordered': '.' in bull, - }) - cap = m.group(0) - self._process_list_item(cap, bull) - self.tokens.append({'type': 'list_end'}) - - def _process_list_item(self, cap, bull): - cap = self.rules.list_item.findall(cap) - - _next = False - length = len(cap) - - for i in range(length): - item = cap[i][0] - - # remove the bullet - space = len(item) - item = self.rules.list_bullet.sub('', item) - - # outdent - if '\n ' in item: - space = space - len(item) - pattern = re.compile(r'^ {1,%d}' % space, flags=re.M) - item = pattern.sub('', item) - - # determin whether item is loose or not - loose = _next - if not loose and re.search(r'\n\n(?!\s*$)', item): - loose = True - - rest = len(item) - if i != length - 1 and rest: - _next = item[rest-1] == '\n' - if not loose: - loose = _next - - if loose: - t = 'loose_item_start' - else: - t = 'list_item_start' - - self.tokens.append({'type': t}) - # recurse - self.parse(item, self.list_rules) - self.tokens.append({'type': 'list_item_end'}) - - def parse_block_quote(self, m): - self.tokens.append({'type': 'block_quote_start'}) - # clean leading > - cap = _block_quote_leading_pattern.sub('', m.group(0)) - self.parse(cap) - self.tokens.append({'type': 'block_quote_end'}) - - def parse_def_links(self, m): - key = _keyify(m.group(1)) - self.def_links[key] = { - 'link': m.group(2), - 'title': m.group(3), - } - - def parse_def_footnotes(self, m): - key = _keyify(m.group(1)) - if key in self.def_footnotes: - # footnote is already defined - return - - self.def_footnotes[key] = 0 - - self.tokens.append({ - 'type': 'footnote_start', - 'key': key, - }) - - text = m.group(2) - - if '\n' in text: - lines = text.split('\n') - whitespace = None - for line in lines[1:]: - space = len(line) - len(line.lstrip()) - if space and (not whitespace or space < whitespace): - whitespace = space - newlines = [lines[0]] - for line in lines[1:]: - newlines.append(line[whitespace:]) - text = '\n'.join(newlines) - - self.parse(text, self.footnote_rules) - - self.tokens.append({ - 'type': 'footnote_end', - 'key': key, - }) - - def parse_table(self, m): - item = self._process_table(m) - - cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3)) - cells = cells.split('\n') - for i, v in enumerate(cells): - v = re.sub(r'^ *\| *| *\| *$', '', v) - cells[i] = re.split(r' *\| *', v) - - item['cells'] = cells - self.tokens.append(item) - - def parse_nptable(self, m): - item = self._process_table(m) - - cells = re.sub(r'\n$', '', m.group(3)) - cells = cells.split('\n') - for i, v in enumerate(cells): - cells[i] = re.split(r' *\| *', v) - - item['cells'] = cells - self.tokens.append(item) - - def _process_table(self, m): - header = re.sub(r'^ *| *\| *$', '', m.group(1)) - header = re.split(r' *\| *', header) - align = re.sub(r' *|\| *$', '', m.group(2)) - align = re.split(r' *\| *', align) - - for i, v in enumerate(align): - if re.search(r'^ *-+: *$', v): - align[i] = 'right' - elif re.search(r'^ *:-+: *$', v): - align[i] = 'center' - elif re.search(r'^ *:-+ *$', v): - align[i] = 'left' - else: - align[i] = None - - item = { - 'type': 'table', - 'header': header, - 'align': align, - } - return item - - def parse_block_html(self, m): - tag = m.group(1) - if not tag: - text = m.group(0) - self.tokens.append({ - 'type': 'close_html', - 'text': text - }) - else: - attr = m.group(2) - text = m.group(3) - self.tokens.append({ - 'type': 'open_html', - 'tag': tag, - 'extra': attr, - 'text': text - }) - - def parse_paragraph(self, m): - text = m.group(1).rstrip('\n') - self.tokens.append({'type': 'paragraph', 'text': text}) - - def parse_text(self, m): - text = m.group(0) - self.tokens.append({'type': 'text', 'text': text}) - - -class InlineGrammar(object): - """Grammars for inline level tokens.""" - - escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])') # \* \+ \! .... - inline_html = re.compile( - r'^(?:%s|%s|%s)' % ( - r'<!--[\s\S]*?-->', - r'<(\w+%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_valid_end, _valid_attr), - r'<\w+%s(?:%s)*?>' % (_valid_end, _valid_attr), - ) - ) - autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>') - link = re.compile( - r'^!?\[(' - r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*' - r')\]\(' - r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*''' - r'\)' - ) - reflink = re.compile( - r'^!?\[(' - r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*' - r')\]\s*\[([^^\]]*)\]' - ) - nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]') - url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''') - double_emphasis = re.compile( - r'^_{2}([\s\S]+?)_{2}(?!_)' # __word__ - r'|' - r'^\*{2}([\s\S]+?)\*{2}(?!\*)' # **word** - ) - emphasis = re.compile( - r'^\b_((?:__|[\s\S])+?)_\b' # _word_ - r'|' - r'^\*((?:\*\*|[\s\S])+?)\*(?!\*)' # *word* - ) - code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)') # `code` - linebreak = re.compile(r'^ {2,}\n(?!\s*$)') - strikethrough = re.compile(r'^~~(?=\S)([\s\S]+?\S)~~') # ~~word~~ - footnote = re.compile(r'^\[\^([^\]]+)\]') - text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)') - - def hard_wrap(self): - """Grammar for hard wrap linebreak. You don't need to add two - spaces at the end of a line. - """ - self.linebreak = re.compile(r'^ *\n(?!\s*$)') - self.text = re.compile( - r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)' - ) - - -class InlineLexer(object): - """Inline level lexer for inline grammars.""" - grammar_class = InlineGrammar - - default_rules = [ - 'escape', 'inline_html', 'autolink', 'url', - 'footnote', 'link', 'reflink', 'nolink', - 'double_emphasis', 'emphasis', 'code', - 'linebreak', 'strikethrough', 'text', - ] - inline_html_rules = [ - 'escape', 'autolink', 'url', 'link', 'reflink', - 'nolink', 'double_emphasis', 'emphasis', 'code', - 'linebreak', 'strikethrough', 'text', - ] - - def __init__(self, renderer, rules=None, **kwargs): - self.renderer = renderer - self.links = {} - self.footnotes = {} - self.footnote_index = 0 - - if not rules: - rules = self.grammar_class() - - self.rules = rules - - self._in_link = False - self._in_footnote = False - - kwargs.update(self.renderer.options) - self._parse_inline_html = kwargs.get('parse_inline_html') - - def __call__(self, text, rules=None): - return self.output(text, rules) - - def setup(self, links, footnotes): - self.footnote_index = 0 - self.links = links or {} - self.footnotes = footnotes or {} - - def output(self, text, rules=None): - text = text.rstrip('\n') - if not rules: - rules = list(self.default_rules) - - if self._in_footnote and 'footnote' in rules: - rules.remove('footnote') - - output = self.renderer.placeholder() - - def manipulate(text): - for key in rules: - pattern = getattr(self.rules, key) - m = pattern.match(text) - if not m: - continue - self.line_match = m - out = getattr(self, 'output_%s' % key)(m) - if out is not None: - return m, out - return False # pragma: no cover - - self.line_started = False - while text: - ret = manipulate(text) - self.line_started = True - if ret is not False: - m, out = ret - output += out - text = text[len(m.group(0)):] - continue - if text: # pragma: no cover - raise RuntimeError('Infinite loop at: %s' % text) - - return output - - def output_escape(self, m): - return m.group(1) - - def output_autolink(self, m): - link = m.group(1) - if m.group(2) == '@': - is_email = True - else: - is_email = False - return self.renderer.autolink(link, is_email) - - def output_url(self, m): - link = m.group(1) - if self._in_link: - return self.renderer.text(link) - return self.renderer.autolink(link, False) - - def output_inline_html(self, m): - tag = m.group(1) - if self._parse_inline_html and tag in _inline_tags: - text = m.group(3) - if tag == 'a': - self._in_link = True - text = self.output(text, rules=self.inline_html_rules) - self._in_link = False - else: - text = self.output(text, rules=self.inline_html_rules) - extra = m.group(2) or '' - html = '<%s%s>%s</%s>' % (tag, extra, text, tag) - else: - html = m.group(0) - return self.renderer.inline_html(html) - - def output_footnote(self, m): - key = _keyify(m.group(1)) - if key not in self.footnotes: - return None - if self.footnotes[key]: - return None - self.footnote_index += 1 - self.footnotes[key] = self.footnote_index - return self.renderer.footnote_ref(key, self.footnote_index) - - def output_link(self, m): - return self._process_link(m, m.group(3), m.group(4)) - - def output_reflink(self, m): - key = _keyify(m.group(2) or m.group(1)) - if key not in self.links: - return None - ret = self.links[key] - return self._process_link(m, ret['link'], ret['title']) - - def output_nolink(self, m): - key = _keyify(m.group(1)) - if key not in self.links: - return None - ret = self.links[key] - return self._process_link(m, ret['link'], ret['title']) - - def _process_link(self, m, link, title=None): - line = m.group(0) - text = m.group(1) - if line[0] == '!': - return self.renderer.image(link, title, text) - - self._in_link = True - text = self.output(text) - self._in_link = False - return self.renderer.link(link, title, text) - - def output_double_emphasis(self, m): - text = m.group(2) or m.group(1) - text = self.output(text) - return self.renderer.double_emphasis(text) - - def output_emphasis(self, m): - text = m.group(2) or m.group(1) - text = self.output(text) - return self.renderer.emphasis(text) - - def output_code(self, m): - text = m.group(2) - return self.renderer.codespan(text) - - def output_linebreak(self, m): - return self.renderer.linebreak() - - def output_strikethrough(self, m): - text = self.output(m.group(1)) - return self.renderer.strikethrough(text) - - def output_text(self, m): - text = m.group(0) - return self.renderer.text(text) - - -class Renderer(object): - """The default HTML renderer for rendering Markdown. - """ - - def __init__(self, **kwargs): - self.options = kwargs - - def placeholder(self): - """Returns the default, empty output value for the renderer. - - All renderer methods use the '+=' operator to append to this value. - Default is a string so rendering HTML can build up a result string with - the rendered Markdown. - - Can be overridden by Renderer subclasses to be types like an empty - list, allowing the renderer to create a tree-like structure to - represent the document (which can then be reprocessed later into a - separate format like docx or pdf). - """ - return '' - - def block_code(self, code, lang=None): - """Rendering block level code. ``pre > code``. - - :param code: text content of the code block. - :param lang: language of the given code. - """ - code = code.rstrip('\n') - if not lang: - code = escape(code, smart_amp=False) - return '<pre><code>%s\n</code></pre>\n' % code - code = escape(code, quote=True, smart_amp=False) - #return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code) - return '<pre><code class="%s">%s\n</code></pre>\n' % (lang, code) - - - def block_quote(self, text): - """Rendering <blockquote> with the given text. - - :param text: text content of the blockquote. - """ - return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n') - - def block_html(self, html): - """Rendering block level pure html content. - - :param html: text content of the html snippet. - """ - if self.options.get('skip_style') and \ - html.lower().startswith('<style'): - return '' - if self.options.get('escape'): - return escape(html) - return html - - def header(self, text, level, raw=None): - """Rendering header/heading tags like ``<h1>`` ``<h2>``. - - :param text: rendered text content for the header. - :param level: a number for the header level, for example: 1. - :param raw: raw text content of the header. - """ - return '<h%d>%s</h%d>\n' % (level, text, level) - - def hrule(self): - """Rendering method for ``<hr>`` tag.""" - if self.options.get('use_xhtml'): - return '<hr />\n' - return '<hr>\n' - - def list(self, body, ordered=True): - """Rendering list tags like ``<ul>`` and ``<ol>``. - - :param body: body contents of the list. - :param ordered: whether this list is ordered or not. - """ - tag = 'ul' - if ordered: - tag = 'ol' - return '<%s>\n%s</%s>\n' % (tag, body, tag) - - def list_item(self, text): - """Rendering list item snippet. Like ``<li>``.""" - return '<li>%s</li>\n' % text - - def paragraph(self, text): - """Rendering paragraph tags. Like ``<p>``.""" - return '<p>%s</p>\n' % text.strip(' ') - - def table(self, header, body): - """Rendering table element. Wrap header and body in it. - - :param header: header part of the table. - :param body: body part of the table. - """ - return ( - '<table>\n<thead>%s</thead>\n' - '<tbody>\n%s</tbody>\n</table>\n' - ) % (header, body) - - def table_row(self, content): - """Rendering a table row. Like ``<tr>``. - - :param content: content of current table row. - """ - return '<tr>\n%s</tr>\n' % content - - def table_cell(self, content, **flags): - """Rendering a table cell. Like ``<th>`` ``<td>``. - - :param content: content of current table cell. - :param header: whether this is header or not. - :param align: align of current table cell. - """ - if flags['header']: - tag = 'th' - else: - tag = 'td' - align = flags['align'] - if not align: - return '<%s>%s</%s>\n' % (tag, content, tag) - return '<%s style="text-align:%s">%s</%s>\n' % ( - tag, align, content, tag - ) - - def double_emphasis(self, text): - """Rendering **strong** text. - - :param text: text content for emphasis. - """ - return '<strong>%s</strong>' % text - - def emphasis(self, text): - """Rendering *emphasis* text. - - :param text: text content for emphasis. - """ - return '<em>%s</em>' % text - - def codespan(self, text): - """Rendering inline `code` text. - - :param text: text content for inline code. - """ - text = escape(text.rstrip(), smart_amp=False) - return '<code>%s</code>' % text - - def linebreak(self): - """Rendering line break like ``<br>``.""" - if self.options.get('use_xhtml'): - return '<br />\n' - return '<br>\n' - - def strikethrough(self, text): - """Rendering ~~strikethrough~~ text. - - :param text: text content for strikethrough. - """ - return '<del>%s</del>' % text - - def text(self, text): - """Rendering unformatted text. - - :param text: text content. - """ - return escape(text) - - def autolink(self, link, is_email=False): - """Rendering a given link or email address. - - :param link: link content or email address. - :param is_email: whether this is an email or not. - """ - text = link = escape(link) - if is_email: - link = 'mailto:%s' % link - return '<a href="%s">%s</a>' % (link, text) - - def link(self, link, title, text): - """Rendering a given link with content and title. - - :param link: href link for ``<a>`` tag. - :param title: title content for `title` attribute. - :param text: text content for description. - """ - if link.startswith('javascript:'): - link = '' - if not title: - return '<a href="%s">%s</a>' % (link, text) - title = escape(title, quote=True) - return '<a href="%s" title="%s">%s</a>' % (link, title, text) - - def image(self, src, title, text): - """Rendering a image with title and text. - - :param src: source link of the image. - :param title: title text of the image. - :param text: alt text of the image. - """ - if src.startswith('javascript:'): - src = '' - text = escape(text, quote=True) - if title: - title = escape(title, quote=True) - html = '<img src="%s" alt="%s" title="%s"' % (src, text, title) - else: - html = '<img src="%s" alt="%s"' % (src, text) - if self.options.get('use_xhtml'): - return '%s />' % html - return '%s>' % html - - def inline_html(self, html): - """Rendering span level pure html content. - - :param html: text content of the html snippet. - """ - if self.options.get('escape'): - return escape(html) - return html - - def newline(self): - """Rendering newline element.""" - return '' - - def footnote_ref(self, key, index): - """Rendering the ref anchor of a footnote. - - :param key: identity key for the footnote. - :param index: the index count of current footnote. - """ - html = ( - '<sup class="footnote-ref" id="fnref-%s">' - '<a href="#fn-%s" rel="footnote">%d</a></sup>' - ) % (escape(key), escape(key), index) - return html - - def footnote_item(self, key, text): - """Rendering a footnote item. - - :param key: identity key for the footnote. - :param text: text content of the footnote. - """ - back = ( - '<a href="#fnref-%s" rev="footnote">↩</a>' - ) % escape(key) - text = text.rstrip() - if text.endswith('</p>'): - text = re.sub(r'<\/p>$', r'%s</p>' % back, text) - else: - text = '%s<p>%s</p>' % (text, back) - html = '<li id="fn-%s">%s</li>\n' % (escape(key), text) - return html - - def footnotes(self, text): - """Wrapper for all footnotes. - - :param text: contents of all footnotes. - """ - html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n' - return html % (self.hrule(), text) - - -class Markdown(object): - """The Markdown parser. - - :param renderer: An instance of ``Renderer``. - :param inline: An inline lexer class or instance. - :param block: A block lexer class or instance. - """ - def __init__(self, renderer=None, inline=None, block=None, **kwargs): - if not renderer: - renderer = Renderer(**kwargs) - - self.renderer = renderer - - if inline and inspect.isclass(inline): - inline = inline(renderer, **kwargs) - if block and inspect.isclass(block): - block = block(**kwargs) - - if inline: - self.inline = inline - else: - rules = InlineGrammar() - if kwargs.get('hard_wrap'): - rules.hard_wrap() - self.inline = InlineLexer(renderer, rules=rules) - - self.block = block or BlockLexer(BlockGrammar()) - self.options = kwargs - self.footnotes = [] - self.tokens = [] - - # detect if it should parse text in block html - self._parse_block_html = kwargs.get('parse_block_html') - - def __call__(self, text): - return self.parse(text) - - def render(self, text): - """Render the Markdown text. - - :param text: markdown formatted text content. - """ - return self.parse(text) - - def parse(self, text): - out = self.output(preprocessing(text)) - - keys = self.block.def_footnotes - - # reset block - self.block.def_links = {} - self.block.def_footnotes = {} - - # reset inline - self.inline.links = {} - self.inline.footnotes = {} - - if not self.footnotes: - return out - - footnotes = filter(lambda o: keys.get(o['key']), self.footnotes) - self.footnotes = sorted( - footnotes, key=lambda o: keys.get(o['key']), reverse=True - ) - - body = self.renderer.placeholder() - while self.footnotes: - note = self.footnotes.pop() - body += self.renderer.footnote_item( - note['key'], note['text'] - ) - - out += self.renderer.footnotes(body) - return out - - def pop(self): - if not self.tokens: - return None - self.token = self.tokens.pop() - return self.token - - def peek(self): - if self.tokens: - return self.tokens[-1] - return None # pragma: no cover - - def output(self, text, rules=None): - self.tokens = self.block(text, rules) - self.tokens.reverse() - - self.inline.setup(self.block.def_links, self.block.def_footnotes) - - out = self.renderer.placeholder() - while self.pop(): - out += self.tok() - return out - - def tok(self): - t = self.token['type'] - - # sepcial cases - if t.endswith('_start'): - t = t[:-6] - - return getattr(self, 'output_%s' % t)() - - def tok_text(self): - text = self.token['text'] - while self.peek()['type'] == 'text': - text += '\n' + self.pop()['text'] - return self.inline(text) - - def output_newline(self): - return self.renderer.newline() - - def output_hrule(self): - return self.renderer.hrule() - - def output_heading(self): - return self.renderer.header( - self.inline(self.token['text']), - self.token['level'], - self.token['text'], - ) - - def output_code(self): - return self.renderer.block_code( - self.token['text'], self.token['lang'] - ) - - def output_table(self): - aligns = self.token['align'] - aligns_length = len(aligns) - cell = self.renderer.placeholder() - - # header part - header = self.renderer.placeholder() - for i, value in enumerate(self.token['header']): - align = aligns[i] if i < aligns_length else None - flags = {'header': True, 'align': align} - cell += self.renderer.table_cell(self.inline(value), **flags) - - header += self.renderer.table_row(cell) - - # body part - body = self.renderer.placeholder() - for i, row in enumerate(self.token['cells']): - cell = self.renderer.placeholder() - for j, value in enumerate(row): - align = aligns[j] if j < aligns_length else None - flags = {'header': False, 'align': align} - cell += self.renderer.table_cell(self.inline(value), **flags) - body += self.renderer.table_row(cell) - - return self.renderer.table(header, body) - - def output_block_quote(self): - body = self.renderer.placeholder() - while self.pop()['type'] != 'block_quote_end': - body += self.tok() - return self.renderer.block_quote(body) - - def output_list(self): - ordered = self.token['ordered'] - body = self.renderer.placeholder() - while self.pop()['type'] != 'list_end': - body += self.tok() - return self.renderer.list(body, ordered) - - def output_list_item(self): - body = self.renderer.placeholder() - while self.pop()['type'] != 'list_item_end': - if self.token['type'] == 'text': - body += self.tok_text() - else: - body += self.tok() - - return self.renderer.list_item(body) - - def output_loose_item(self): - body = self.renderer.placeholder() - while self.pop()['type'] != 'list_item_end': - body += self.tok() - return self.renderer.list_item(body) - - def output_footnote(self): - self.inline._in_footnote = True - body = self.renderer.placeholder() - key = self.token['key'] - while self.pop()['type'] != 'footnote_end': - body += self.tok() - self.footnotes.append({'key': key, 'text': body}) - self.inline._in_footnote = False - return self.renderer.placeholder() - - def output_close_html(self): - text = self.token['text'] - return self.renderer.block_html(text) - - def output_open_html(self): - text = self.token['text'] - tag = self.token['tag'] - if self._parse_block_html and tag not in _pre_tags: - text = self.inline(text, rules=self.inline.inline_html_rules) - extra = self.token.get('extra') or '' - html = '<%s%s>%s</%s>' % (tag, extra, text, tag) - return self.renderer.block_html(html) - - def output_paragraph(self): - return self.renderer.paragraph(self.inline(self.token['text'])) - - def output_text(self): - return self.renderer.paragraph(self.tok_text()) - - -def markdown(text, escape=True, **kwargs): - """Render markdown formatted text to html. - - :param text: markdown formatted text content. - :param escape: if set to False, all html tags will not be escaped. - :param use_xhtml: output with xhtml tags. - :param hard_wrap: if set to True, it will has GFM line breaks feature. - :param parse_block_html: parse text only in block level html. - :param parse_inline_html: parse text only in inline level html. - """ - return Markdown(escape=escape, **kwargs)(text) |