summaryrefslogtreecommitdiff
path: root/src/mistune.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/mistune.py')
-rw-r--r--src/mistune.py1143
1 files changed, 0 insertions, 1143 deletions
diff --git a/src/mistune.py b/src/mistune.py
deleted file mode 100644
index 4c711b2..0000000
--- a/src/mistune.py
+++ /dev/null
@@ -1,1143 +0,0 @@
-# coding: utf-8
-"""
- mistune
- ~~~~~~~
-
- The fastest markdown parser in pure Python with renderer feature.
-
- :copyright: (c) 2014 - 2015 by Hsiaoming Yang.
-"""
-
-import re
-import inspect
-
-__version__ = '0.7.1'
-__author__ = 'Hsiaoming Yang <me@lepture.com>'
-__all__ = [
- 'BlockGrammar', 'BlockLexer',
- 'InlineGrammar', 'InlineLexer',
- 'Renderer', 'Markdown',
- 'markdown', 'escape',
-]
-
-
-_key_pattern = re.compile(r'\s+')
-_escape_pattern = re.compile(r'&(?!#?\w+;)')
-_newline_pattern = re.compile(r'\r\n|\r')
-_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
-_block_code_leadning_pattern = re.compile(r'^ {4}', re.M)
-_inline_tags = [
- 'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
- 'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
- 'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
- 'img', 'font',
-]
-_pre_tags = ['pre', 'script', 'style']
-_valid_end = r'(?!:/|[^\w\s@]*@)\b'
-_valid_attr = r'''"[^"]*"|'[^']*'|[^'">]'''
-_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
-
-
-def _pure_pattern(regex):
- pattern = regex.pattern
- if pattern.startswith('^'):
- pattern = pattern[1:]
- return pattern
-
-
-def _keyify(key):
- return _key_pattern.sub(' ', key.lower())
-
-
-def escape(text, quote=False, smart_amp=True):
- """Replace special characters "&", "<" and ">" to HTML-safe sequences.
-
- The original cgi.escape will always escape "&", but you can control
- this one for a smart escape amp.
-
- :param quote: if set to True, " and ' will be escaped.
- :param smart_amp: if set to False, & will always be escaped.
- """
- if smart_amp:
- text = _escape_pattern.sub('&amp;', text)
- else:
- text = text.replace('&', '&amp;')
- text = text.replace('<', '&lt;')
- text = text.replace('>', '&gt;')
- if quote:
- text = text.replace('"', '&quot;')
- text = text.replace("'", '&#39;')
- return text
-
-
-def preprocessing(text, tab=4):
- text = _newline_pattern.sub('\n', text)
- text = text.replace('\t', ' ' * tab)
- text = text.replace('\u00a0', ' ')
- text = text.replace('\u2424', '\n')
- pattern = re.compile(r'^ +$', re.M)
- return pattern.sub('', text)
-
-
-class BlockGrammar(object):
- """Grammars for block level tokens."""
-
- def_links = re.compile(
- r'^ *\[([^^\]]+)\]: *' # [key]:
- r'<?([^\s>]+)>?' # <link> or link
- r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
- )
- def_footnotes = re.compile(
- r'^\[\^([^\]]+)\]: *('
- r'[^\n]*(?:\n+|$)' # [^key]:
- r'(?: {1,}[^\n]*(?:\n+|$))*'
- r')'
- )
-
- newline = re.compile(r'^\n+')
- block_code = re.compile(r'^( {4}[^\n]+\n*)+')
- fences = re.compile(
- r'^ *(`{3,}|~{3,}) *(\S+)? *\n' # ```lang
- r'([\s\S]+?)\s*'
- r'\1 *(?:\n+|$)' # ```
- )
- hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
- heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
- lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
- block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
- list_block = re.compile(
- r'^( *)([*+-]|\d+\.) [\s\S]+?'
- r'(?:'
- r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))' # hrule
- r'|\n+(?=%s)' # def links
- r'|\n+(?=%s)' # def footnotes
- r'|\n{2,}'
- r'(?! )'
- r'(?!\1(?:[*+-]|\d+\.) )\n*'
- r'|'
- r'\s*$)' % (
- _pure_pattern(def_links),
- _pure_pattern(def_footnotes),
- )
- )
- list_item = re.compile(
- r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
- r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
- flags=re.M
- )
- list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
- paragraph = re.compile(
- r'^((?:[^\n]+\n?(?!'
- r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
- r'))+)\n*' % (
- _pure_pattern(fences).replace(r'\1', r'\2'),
- _pure_pattern(list_block).replace(r'\1', r'\3'),
- _pure_pattern(hrule),
- _pure_pattern(heading),
- _pure_pattern(lheading),
- _pure_pattern(block_quote),
- _pure_pattern(def_links),
- _pure_pattern(def_footnotes),
- '<' + _block_tag,
- )
- )
- block_html = re.compile(
- r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
- r'<!--[\s\S]*?-->',
- r'<(%s)((?:%s)*?)>([\s\S]+?)<\/\1>' % (_block_tag, _valid_attr),
- r'<%s(?:%s)*?>' % (_block_tag, _valid_attr),
- )
- )
- table = re.compile(
- r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
- )
- nptable = re.compile(
- r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
- )
- text = re.compile(r'^[^\n]+')
-
-
-class BlockLexer(object):
- """Block level lexer for block grammars."""
- grammar_class = BlockGrammar
-
- default_rules = [
- 'newline', 'hrule', 'block_code', 'fences', 'heading',
- 'nptable', 'lheading', 'block_quote',
- 'list_block', 'block_html', 'def_links',
- 'def_footnotes', 'table', 'paragraph', 'text'
- ]
-
- list_rules = (
- 'newline', 'block_code', 'fences', 'lheading', 'hrule',
- 'block_quote', 'list_block', 'block_html', 'text',
- )
-
- footnote_rules = (
- 'newline', 'block_code', 'fences', 'heading',
- 'nptable', 'lheading', 'hrule', 'block_quote',
- 'list_block', 'block_html', 'table', 'paragraph', 'text'
- )
-
- def __init__(self, rules=None, **kwargs):
- self.tokens = []
- self.def_links = {}
- self.def_footnotes = {}
-
- if not rules:
- rules = self.grammar_class()
-
- self.rules = rules
-
- def __call__(self, text, rules=None):
- return self.parse(text, rules)
-
- def parse(self, text, rules=None):
- text = text.rstrip('\n')
-
- if not rules:
- rules = self.default_rules
-
- def manipulate(text):
- for key in rules:
- rule = getattr(self.rules, key)
- m = rule.match(text)
- if not m:
- continue
- getattr(self, 'parse_%s' % key)(m)
- return m
- return False # pragma: no cover
-
- while text:
- m = manipulate(text)
- if m is not False:
- text = text[len(m.group(0)):]
- continue
- if text: # pragma: no cover
- raise RuntimeError('Infinite loop at: %s' % text)
- return self.tokens
-
- def parse_newline(self, m):
- length = len(m.group(0))
- if length > 1:
- self.tokens.append({'type': 'newline'})
-
- def parse_block_code(self, m):
- # clean leading whitespace
- code = _block_code_leadning_pattern.sub('', m.group(0))
- self.tokens.append({
- 'type': 'code',
- 'lang': None,
- 'text': code,
- })
-
- def parse_fences(self, m):
- self.tokens.append({
- 'type': 'code',
- 'lang': m.group(2),
- 'text': m.group(3),
- })
-
- def parse_heading(self, m):
- self.tokens.append({
- 'type': 'heading',
- 'level': len(m.group(1)),
- 'text': m.group(2),
- })
-
- def parse_lheading(self, m):
- """Parse setext heading."""
- self.tokens.append({
- 'type': 'heading',
- 'level': 1 if m.group(2) == '=' else 2,
- 'text': m.group(1),
- })
-
- def parse_hrule(self, m):
- self.tokens.append({'type': 'hrule'})
-
- def parse_list_block(self, m):
- bull = m.group(2)
- self.tokens.append({
- 'type': 'list_start',
- 'ordered': '.' in bull,
- })
- cap = m.group(0)
- self._process_list_item(cap, bull)
- self.tokens.append({'type': 'list_end'})
-
- def _process_list_item(self, cap, bull):
- cap = self.rules.list_item.findall(cap)
-
- _next = False
- length = len(cap)
-
- for i in range(length):
- item = cap[i][0]
-
- # remove the bullet
- space = len(item)
- item = self.rules.list_bullet.sub('', item)
-
- # outdent
- if '\n ' in item:
- space = space - len(item)
- pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
- item = pattern.sub('', item)
-
- # determin whether item is loose or not
- loose = _next
- if not loose and re.search(r'\n\n(?!\s*$)', item):
- loose = True
-
- rest = len(item)
- if i != length - 1 and rest:
- _next = item[rest-1] == '\n'
- if not loose:
- loose = _next
-
- if loose:
- t = 'loose_item_start'
- else:
- t = 'list_item_start'
-
- self.tokens.append({'type': t})
- # recurse
- self.parse(item, self.list_rules)
- self.tokens.append({'type': 'list_item_end'})
-
- def parse_block_quote(self, m):
- self.tokens.append({'type': 'block_quote_start'})
- # clean leading >
- cap = _block_quote_leading_pattern.sub('', m.group(0))
- self.parse(cap)
- self.tokens.append({'type': 'block_quote_end'})
-
- def parse_def_links(self, m):
- key = _keyify(m.group(1))
- self.def_links[key] = {
- 'link': m.group(2),
- 'title': m.group(3),
- }
-
- def parse_def_footnotes(self, m):
- key = _keyify(m.group(1))
- if key in self.def_footnotes:
- # footnote is already defined
- return
-
- self.def_footnotes[key] = 0
-
- self.tokens.append({
- 'type': 'footnote_start',
- 'key': key,
- })
-
- text = m.group(2)
-
- if '\n' in text:
- lines = text.split('\n')
- whitespace = None
- for line in lines[1:]:
- space = len(line) - len(line.lstrip())
- if space and (not whitespace or space < whitespace):
- whitespace = space
- newlines = [lines[0]]
- for line in lines[1:]:
- newlines.append(line[whitespace:])
- text = '\n'.join(newlines)
-
- self.parse(text, self.footnote_rules)
-
- self.tokens.append({
- 'type': 'footnote_end',
- 'key': key,
- })
-
- def parse_table(self, m):
- item = self._process_table(m)
-
- cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
- cells = cells.split('\n')
- for i, v in enumerate(cells):
- v = re.sub(r'^ *\| *| *\| *$', '', v)
- cells[i] = re.split(r' *\| *', v)
-
- item['cells'] = cells
- self.tokens.append(item)
-
- def parse_nptable(self, m):
- item = self._process_table(m)
-
- cells = re.sub(r'\n$', '', m.group(3))
- cells = cells.split('\n')
- for i, v in enumerate(cells):
- cells[i] = re.split(r' *\| *', v)
-
- item['cells'] = cells
- self.tokens.append(item)
-
- def _process_table(self, m):
- header = re.sub(r'^ *| *\| *$', '', m.group(1))
- header = re.split(r' *\| *', header)
- align = re.sub(r' *|\| *$', '', m.group(2))
- align = re.split(r' *\| *', align)
-
- for i, v in enumerate(align):
- if re.search(r'^ *-+: *$', v):
- align[i] = 'right'
- elif re.search(r'^ *:-+: *$', v):
- align[i] = 'center'
- elif re.search(r'^ *:-+ *$', v):
- align[i] = 'left'
- else:
- align[i] = None
-
- item = {
- 'type': 'table',
- 'header': header,
- 'align': align,
- }
- return item
-
- def parse_block_html(self, m):
- tag = m.group(1)
- if not tag:
- text = m.group(0)
- self.tokens.append({
- 'type': 'close_html',
- 'text': text
- })
- else:
- attr = m.group(2)
- text = m.group(3)
- self.tokens.append({
- 'type': 'open_html',
- 'tag': tag,
- 'extra': attr,
- 'text': text
- })
-
- def parse_paragraph(self, m):
- text = m.group(1).rstrip('\n')
- self.tokens.append({'type': 'paragraph', 'text': text})
-
- def parse_text(self, m):
- text = m.group(0)
- self.tokens.append({'type': 'text', 'text': text})
-
-
-class InlineGrammar(object):
- """Grammars for inline level tokens."""
-
- escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])') # \* \+ \! ....
- inline_html = re.compile(
- r'^(?:%s|%s|%s)' % (
- r'<!--[\s\S]*?-->',
- r'<(\w+%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_valid_end, _valid_attr),
- r'<\w+%s(?:%s)*?>' % (_valid_end, _valid_attr),
- )
- )
- autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
- link = re.compile(
- r'^!?\[('
- r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
- r')\]\('
- r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
- r'\)'
- )
- reflink = re.compile(
- r'^!?\[('
- r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
- r')\]\s*\[([^^\]]*)\]'
- )
- nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
- url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
- double_emphasis = re.compile(
- r'^_{2}([\s\S]+?)_{2}(?!_)' # __word__
- r'|'
- r'^\*{2}([\s\S]+?)\*{2}(?!\*)' # **word**
- )
- emphasis = re.compile(
- r'^\b_((?:__|[\s\S])+?)_\b' # _word_
- r'|'
- r'^\*((?:\*\*|[\s\S])+?)\*(?!\*)' # *word*
- )
- code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)') # `code`
- linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
- strikethrough = re.compile(r'^~~(?=\S)([\s\S]+?\S)~~') # ~~word~~
- footnote = re.compile(r'^\[\^([^\]]+)\]')
- text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')
-
- def hard_wrap(self):
- """Grammar for hard wrap linebreak. You don't need to add two
- spaces at the end of a line.
- """
- self.linebreak = re.compile(r'^ *\n(?!\s*$)')
- self.text = re.compile(
- r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
- )
-
-
-class InlineLexer(object):
- """Inline level lexer for inline grammars."""
- grammar_class = InlineGrammar
-
- default_rules = [
- 'escape', 'inline_html', 'autolink', 'url',
- 'footnote', 'link', 'reflink', 'nolink',
- 'double_emphasis', 'emphasis', 'code',
- 'linebreak', 'strikethrough', 'text',
- ]
- inline_html_rules = [
- 'escape', 'autolink', 'url', 'link', 'reflink',
- 'nolink', 'double_emphasis', 'emphasis', 'code',
- 'linebreak', 'strikethrough', 'text',
- ]
-
- def __init__(self, renderer, rules=None, **kwargs):
- self.renderer = renderer
- self.links = {}
- self.footnotes = {}
- self.footnote_index = 0
-
- if not rules:
- rules = self.grammar_class()
-
- self.rules = rules
-
- self._in_link = False
- self._in_footnote = False
-
- kwargs.update(self.renderer.options)
- self._parse_inline_html = kwargs.get('parse_inline_html')
-
- def __call__(self, text, rules=None):
- return self.output(text, rules)
-
- def setup(self, links, footnotes):
- self.footnote_index = 0
- self.links = links or {}
- self.footnotes = footnotes or {}
-
- def output(self, text, rules=None):
- text = text.rstrip('\n')
- if not rules:
- rules = list(self.default_rules)
-
- if self._in_footnote and 'footnote' in rules:
- rules.remove('footnote')
-
- output = self.renderer.placeholder()
-
- def manipulate(text):
- for key in rules:
- pattern = getattr(self.rules, key)
- m = pattern.match(text)
- if not m:
- continue
- self.line_match = m
- out = getattr(self, 'output_%s' % key)(m)
- if out is not None:
- return m, out
- return False # pragma: no cover
-
- self.line_started = False
- while text:
- ret = manipulate(text)
- self.line_started = True
- if ret is not False:
- m, out = ret
- output += out
- text = text[len(m.group(0)):]
- continue
- if text: # pragma: no cover
- raise RuntimeError('Infinite loop at: %s' % text)
-
- return output
-
- def output_escape(self, m):
- return m.group(1)
-
- def output_autolink(self, m):
- link = m.group(1)
- if m.group(2) == '@':
- is_email = True
- else:
- is_email = False
- return self.renderer.autolink(link, is_email)
-
- def output_url(self, m):
- link = m.group(1)
- if self._in_link:
- return self.renderer.text(link)
- return self.renderer.autolink(link, False)
-
- def output_inline_html(self, m):
- tag = m.group(1)
- if self._parse_inline_html and tag in _inline_tags:
- text = m.group(3)
- if tag == 'a':
- self._in_link = True
- text = self.output(text, rules=self.inline_html_rules)
- self._in_link = False
- else:
- text = self.output(text, rules=self.inline_html_rules)
- extra = m.group(2) or ''
- html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
- else:
- html = m.group(0)
- return self.renderer.inline_html(html)
-
- def output_footnote(self, m):
- key = _keyify(m.group(1))
- if key not in self.footnotes:
- return None
- if self.footnotes[key]:
- return None
- self.footnote_index += 1
- self.footnotes[key] = self.footnote_index
- return self.renderer.footnote_ref(key, self.footnote_index)
-
- def output_link(self, m):
- return self._process_link(m, m.group(3), m.group(4))
-
- def output_reflink(self, m):
- key = _keyify(m.group(2) or m.group(1))
- if key not in self.links:
- return None
- ret = self.links[key]
- return self._process_link(m, ret['link'], ret['title'])
-
- def output_nolink(self, m):
- key = _keyify(m.group(1))
- if key not in self.links:
- return None
- ret = self.links[key]
- return self._process_link(m, ret['link'], ret['title'])
-
- def _process_link(self, m, link, title=None):
- line = m.group(0)
- text = m.group(1)
- if line[0] == '!':
- return self.renderer.image(link, title, text)
-
- self._in_link = True
- text = self.output(text)
- self._in_link = False
- return self.renderer.link(link, title, text)
-
- def output_double_emphasis(self, m):
- text = m.group(2) or m.group(1)
- text = self.output(text)
- return self.renderer.double_emphasis(text)
-
- def output_emphasis(self, m):
- text = m.group(2) or m.group(1)
- text = self.output(text)
- return self.renderer.emphasis(text)
-
- def output_code(self, m):
- text = m.group(2)
- return self.renderer.codespan(text)
-
- def output_linebreak(self, m):
- return self.renderer.linebreak()
-
- def output_strikethrough(self, m):
- text = self.output(m.group(1))
- return self.renderer.strikethrough(text)
-
- def output_text(self, m):
- text = m.group(0)
- return self.renderer.text(text)
-
-
-class Renderer(object):
- """The default HTML renderer for rendering Markdown.
- """
-
- def __init__(self, **kwargs):
- self.options = kwargs
-
- def placeholder(self):
- """Returns the default, empty output value for the renderer.
-
- All renderer methods use the '+=' operator to append to this value.
- Default is a string so rendering HTML can build up a result string with
- the rendered Markdown.
-
- Can be overridden by Renderer subclasses to be types like an empty
- list, allowing the renderer to create a tree-like structure to
- represent the document (which can then be reprocessed later into a
- separate format like docx or pdf).
- """
- return ''
-
- def block_code(self, code, lang=None):
- """Rendering block level code. ``pre > code``.
-
- :param code: text content of the code block.
- :param lang: language of the given code.
- """
- code = code.rstrip('\n')
- if not lang:
- code = escape(code, smart_amp=False)
- return '<pre><code>%s\n</code></pre>\n' % code
- code = escape(code, quote=True, smart_amp=False)
- #return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)
- return '<pre><code class="%s">%s\n</code></pre>\n' % (lang, code)
-
-
- def block_quote(self, text):
- """Rendering <blockquote> with the given text.
-
- :param text: text content of the blockquote.
- """
- return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')
-
- def block_html(self, html):
- """Rendering block level pure html content.
-
- :param html: text content of the html snippet.
- """
- if self.options.get('skip_style') and \
- html.lower().startswith('<style'):
- return ''
- if self.options.get('escape'):
- return escape(html)
- return html
-
- def header(self, text, level, raw=None):
- """Rendering header/heading tags like ``<h1>`` ``<h2>``.
-
- :param text: rendered text content for the header.
- :param level: a number for the header level, for example: 1.
- :param raw: raw text content of the header.
- """
- return '<h%d>%s</h%d>\n' % (level, text, level)
-
- def hrule(self):
- """Rendering method for ``<hr>`` tag."""
- if self.options.get('use_xhtml'):
- return '<hr />\n'
- return '<hr>\n'
-
- def list(self, body, ordered=True):
- """Rendering list tags like ``<ul>`` and ``<ol>``.
-
- :param body: body contents of the list.
- :param ordered: whether this list is ordered or not.
- """
- tag = 'ul'
- if ordered:
- tag = 'ol'
- return '<%s>\n%s</%s>\n' % (tag, body, tag)
-
- def list_item(self, text):
- """Rendering list item snippet. Like ``<li>``."""
- return '<li>%s</li>\n' % text
-
- def paragraph(self, text):
- """Rendering paragraph tags. Like ``<p>``."""
- return '<p>%s</p>\n' % text.strip(' ')
-
- def table(self, header, body):
- """Rendering table element. Wrap header and body in it.
-
- :param header: header part of the table.
- :param body: body part of the table.
- """
- return (
- '<table>\n<thead>%s</thead>\n'
- '<tbody>\n%s</tbody>\n</table>\n'
- ) % (header, body)
-
- def table_row(self, content):
- """Rendering a table row. Like ``<tr>``.
-
- :param content: content of current table row.
- """
- return '<tr>\n%s</tr>\n' % content
-
- def table_cell(self, content, **flags):
- """Rendering a table cell. Like ``<th>`` ``<td>``.
-
- :param content: content of current table cell.
- :param header: whether this is header or not.
- :param align: align of current table cell.
- """
- if flags['header']:
- tag = 'th'
- else:
- tag = 'td'
- align = flags['align']
- if not align:
- return '<%s>%s</%s>\n' % (tag, content, tag)
- return '<%s style="text-align:%s">%s</%s>\n' % (
- tag, align, content, tag
- )
-
- def double_emphasis(self, text):
- """Rendering **strong** text.
-
- :param text: text content for emphasis.
- """
- return '<strong>%s</strong>' % text
-
- def emphasis(self, text):
- """Rendering *emphasis* text.
-
- :param text: text content for emphasis.
- """
- return '<em>%s</em>' % text
-
- def codespan(self, text):
- """Rendering inline `code` text.
-
- :param text: text content for inline code.
- """
- text = escape(text.rstrip(), smart_amp=False)
- return '<code>%s</code>' % text
-
- def linebreak(self):
- """Rendering line break like ``<br>``."""
- if self.options.get('use_xhtml'):
- return '<br />\n'
- return '<br>\n'
-
- def strikethrough(self, text):
- """Rendering ~~strikethrough~~ text.
-
- :param text: text content for strikethrough.
- """
- return '<del>%s</del>' % text
-
- def text(self, text):
- """Rendering unformatted text.
-
- :param text: text content.
- """
- return escape(text)
-
- def autolink(self, link, is_email=False):
- """Rendering a given link or email address.
-
- :param link: link content or email address.
- :param is_email: whether this is an email or not.
- """
- text = link = escape(link)
- if is_email:
- link = 'mailto:%s' % link
- return '<a href="%s">%s</a>' % (link, text)
-
- def link(self, link, title, text):
- """Rendering a given link with content and title.
-
- :param link: href link for ``<a>`` tag.
- :param title: title content for `title` attribute.
- :param text: text content for description.
- """
- if link.startswith('javascript:'):
- link = ''
- if not title:
- return '<a href="%s">%s</a>' % (link, text)
- title = escape(title, quote=True)
- return '<a href="%s" title="%s">%s</a>' % (link, title, text)
-
- def image(self, src, title, text):
- """Rendering a image with title and text.
-
- :param src: source link of the image.
- :param title: title text of the image.
- :param text: alt text of the image.
- """
- if src.startswith('javascript:'):
- src = ''
- text = escape(text, quote=True)
- if title:
- title = escape(title, quote=True)
- html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
- else:
- html = '<img src="%s" alt="%s"' % (src, text)
- if self.options.get('use_xhtml'):
- return '%s />' % html
- return '%s>' % html
-
- def inline_html(self, html):
- """Rendering span level pure html content.
-
- :param html: text content of the html snippet.
- """
- if self.options.get('escape'):
- return escape(html)
- return html
-
- def newline(self):
- """Rendering newline element."""
- return ''
-
- def footnote_ref(self, key, index):
- """Rendering the ref anchor of a footnote.
-
- :param key: identity key for the footnote.
- :param index: the index count of current footnote.
- """
- html = (
- '<sup class="footnote-ref" id="fnref-%s">'
- '<a href="#fn-%s" rel="footnote">%d</a></sup>'
- ) % (escape(key), escape(key), index)
- return html
-
- def footnote_item(self, key, text):
- """Rendering a footnote item.
-
- :param key: identity key for the footnote.
- :param text: text content of the footnote.
- """
- back = (
- '<a href="#fnref-%s" rev="footnote">&#8617;</a>'
- ) % escape(key)
- text = text.rstrip()
- if text.endswith('</p>'):
- text = re.sub(r'<\/p>$', r'%s</p>' % back, text)
- else:
- text = '%s<p>%s</p>' % (text, back)
- html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
- return html
-
- def footnotes(self, text):
- """Wrapper for all footnotes.
-
- :param text: contents of all footnotes.
- """
- html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
- return html % (self.hrule(), text)
-
-
-class Markdown(object):
- """The Markdown parser.
-
- :param renderer: An instance of ``Renderer``.
- :param inline: An inline lexer class or instance.
- :param block: A block lexer class or instance.
- """
- def __init__(self, renderer=None, inline=None, block=None, **kwargs):
- if not renderer:
- renderer = Renderer(**kwargs)
-
- self.renderer = renderer
-
- if inline and inspect.isclass(inline):
- inline = inline(renderer, **kwargs)
- if block and inspect.isclass(block):
- block = block(**kwargs)
-
- if inline:
- self.inline = inline
- else:
- rules = InlineGrammar()
- if kwargs.get('hard_wrap'):
- rules.hard_wrap()
- self.inline = InlineLexer(renderer, rules=rules)
-
- self.block = block or BlockLexer(BlockGrammar())
- self.options = kwargs
- self.footnotes = []
- self.tokens = []
-
- # detect if it should parse text in block html
- self._parse_block_html = kwargs.get('parse_block_html')
-
- def __call__(self, text):
- return self.parse(text)
-
- def render(self, text):
- """Render the Markdown text.
-
- :param text: markdown formatted text content.
- """
- return self.parse(text)
-
- def parse(self, text):
- out = self.output(preprocessing(text))
-
- keys = self.block.def_footnotes
-
- # reset block
- self.block.def_links = {}
- self.block.def_footnotes = {}
-
- # reset inline
- self.inline.links = {}
- self.inline.footnotes = {}
-
- if not self.footnotes:
- return out
-
- footnotes = filter(lambda o: keys.get(o['key']), self.footnotes)
- self.footnotes = sorted(
- footnotes, key=lambda o: keys.get(o['key']), reverse=True
- )
-
- body = self.renderer.placeholder()
- while self.footnotes:
- note = self.footnotes.pop()
- body += self.renderer.footnote_item(
- note['key'], note['text']
- )
-
- out += self.renderer.footnotes(body)
- return out
-
- def pop(self):
- if not self.tokens:
- return None
- self.token = self.tokens.pop()
- return self.token
-
- def peek(self):
- if self.tokens:
- return self.tokens[-1]
- return None # pragma: no cover
-
- def output(self, text, rules=None):
- self.tokens = self.block(text, rules)
- self.tokens.reverse()
-
- self.inline.setup(self.block.def_links, self.block.def_footnotes)
-
- out = self.renderer.placeholder()
- while self.pop():
- out += self.tok()
- return out
-
- def tok(self):
- t = self.token['type']
-
- # sepcial cases
- if t.endswith('_start'):
- t = t[:-6]
-
- return getattr(self, 'output_%s' % t)()
-
- def tok_text(self):
- text = self.token['text']
- while self.peek()['type'] == 'text':
- text += '\n' + self.pop()['text']
- return self.inline(text)
-
- def output_newline(self):
- return self.renderer.newline()
-
- def output_hrule(self):
- return self.renderer.hrule()
-
- def output_heading(self):
- return self.renderer.header(
- self.inline(self.token['text']),
- self.token['level'],
- self.token['text'],
- )
-
- def output_code(self):
- return self.renderer.block_code(
- self.token['text'], self.token['lang']
- )
-
- def output_table(self):
- aligns = self.token['align']
- aligns_length = len(aligns)
- cell = self.renderer.placeholder()
-
- # header part
- header = self.renderer.placeholder()
- for i, value in enumerate(self.token['header']):
- align = aligns[i] if i < aligns_length else None
- flags = {'header': True, 'align': align}
- cell += self.renderer.table_cell(self.inline(value), **flags)
-
- header += self.renderer.table_row(cell)
-
- # body part
- body = self.renderer.placeholder()
- for i, row in enumerate(self.token['cells']):
- cell = self.renderer.placeholder()
- for j, value in enumerate(row):
- align = aligns[j] if j < aligns_length else None
- flags = {'header': False, 'align': align}
- cell += self.renderer.table_cell(self.inline(value), **flags)
- body += self.renderer.table_row(cell)
-
- return self.renderer.table(header, body)
-
- def output_block_quote(self):
- body = self.renderer.placeholder()
- while self.pop()['type'] != 'block_quote_end':
- body += self.tok()
- return self.renderer.block_quote(body)
-
- def output_list(self):
- ordered = self.token['ordered']
- body = self.renderer.placeholder()
- while self.pop()['type'] != 'list_end':
- body += self.tok()
- return self.renderer.list(body, ordered)
-
- def output_list_item(self):
- body = self.renderer.placeholder()
- while self.pop()['type'] != 'list_item_end':
- if self.token['type'] == 'text':
- body += self.tok_text()
- else:
- body += self.tok()
-
- return self.renderer.list_item(body)
-
- def output_loose_item(self):
- body = self.renderer.placeholder()
- while self.pop()['type'] != 'list_item_end':
- body += self.tok()
- return self.renderer.list_item(body)
-
- def output_footnote(self):
- self.inline._in_footnote = True
- body = self.renderer.placeholder()
- key = self.token['key']
- while self.pop()['type'] != 'footnote_end':
- body += self.tok()
- self.footnotes.append({'key': key, 'text': body})
- self.inline._in_footnote = False
- return self.renderer.placeholder()
-
- def output_close_html(self):
- text = self.token['text']
- return self.renderer.block_html(text)
-
- def output_open_html(self):
- text = self.token['text']
- tag = self.token['tag']
- if self._parse_block_html and tag not in _pre_tags:
- text = self.inline(text, rules=self.inline.inline_html_rules)
- extra = self.token.get('extra') or ''
- html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
- return self.renderer.block_html(html)
-
- def output_paragraph(self):
- return self.renderer.paragraph(self.inline(self.token['text']))
-
- def output_text(self):
- return self.renderer.paragraph(self.tok_text())
-
-
-def markdown(text, escape=True, **kwargs):
- """Render markdown formatted text to html.
-
- :param text: markdown formatted text content.
- :param escape: if set to False, all html tags will not be escaped.
- :param use_xhtml: output with xhtml tags.
- :param hard_wrap: if set to True, it will has GFM line breaks feature.
- :param parse_block_html: parse text only in block level html.
- :param parse_inline_html: parse text only in inline level html.
- """
- return Markdown(escape=escape, **kwargs)(text)