summary | refs | log | tree | commit | diff
path: root/src/mistune/renderers
diff options
context:
space:
mode:
Diffstat (limited to 'src/mistune/renderers')
-rw-r--r-- src/mistune/renderers/__init__.py 0
-rw-r--r-- src/mistune/renderers/_list.py 60
-rw-r--r-- src/mistune/renderers/html.py 151
-rw-r--r-- src/mistune/renderers/markdown.py 146
-rw-r--r-- src/mistune/renderers/rst.py 147
5 files changed, 504 insertions, 0 deletions
diff --git a/src/mistune/renderers/__init__.py b/src/mistune/renderers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/mistune/renderers/__init__.py
diff --git a/src/mistune/renderers/_list.py b/src/mistune/renderers/_list.py
new file mode 100644
index 0000000..0a18639
--- /dev/null
+++ b/src/mistune/renderers/_list.py
@@ -0,0 +1,60 @@
+from ..util import strip_end
+
+
def render_list(renderer, token, state) -> str:
    """Render a ``list`` token as plain-text list markup.

    The per-item work is delegated to the ordered or unordered item
    generator depending on ``token['attrs']['ordered']``; the pieces they
    yield are concatenated.

    :param renderer: renderer whose ``render_token`` is used for item content.
    :param token: the list token; ``token['parent']`` is present only when
        this list was reached from inside another list item.
    :param state: parser state, passed through untouched.
    :return: the rendered list text.
    """
    render_items = (
        _render_ordered_list if token['attrs']['ordered']
        else _render_unordered_list
    )
    body = ''.join(render_items(renderer, token, state))

    enclosing = token.get('parent')
    if not enclosing:
        # Top-level list: clean the tail with strip_end and finish with
        # exactly one appended newline.
        return strip_end(body) + '\n'
    # Nested list: a tight parent gets the text as-is, a loose parent gets
    # one extra newline after the nested list.
    return body if enclosing['tight'] else body + '\n'
+
+
+def _render_list_item(renderer, parent, item, state):
+ leading = parent['leading']
+ text = ''
+ for tok in item['children']:
+ if tok['type'] == 'list':
+ tok['parent'] = parent
+ elif tok['type'] == 'blank_line':
+ continue
+ text += renderer.render_token(tok, state)
+
+ lines = text.splitlines()
+ text = lines[0] + '\n'
+ prefix = ' ' * len(leading)
+ for line in lines[1:]:
+ if line:
+ text += prefix + line + '\n'
+ else:
+ text += '\n'
+ return leading + text
+
+
+def _render_ordered_list(renderer, token, state):
+ attrs = token['attrs']
+ start = attrs.get('start', 1)
+ for item in token['children']:
+ leading = str(start) + token['bullet'] + ' '
+ parent = {
+ 'leading': leading,
+ 'tight': token['tight'],
+ }
+ yield _render_list_item(renderer, parent, item, state)
+ start += 1
+
+
+def _render_unordered_list(renderer, token, state):
+ parent = {
+ 'leading': token['bullet'] + ' ',
+ 'tight': token['tight'],
+ }
+ for item in token['children']:
+ yield _render_list_item(renderer, parent, item, state)
diff --git a/src/mistune/renderers/html.py b/src/mistune/renderers/html.py
new file mode 100644
index 0000000..c458a4a
--- /dev/null
+++ b/src/mistune/renderers/html.py
@@ -0,0 +1,151 @@
+from ..core import BaseRenderer
+from ..util import escape as escape_text, striptags, safe_entity
+
+
class HTMLRenderer(BaseRenderer):
    """Renderer that turns Markdown tokens into an HTML string."""
    NAME = 'html'

    # URL prefixes that are replaced by a placeholder link unless allowed.
    HARMFUL_PROTOCOLS = (
        'javascript:',
        'vbscript:',
        'file:',
        'data:',
    )
    # ``data:`` prefixes that are let through despite HARMFUL_PROTOCOLS.
    GOOD_DATA_PROTOCOLS = (
        'data:image/gif;',
        'data:image/png;',
        'data:image/jpeg;',
        'data:image/webp;',
    )

    def __init__(self, escape=True, allow_harmful_protocols=None):
        """Create the renderer.

        :param escape: when true, text and raw HTML are escaped.
        :param allow_harmful_protocols: ``True`` to disable URL filtering,
            or an iterable of lowercase URL prefixes to let through.
        """
        super().__init__()
        self._escape = escape
        self._allow_harmful_protocols = allow_harmful_protocols

    def render_token(self, token, state):
        """Dispatch one token to the method named after its type.

        Kept backward compatible with v2: handlers receive the rendered
        text (if any) positionally and the token attributes as keywords.
        """
        handler = self._get_method(token['type'])
        kwargs = token.get('attrs') or {}

        if 'raw' in token:
            args = (token['raw'],)
        elif 'children' in token:
            args = (self.render_tokens(token['children'], state),)
        else:
            args = ()
        return handler(*args, **kwargs)

    def safe_url(self, url: str) -> str:
        """Return ``url`` unchanged if acceptable, else ``'#harmful-link'``.

        Used when rendering links and images. The comparison is done on a
        lowercased copy of the URL.
        """
        allowed = self._allow_harmful_protocols
        if allowed is True:
            return url

        lowered = url.lower()
        if allowed and lowered.startswith(tuple(allowed)):
            return url

        is_harmful = lowered.startswith(self.HARMFUL_PROTOCOLS)
        if is_harmful and not lowered.startswith(self.GOOD_DATA_PROTOCOLS):
            return '#harmful-link'
        return url

    def text(self, text: str) -> str:
        """Render plain text, escaped or entity-normalised per ``escape``."""
        return escape_text(text) if self._escape else safe_entity(text)

    def emphasis(self, text: str) -> str:
        """Wrap ``text`` in ``<em>``."""
        return ''.join(('<em>', text, '</em>'))

    def strong(self, text: str) -> str:
        """Wrap ``text`` in ``<strong>``."""
        return ''.join(('<strong>', text, '</strong>'))

    def link(self, text: str, url: str, title=None) -> str:
        """Render an anchor; ``title`` is added only when truthy."""
        parts = ['<a href="', self.safe_url(url), '"']
        if title:
            parts += [' title="', safe_entity(title), '"']
        parts += ['>', text, '</a>']
        return ''.join(parts)

    def image(self, text: str, url: str, title=None) -> str:
        """Render an ``<img>``; the alt text is ``text`` with tags removed."""
        src = self.safe_url(url)
        alt = escape_text(striptags(text))
        parts = ['<img src="', src, '" alt="', alt, '"']
        if title:
            parts += [' title="', safe_entity(title), '"']
        parts.append(' />')
        return ''.join(parts)

    def codespan(self, text: str) -> str:
        """Wrap ``text`` in ``<code>`` (no escaping is done here)."""
        return ''.join(('<code>', text, '</code>'))

    def linebreak(self) -> str:
        """Render a hard line break."""
        return '<br />\n'

    def softbreak(self) -> str:
        """Render a soft line break as a newline."""
        return '\n'

    def inline_html(self, html: str) -> str:
        """Render inline raw HTML, escaped when ``escape`` is enabled."""
        return escape_text(html) if self._escape else html

    def paragraph(self, text: str) -> str:
        """Wrap ``text`` in ``<p>``."""
        return ''.join(('<p>', text, '</p>\n'))

    def heading(self, text: str, level: int, **attrs) -> str:
        """Render ``<hN>``; an ``id`` attribute is added when provided."""
        tag = 'h' + str(level)
        anchor = attrs.get('id')
        id_attr = ' id="' + anchor + '"' if anchor else ''
        return '<' + tag + id_attr + '>' + text + '</' + tag + '>\n'

    def blank_line(self) -> str:
        """Blank lines produce no output."""
        return ''

    def thematic_break(self) -> str:
        """Render a horizontal rule."""
        return '<hr />\n'

    def block_text(self, text: str) -> str:
        """Return ``text`` unchanged."""
        return text

    def block_code(self, code: str, info=None) -> str:
        """Render a code block; the first word of ``info`` names the language."""
        class_attr = ''
        if info is not None:
            info = safe_entity(info.strip())
            if info:
                class_attr = ' class="language-' + info.split(None, 1)[0] + '"'
        return '<pre><code' + class_attr + '>' + escape_text(code) + '</code></pre>\n'

    def block_quote(self, text: str) -> str:
        """Wrap ``text`` in ``<blockquote>``."""
        return ''.join(('<blockquote>\n', text, '</blockquote>\n'))

    def block_html(self, html: str) -> str:
        """Render block-level raw HTML, or an escaped paragraph when escaping."""
        if not self._escape:
            return html + '\n'
        return '<p>' + escape_text(html) + '</p>\n'

    def block_error(self, text: str) -> str:
        """Render an error block."""
        return ''.join(('<div class="error"><pre>', text, '</pre></div>\n'))

    def list(self, text: str, ordered: bool, **attrs) -> str:
        """Render ``<ol>`` (with optional ``start``) or ``<ul>``."""
        if not ordered:
            return '<ul>\n' + text + '</ul>\n'
        start = attrs.get('start')
        opening = '<ol' if start is None else '<ol start="' + str(start) + '"'
        return opening + '>\n' + text + '</ol>\n'

    def list_item(self, text: str) -> str:
        """Wrap ``text`` in ``<li>``."""
        return ''.join(('<li>', text, '</li>\n'))
diff --git a/src/mistune/renderers/markdown.py b/src/mistune/renderers/markdown.py
new file mode 100644
index 0000000..78334bc
--- /dev/null
+++ b/src/mistune/renderers/markdown.py
@@ -0,0 +1,146 @@
+import re
+from typing import Dict, Any
+from textwrap import indent
+from ._list import render_list
+from ..core import BaseRenderer, BlockState
+from ..util import strip_end
+
# Matches a run of backticks and/or tildes at the start of any line; used to
# find content that could be mistaken for a code fence.
fenced_re = re.compile(r'^(?:`|~)+', flags=re.MULTILINE)
+
+
class MarkdownRenderer(BaseRenderer):
    """A renderer to re-format Markdown text."""
    NAME = 'markdown'

    def __call__(self, tokens, state: BlockState):
        """Render ``tokens`` and append the reference-link definitions."""
        out = self.render_tokens(tokens, state)
        # Reference definitions go after the body, separated by blank lines.
        out += '\n\n'.join(self.render_referrences(state)) + '\n'
        return strip_end(out)

    def render_referrences(self, state: BlockState):
        """Yield one ``[label]: url "title"`` line per entry in
        ``state.env['ref_links']``; the title is omitted when empty.
        """
        ref_links = state.env['ref_links']
        for key in ref_links:
            attrs = ref_links[key]
            text = '[' + attrs['label'] + ']: ' + attrs['url']
            title = attrs.get('title')
            if title:
                text += ' "' + title + '"'
            yield text

    def render_children(self, token, state: BlockState):
        """Render ``token['children']`` and return the resulting text."""
        children = token['children']
        return self.render_tokens(children, state)

    def text(self, token: Dict[str, Any], state: BlockState) -> str:
        """Return the raw text of the token."""
        return token['raw']

    def emphasis(self, token: Dict[str, Any], state: BlockState) -> str:
        """Wrap the children in single asterisks."""
        return '*' + self.render_children(token, state) + '*'

    def strong(self, token: Dict[str, Any], state: BlockState) -> str:
        """Wrap the children in double asterisks."""
        return '**' + self.render_children(token, state) + '**'

    def _render_link(self, token: Dict[str, Any], state: BlockState,
                     autolink: bool) -> str:
        """Shared body of :meth:`link` and :meth:`image`.

        :param autolink: when true, a link whose text equals its URL (or
            its URL minus ``mailto:``) and that has no title is written in
            the short ``<url>`` form.
        """
        label = token.get('label')
        text = self.render_children(token, state)
        out = '[' + text + ']'
        if label:
            # Reference-style: the destination lives in the definitions.
            return out + '[' + label + ']'

        attrs = token['attrs']
        url = attrs['url']
        title = attrs.get('title')
        if autolink and not title and url in (text, 'mailto:' + text):
            return '<' + text + '>'

        out += '('
        if '(' in url or ')' in url:
            # Parentheses would clash with the surrounding "(...)".
            out += '<' + url + '>'
        else:
            out += url
        if title:
            out += ' "' + title + '"'
        return out + ')'

    def link(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a link as reference, autolink, or inline form."""
        return self._render_link(token, state, autolink=True)

    def image(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render an image as ``!`` followed by link syntax.

        The autolink short form is never used here: ``!<url>`` is a literal
        ``!`` followed by a link, not an image.
        """
        return '!' + self._render_link(token, state, autolink=False)

    def codespan(self, token: Dict[str, Any], state: BlockState) -> str:
        """Wrap the raw text in single backticks."""
        return '`' + token['raw'] + '`'

    def linebreak(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a hard break as two trailing spaces and a newline."""
        return '  \n'

    def softbreak(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a soft break as a newline."""
        return '\n'

    def blank_line(self, token: Dict[str, Any], state: BlockState) -> str:
        """Blank-line tokens produce no output."""
        return ''

    def inline_html(self, token: Dict[str, Any], state: BlockState) -> str:
        """Return inline HTML untouched."""
        return token['raw']

    def paragraph(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render the children followed by a blank line."""
        text = self.render_children(token, state)
        return text + '\n\n'

    def heading(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render an ATX heading with ``attrs['level']`` hash marks."""
        level = token['attrs']['level']
        marker = '#' * level
        text = self.render_children(token, state)
        return marker + ' ' + text + '\n\n'

    def thematic_break(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a thematic break."""
        return '***\n\n'

    def block_text(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render the children followed by a single newline."""
        return self.render_children(token, state) + '\n'

    def block_code(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a fenced code block.

        The token's own ``marker`` is reused when present; otherwise one is
        chosen by ``_get_fenced_marker`` based on the code's content.
        """
        attrs = token.get('attrs', {})
        info = attrs.get('info', '')
        code = token['raw']
        if code and code[-1] != '\n':
            # The closing fence must start on its own line.
            code += '\n'

        marker = token.get('marker')
        if not marker:
            marker = _get_fenced_marker(code)
        return marker + info + '\n' + code + marker + '\n\n'

    def block_quote(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render the children with every line prefixed by ``> ``.

        Blank lines are prefixed too, so that several quoted blocks stay
        inside one quote instead of being split at the blank line.
        """
        # Drop the children's trailing newlines first so the quote does not
        # end in empty "> " lines.
        body = self.render_children(token, state).rstrip('\n')
        # textwrap.indent skips whitespace-only lines unless a predicate
        # says otherwise.
        text = indent(body, '> ', lambda line: True)
        return text + '\n\n'

    def block_html(self, token: Dict[str, Any], state: BlockState) -> str:
        """Return block HTML untouched, followed by a blank line."""
        return token['raw'] + '\n\n'

    def block_error(self, token: Dict[str, Any], state: BlockState) -> str:
        """Error blocks produce no output."""
        return ''

    def list(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a list via the shared ``render_list`` helper."""
        return render_list(self, token, state)
+
+
def _get_fenced_marker(code):
    """Pick a fence marker that the lines of ``code`` cannot close early.

    ``fenced_re`` collects the runs of backticks/tildes that start a line.
    If only one of the two characters occurs, the other one is used for a
    three-character fence. If both occur, the fence is made of backticks,
    one longer than the longest backtick run but never shorter than three,
    since a shorter run is not a fence at all.

    :param code: the code block content.
    :return: the marker string to use for both the opening and closing fence.
    """
    found = fenced_re.findall(code)
    if not found:
        return '```'

    # Runs are classified by their first character.
    ticks = [len(run) for run in found if run[0] == '`']
    has_waves = len(ticks) != len(found)

    if not ticks:
        return '```'
    if not has_waves:
        return '~~~'
    return '`' * max(3, max(ticks) + 1)
diff --git a/src/mistune/renderers/rst.py b/src/mistune/renderers/rst.py
new file mode 100644
index 0000000..fa12c21
--- /dev/null
+++ b/src/mistune/renderers/rst.py
@@ -0,0 +1,147 @@
+from typing import Dict, Any
+from textwrap import indent
+from ._list import render_list
+from ..core import BaseRenderer, BlockState
+from ..util import strip_end
+
+
class RSTRenderer(BaseRenderer):
    """A renderer for converting Markdown to ReST."""
    NAME = 'rst'

    #: marker symbols for heading
    HEADING_MARKERS = {
        1: '=',
        2: '-',
        3: '~',
        4: '^',
        5: '"',
        6: "'",
    }
    #: prefix of the substitution names generated for inline images
    INLINE_IMAGE_PREFIX = 'img-'

    def iter_tokens(self, tokens, state):
        """Yield the rendering of each token, skipping blank lines.

        Each rendered token gets a ``'prev'`` key pointing at the previous
        non-blank sibling (``None`` for the first one).
        """
        prev = None
        for tok in tokens:
            # ignore blank line
            if tok['type'] == 'blank_line':
                continue
            tok['prev'] = prev
            prev = tok
            yield self.render_token(tok, state)

    def __call__(self, tokens, state: BlockState):
        """Render ``tokens`` and append the inline-image definitions."""
        # Filled by image() while rendering, consumed afterwards.
        state.env['inline_images'] = []
        out = self.render_tokens(tokens, state)
        # Substitution definitions go after the body.
        out += '\n\n'.join(self.render_referrences(state)) + '\n'
        return strip_end(out)

    def render_referrences(self, state: BlockState):
        """Yield one image substitution definition per collected image."""
        images = state.env['inline_images']
        for index, token in enumerate(images):
            attrs = token['attrs']
            alt = self.render_children(token, state)
            ident = self.INLINE_IMAGE_PREFIX + str(index)
            yield '.. |' + ident + '| image:: ' + attrs['url'] + '\n   :alt: ' + alt

    def render_children(self, token, state: BlockState):
        """Render ``token['children']`` and return the resulting text."""
        children = token['children']
        return self.render_tokens(children, state)

    def text(self, token: Dict[str, Any], state: BlockState) -> str:
        """Return the raw text with ``|`` backslash-escaped."""
        text = token['raw']
        return text.replace('|', r'\|')

    def emphasis(self, token: Dict[str, Any], state: BlockState) -> str:
        """Wrap the children in single asterisks."""
        return '*' + self.render_children(token, state) + '*'

    def strong(self, token: Dict[str, Any], state: BlockState) -> str:
        """Wrap the children in double asterisks."""
        return '**' + self.render_children(token, state) + '**'

    def link(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a link as ```text <url>`__``."""
        attrs = token['attrs']
        text = self.render_children(token, state)
        return '`' + text + ' <' + attrs['url'] + '>`__'

    def image(self, token: Dict[str, Any], state: BlockState) -> str:
        """Record the image and return a ``|img-N|`` substitution reference."""
        refs: list = state.env['inline_images']
        index = len(refs)
        refs.append(token)
        return '|' + self.INLINE_IMAGE_PREFIX + str(index) + '|'

    def codespan(self, token: Dict[str, Any], state: BlockState) -> str:
        """Wrap the raw text in double backticks."""
        return '``' + token['raw'] + '``'

    def linebreak(self, token: Dict[str, Any], state: BlockState) -> str:
        """Return a placeholder that paragraph() later splits on."""
        return '<linebreak>'

    def softbreak(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a soft break as a single space."""
        return ' '

    def inline_html(self, token: Dict[str, Any], state: BlockState) -> str:
        """Inline HTML produces no output."""
        # rst does not support inline html
        return ''

    def paragraph(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a paragraph.

        A paragraph consisting of exactly one image becomes a ``figure``
        directive. Otherwise the children are rendered, and if they contain
        hard line breaks every resulting line is prefixed with ``| ``.
        """
        children = token['children']
        if len(children) == 1 and children[0]['type'] == 'image':
            image = children[0]
            attrs = image['attrs']
            title = attrs.get('title')
            alt = self.render_children(image, state)
            text = '.. figure:: ' + attrs['url']
            if title:
                text += '\n   :alt: ' + title
            text += '\n\n' + indent(alt, '   ')
        else:
            text = self.render_tokens(children, state)
            lines = text.split('<linebreak>')
            if len(lines) > 1:
                text = '\n'.join('| ' + line for line in lines)
        return text + '\n\n'

    def heading(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render the heading text underlined with the level's marker."""
        attrs = token['attrs']
        text = self.render_children(token, state)
        marker = self.HEADING_MARKERS[attrs['level']]
        return text + '\n' + marker * len(text) + '\n\n'

    def thematic_break(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a transition line."""
        return '--------------\n\n'

    def block_text(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render the children followed by a single newline."""
        return self.render_children(token, state) + '\n'

    def block_code(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a code block.

        With a usable info string the first word is passed to a ``code``
        directive; otherwise a plain ``::`` literal block is produced.
        """
        attrs = token.get('attrs', {})
        info = attrs.get('info')
        code = indent(token['raw'], '   ')
        # A whitespace-only info string is truthy but splits into nothing;
        # treat it like a missing one instead of indexing an empty list.
        words = info.split() if info else []
        if words:
            return '.. code:: ' + words[0] + '\n\n' + code + '\n'
        return '::\n\n' + code + '\n\n'

    def block_quote(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render the children indented by three spaces.

        When the previous sibling is not one of the listed block types, an
        empty comment (``..``) is emitted first to separate the two.
        """
        text = indent(self.render_children(token, state), '   ')
        # 'prev' is set by iter_tokens; a token rendered without going
        # through it is treated as having no previous sibling.
        prev = token.get('prev')
        ignore_blocks = (
            'paragraph',
            'thematic_break',
            'linebreak',
            'heading',
        )
        if prev and prev['type'] not in ignore_blocks:
            text = '..\n\n' + text
        return text

    def block_html(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render block HTML inside a ``raw:: html`` directive."""
        raw = token['raw']
        return '.. raw:: html\n\n' + indent(raw, '   ') + '\n\n'

    def block_error(self, token: Dict[str, Any], state: BlockState) -> str:
        """Error blocks produce no output."""
        return ''

    def list(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a list via the shared ``render_list`` helper."""
        return render_list(self, token, state)