diff options
Diffstat (limited to 'src/mistune/renderers')
-rw-r--r-- | src/mistune/renderers/__init__.py | 0 | ||||
-rw-r--r-- | src/mistune/renderers/_list.py | 60 | ||||
-rw-r--r-- | src/mistune/renderers/html.py | 151 | ||||
-rw-r--r-- | src/mistune/renderers/markdown.py | 146 | ||||
-rw-r--r-- | src/mistune/renderers/rst.py | 147 |
5 files changed, 504 insertions, 0 deletions
# src/mistune/renderers/__init__.py (new file, empty; index 0000000..e69de29)
# src/mistune/renderers/_list.py (new file; index 0000000..0a18639)
#
# List rendering shared by the renderers that call ``render_list``.
from ..util import strip_end


def render_list(renderer, token, state) -> str:
    """Render a ``list`` token as plain-text list markup.

    :param renderer: object exposing ``render_token(token, state)``; it is
        used to render the children of every list item.
    :param token: the list token. Reads ``token['attrs']['ordered']``,
        ``token['bullet']``, ``token['tight']``, ``token['children']`` and
        the optional ``token['parent']`` (set by ``_render_list_item`` on
        nested lists).
    :param state: passed through unchanged to ``renderer.render_token``.
    :returns: the rendered list text.
    """
    attrs = token['attrs']
    if attrs['ordered']:
        children = _render_ordered_list(renderer, token, state)
    else:
        children = _render_unordered_list(renderer, token, state)

    text = ''.join(children)
    parent = token.get('parent')
    if parent:
        # Nested list: a tight parent gets the text as-is, a loose parent
        # gets one extra newline after the nested list.
        if parent['tight']:
            return text
        return text + '\n'
    # Top-level list: normalise the tail through ``strip_end`` and finish
    # with a single newline.
    return strip_end(text) + '\n'


def _render_list_item(renderer, parent, item, state):
    """Render one list item, prefixed with ``parent['leading']``.

    The first rendered line follows the leading marker directly; every
    further non-empty line is indented by the width of the marker so it
    lines up under the first one. Empty lines are kept empty.

    An item that renders to nothing (no children, or only ``blank_line``
    children) yields just the marker followed by a newline instead of
    raising ``IndexError``.
    """
    leading = parent['leading']
    parts = []
    for tok in item['children']:
        if tok['type'] == 'list':
            # Let the nested list know about the enclosing list so that
            # ``render_list`` can pick the right trailing whitespace.
            tok['parent'] = parent
        elif tok['type'] == 'blank_line':
            continue
        parts.append(renderer.render_token(tok, state))

    lines = ''.join(parts).splitlines()
    if not lines:
        # ''.splitlines() is [], so there is no first line to index.
        return leading + '\n'

    prefix = ' ' * len(leading)
    out = [leading, lines[0], '\n']
    for line in lines[1:]:
        if line:
            out.append(prefix + line + '\n')
        else:
            out.append('\n')
    return ''.join(out)


def _render_ordered_list(renderer, token, state):
    """Yield the rendered items of an ordered list.

    Numbering begins at ``token['attrs']['start']`` (1 when the key is
    absent) and increases by one per item; each marker is the number
    followed by ``token['bullet']`` and a space.
    """
    attrs = token['attrs']
    start = attrs.get('start', 1)
    for item in token['children']:
        leading = str(start) + token['bullet'] + ' '
        parent = {
            'leading': leading,
            'tight': token['tight'],
        }
        yield _render_list_item(renderer, parent, item, state)
        start += 1


def _render_unordered_list(renderer, token, state):
    """Yield the rendered items of a bullet list.

    Every item uses the same marker: ``token['bullet']`` plus a space.
    """
    parent = {
        'leading': token['bullet'] + ' ',
        'tight': token['tight'],
    }
    for item in token['children']:
        yield _render_list_item(renderer, parent, item, state)


# Next in this diff: src/mistune/renderers/html.py (new file; index 0000000..c458a4a)
# ---------------------------------------------------------------------------
# src/mistune/renderers/html.py (new file; index 0000000..c458a4a)
# ---------------------------------------------------------------------------
from ..core import BaseRenderer
from ..util import escape as escape_text, striptags, safe_entity


class HTMLRenderer(BaseRenderer):
    """A renderer for converting Markdown to HTML.

    :param escape: when true, ``text``, ``inline_html`` and ``block_html``
        pass their input through ``escape`` instead of emitting it as-is.
    :param allow_harmful_protocols: ``True`` disables URL filtering in
        ``safe_url``; otherwise an iterable of URL prefixes (or one prefix
        as a string) that are let through. Prefixes are compared against
        the lower-cased URL, so they should be lower-case themselves.
    """
    NAME = 'html'
    HARMFUL_PROTOCOLS = (
        'javascript:',
        'vbscript:',
        'file:',
        'data:',
    )
    GOOD_DATA_PROTOCOLS = (
        'data:image/gif;',
        'data:image/png;',
        'data:image/jpeg;',
        'data:image/webp;',
    )

    def __init__(self, escape=True, allow_harmful_protocols=None):
        super().__init__()
        self._allow_harmful_protocols = allow_harmful_protocols
        self._escape = escape

    def render_token(self, token, state):
        """Dispatch ``token`` to the method registered for its type.

        Unlike the token-based renderers, the HTML methods receive the
        already rendered text (``token['raw']`` or the rendered children)
        as first positional argument and ``token['attrs']`` as keyword
        arguments.
        """
        # backward compatible with v2
        func = self._get_method(token['type'])
        attrs = token.get('attrs')

        if 'raw' in token:
            text = token['raw']
        elif 'children' in token:
            text = self.render_tokens(token['children'], state)
        else:
            # Tokens without content (e.g. breaks) only get their attrs.
            if attrs:
                return func(**attrs)
            else:
                return func()
        if attrs:
            return func(text, **attrs)
        else:
            return func(text)

    def safe_url(self, url: str) -> str:
        """Ensure the given URL is safe. This method is used for rendering
        links, images, and etc.

        :returns: ``url`` unchanged when it is allowed, ``'#harmful-link'``
            when it starts with one of ``HARMFUL_PROTOCOLS`` and is not one
            of the ``GOOD_DATA_PROTOCOLS`` image data URLs.
        """
        allowed = self._allow_harmful_protocols
        if allowed is True:
            return url

        _url = url.lower()
        if allowed:
            # tuple('javascript:') would yield single characters and let
            # almost any URL through; treat a bare string as one prefix.
            if isinstance(allowed, str):
                allowed = (allowed,)
            if _url.startswith(tuple(allowed)):
                return url

        if _url.startswith(self.HARMFUL_PROTOCOLS) and \
                not _url.startswith(self.GOOD_DATA_PROTOCOLS):
            return '#harmful-link'
        return url

    def text(self, text: str) -> str:
        if self._escape:
            return escape_text(text)
        return safe_entity(text)

    def emphasis(self, text: str) -> str:
        return '<em>' + text + '</em>'

    def strong(self, text: str) -> str:
        return '<strong>' + text + '</strong>'

    def link(self, text: str, url: str, title=None) -> str:
        s = '<a href="' + self.safe_url(url) + '"'
        if title:
            s += ' title="' + safe_entity(title) + '"'
        return s + '>' + text + '</a>'

    def image(self, text: str, url: str, title=None) -> str:
        src = self.safe_url(url)
        # ``text`` is rendered HTML; drop its tags before using it as alt.
        alt = escape_text(striptags(text))
        s = '<img src="' + src + '" alt="' + alt + '"'
        if title:
            s += ' title="' + safe_entity(title) + '"'
        return s + ' />'

    def codespan(self, text: str) -> str:
        return '<code>' + text + '</code>'

    def linebreak(self) -> str:
        return '<br />\n'

    def softbreak(self) -> str:
        return '\n'

    def inline_html(self, html: str) -> str:
        if self._escape:
            return escape_text(html)
        return html

    def paragraph(self, text: str) -> str:
        return '<p>' + text + '</p>\n'

    def heading(self, text: str, level: int, **attrs) -> str:
        tag = 'h' + str(level)
        html = '<' + tag
        _id = attrs.get('id')
        if _id:
            html += ' id="' + _id + '"'
        return html + '>' + text + '</' + tag + '>\n'

    def blank_line(self) -> str:
        return ''

    def thematic_break(self) -> str:
        return '<hr />\n'

    def block_text(self, text: str) -> str:
        return text

    def block_code(self, code: str, info=None) -> str:
        html = '<pre><code'
        if info is not None:
            info = safe_entity(info.strip())
        if info:
            # Only the first word of the info string names the language.
            lang = info.split(None, 1)[0]
            html += ' class="language-' + lang + '"'
        return html + '>' + escape_text(code) + '</code></pre>\n'

    def block_quote(self, text: str) -> str:
        return '<blockquote>\n' + text + '</blockquote>\n'

    def block_html(self, html: str) -> str:
        if self._escape:
            return '<p>' + escape_text(html) + '</p>\n'
        return html + '\n'

    def block_error(self, text: str) -> str:
        return '<div class="error"><pre>' + text + '</pre></div>\n'

    def list(self, text: str, ordered: bool, **attrs) -> str:
        if ordered:
            html = '<ol'
            start = attrs.get('start')
            if start is not None:
                html += ' start="' + str(start) + '"'
            return html + '>\n' + text + '</ol>\n'
        return '<ul>\n' + text + '</ul>\n'

    def list_item(self, text: str) -> str:
        return '<li>' + text + '</li>\n'


# ---------------------------------------------------------------------------
# src/mistune/renderers/markdown.py (new file; index 0000000..78334bc)
# ---------------------------------------------------------------------------
import re
from typing import Dict, Any
from textwrap import indent
from ._list import render_list
from ..core import BaseRenderer, BlockState
from ..util import strip_end

# Runs of backticks / tildes at the start of any line of a code block.
fenced_re = re.compile(r'^(?:`|~)+', re.M)


class MarkdownRenderer(BaseRenderer):
    """A renderer to re-format Markdown text."""
    NAME = 'markdown'

    def __call__(self, tokens, state: BlockState):
        out = self.render_tokens(tokens, state)
        # Reference link definitions are appended after the body,
        # separated from each other by a blank line.
        out += '\n\n'.join(self.render_referrences(state)) + '\n'
        return strip_end(out)

    def render_referrences(self, state: BlockState):
        """Yield one ``[label]: url "title"`` line per entry of
        ``state.env['ref_links']``."""
        ref_links = state.env['ref_links']
        for key in ref_links:
            attrs = ref_links[key]
            text = '[' + attrs['label'] + ']: ' + attrs['url']
            title = attrs.get('title')
            if title:
                text += ' "' + title + '"'
            yield text

    def render_children(self, token, state: BlockState):
        children = token['children']
        return self.render_tokens(children, state)

    def text(self, token: Dict[str, Any], state: BlockState) -> str:
        return token['raw']

    def emphasis(self, token: Dict[str, Any], state: BlockState) -> str:
        return '*' + self.render_children(token, state) + '*'

    def strong(self, token: Dict[str, Any], state: BlockState) -> str:
        return '**' + self.render_children(token, state) + '**'

    def link(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a link as reference link, autolink or inline link.

        A token carrying ``label`` becomes ``[text][label]``. A link whose
        text equals its URL (optionally minus ``mailto:``) and that has no
        title becomes ``<text>``. Everything else is ``[text](url "title")``.
        """
        label = token.get('label')
        text = self.render_children(token, state)
        out = '[' + text + ']'
        if label:
            return out + '[' + label + ']'

        attrs = token['attrs']
        url = attrs['url']
        title = attrs.get('title')
        if text == url and not title:
            return '<' + text + '>'
        elif 'mailto:' + text == url and not title:
            return '<' + text + '>'

        out += '('
        if '(' in url or ')' in url:
            # Parentheses would end the destination early; use <...> form.
            out += '<' + url + '>'
        else:
            out += url
        if title:
            out += ' "' + title + '"'
        return out + ')'

    def image(self, token: Dict[str, Any], state: BlockState) -> str:
        # NOTE(review): when the alt text equals the URL, ``link`` returns
        # the ``<url>`` autolink form and this yields ``!<url>`` - confirm
        # whether that case can occur and is acceptable.
        return '!' + self.link(token, state)

    def codespan(self, token: Dict[str, Any], state: BlockState) -> str:
        return '`' + token['raw'] + '`'

    def linebreak(self, token: Dict[str, Any], state: BlockState) -> str:
        # A hard line break needs at least two spaces before the newline.
        return '  \n'

    def softbreak(self, token: Dict[str, Any], state: BlockState) -> str:
        return '\n'

    def blank_line(self, token: Dict[str, Any], state: BlockState) -> str:
        return ''

    def inline_html(self, token: Dict[str, Any], state: BlockState) -> str:
        return token['raw']

    def paragraph(self, token: Dict[str, Any], state: BlockState) -> str:
        text = self.render_children(token, state)
        return text + '\n\n'

    def heading(self, token: Dict[str, Any], state: BlockState) -> str:
        level = token['attrs']['level']
        marker = '#' * level
        text = self.render_children(token, state)
        return marker + ' ' + text + '\n\n'

    def thematic_break(self, token: Dict[str, Any], state: BlockState) -> str:
        return '***\n\n'

    def block_text(self, token: Dict[str, Any], state: BlockState) -> str:
        return self.render_children(token, state) + '\n'

    def block_code(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a code block as a fenced block.

        The fence is ``token['marker']`` when present, otherwise one that
        does not collide with fence-like runs inside the code.
        """
        attrs = token.get('attrs', {})
        info = attrs.get('info', '')
        code = token['raw']
        if code and code[-1] != '\n':
            # The closing fence must start on its own line.
            code += '\n'

        marker = token.get('marker')
        if not marker:
            marker = _get_fenced_marker(code)
        return marker + info + '\n' + code + marker + '\n\n'

    def block_quote(self, token: Dict[str, Any], state: BlockState) -> str:
        text = indent(self.render_children(token, state), '> ')
        return text + '\n\n'

    def block_html(self, token: Dict[str, Any], state: BlockState) -> str:
        return token['raw'] + '\n\n'

    def block_error(self, token: Dict[str, Any], state: BlockState) -> str:
        return ''

    def list(self, token: Dict[str, Any], state: BlockState) -> str:
        return render_list(self, token, state)


def _get_fenced_marker(code):
    """Pick a fence that cannot be closed by a line inside ``code``.

    * no line starts with a backtick -> three backticks
    * lines start with backticks but none with ``~`` -> ``~~~``
    * both occur -> one backtick more than the longest line-leading
      backtick run, but never fewer than three, because a shorter run is
      not a code fence.
    """
    found = fenced_re.findall(code)
    if not found:
        return '```'

    ticks = [len(s) for s in found if s[0] == '`']
    waves = [len(s) for s in found if s[0] != '`']

    if not ticks:
        return '```'

    if not waves:
        return '~~~'
    return '`' * max(3, max(ticks) + 1)


# ---------------------------------------------------------------------------
# src/mistune/renderers/rst.py (new file; index 0000000..fa12c21)
# ---------------------------------------------------------------------------
from typing import Dict, Any
from textwrap import indent
from ._list import render_list
from ..core import BaseRenderer, BlockState
from ..util import strip_end


class RSTRenderer(BaseRenderer):
    """A renderer for converting Markdown to ReST."""
    # NOTE(review): this code was recovered from a scraped diff in which
    # runs of whitespace are collapsed. The one-space indents inside the
    # string literals below are reproduced as shown; the committed file
    # may use wider indents - confirm against the repository.
    NAME = 'rst'

    #: marker symbols for heading
    HEADING_MARKERS = {
        1: '=',
        2: '-',
        3: '~',
        4: '^',
        5: '"',
        6: "'",
    }
    INLINE_IMAGE_PREFIX = 'img-'

    def iter_tokens(self, tokens, state):
        """Yield the rendering of every token except blank lines.

        Each token gets a ``prev`` key pointing at the previously rendered
        sibling (``None`` for the first one); ``block_quote`` reads it.
        """
        prev = None
        for tok in tokens:
            # ignore blank line
            if tok['type'] == 'blank_line':
                continue
            tok['prev'] = prev
            prev = tok
            yield self.render_token(tok, state)

    def __call__(self, tokens, state: BlockState):
        # ``image`` collects inline images here while the body renders.
        state.env['inline_images'] = []
        out = self.render_tokens(tokens, state)
        # Substitution definitions for the collected images go after the
        # body, separated from each other by a blank line.
        out += '\n\n'.join(self.render_referrences(state)) + '\n'
        return strip_end(out)

    def render_referrences(self, state: BlockState):
        """Yield one ``.. |img-N| image::`` substitution definition per
        image stored in ``state.env['inline_images']``."""
        images = state.env['inline_images']
        for index, token in enumerate(images):
            attrs = token['attrs']
            alt = self.render_children(token, state)
            ident = self.INLINE_IMAGE_PREFIX + str(index)
            yield '.. |' + ident + '| image:: ' + attrs['url'] + '\n :alt: ' + alt

    def render_children(self, token, state: BlockState):
        children = token['children']
        return self.render_tokens(children, state)

    def text(self, token: Dict[str, Any], state: BlockState) -> str:
        text = token['raw']
        # ``|`` delimits substitution references such as ``|img-0|``.
        return text.replace('|', r'\|')

    def emphasis(self, token: Dict[str, Any], state: BlockState) -> str:
        return '*' + self.render_children(token, state) + '*'

    def strong(self, token: Dict[str, Any], state: BlockState) -> str:
        return '**' + self.render_children(token, state) + '**'

    def link(self, token: Dict[str, Any], state: BlockState) -> str:
        attrs = token['attrs']
        text = self.render_children(token, state)
        return '`' + text + ' <' + attrs['url'] + '>`__'

    def image(self, token: Dict[str, Any], state: BlockState) -> str:
        """Register an inline image and return its ``|img-N|`` reference.

        The definition itself is emitted later by ``render_referrences``.
        """
        refs: list = state.env['inline_images']
        index = len(refs)
        refs.append(token)
        return '|' + self.INLINE_IMAGE_PREFIX + str(index) + '|'

    def codespan(self, token: Dict[str, Any], state: BlockState) -> str:
        return '``' + token['raw'] + '``'

    def linebreak(self, token: Dict[str, Any], state: BlockState) -> str:
        # Placeholder; ``paragraph`` splits on it to build a line block.
        return '<linebreak>'

    def softbreak(self, token: Dict[str, Any], state: BlockState) -> str:
        return ' '

    def inline_html(self, token: Dict[str, Any], state: BlockState) -> str:
        # rst does not support inline html
        return ''

    def paragraph(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a paragraph.

        A paragraph consisting of a single image becomes a ``figure``
        directive. A paragraph containing hard line breaks becomes a line
        block (every line prefixed with ``| ``).
        """
        children = token['children']
        if len(children) == 1 and children[0]['type'] == 'image':
            image = children[0]
            attrs = image['attrs']
            title = attrs.get('title')
            alt = self.render_children(image, state)
            text = '.. figure:: ' + attrs['url']
            if title:
                text += '\n :alt: ' + title
            text += '\n\n' + indent(alt, ' ')
        else:
            text = self.render_tokens(children, state)
            lines = text.split('<linebreak>')
            if len(lines) > 1:
                text = '\n'.join('| ' + line for line in lines)
        return text + '\n\n'

    def heading(self, token: Dict[str, Any], state: BlockState) -> str:
        attrs = token['attrs']
        text = self.render_children(token, state)
        marker = self.HEADING_MARKERS[attrs['level']]
        # The underline is as long as the heading text.
        return text + '\n' + marker * len(text) + '\n\n'

    def thematic_break(self, token: Dict[str, Any], state: BlockState) -> str:
        return '--------------\n\n'

    def block_text(self, token: Dict[str, Any], state: BlockState) -> str:
        return self.render_children(token, state) + '\n'

    def block_code(self, token: Dict[str, Any], state: BlockState) -> str:
        attrs = token.get('attrs', {})
        info = attrs.get('info')
        code = indent(token['raw'], ' ')
        if info:
            # Only the first word of the info string names the language.
            lang = info.split()[0]
            return '.. code:: ' + lang + '\n\n' + code + '\n'
        else:
            return '::\n\n' + code + '\n\n'

    def block_quote(self, token: Dict[str, Any], state: BlockState) -> str:
        text = indent(self.render_children(token, state), ' ')
        prev = token['prev']
        ignore_blocks = (
            'paragraph',
            'thematic_break',
            'linebreak',
            'heading',
        )
        if prev and prev['type'] not in ignore_blocks:
            # Put an empty comment between the previous block and the
            # quote when the previous block is not one of the types above.
            text = '..\n\n' + text
        return text

    def block_html(self, token: Dict[str, Any], state: BlockState) -> str:
        raw = token['raw']
        return '.. raw:: html\n\n' + indent(raw, ' ') + '\n\n'

    def block_error(self, token: Dict[str, Any], state: BlockState) -> str:
        return ''

    def list(self, token: Dict[str, Any], state: BlockState) -> str:
        return render_list(self, token, state)