summaryrefslogtreecommitdiff
path: root/src/mistune
diff options
context:
space:
mode:
Diffstat (limited to 'src/mistune')
-rw-r--r--src/mistune/__init__.py75
-rw-r--r--src/mistune/__main__.py124
-rw-r--r--src/mistune/block_parser.py486
-rw-r--r--src/mistune/core.py208
-rw-r--r--src/mistune/directives/__init__.py31
-rw-r--r--src/mistune/directives/_base.py121
-rw-r--r--src/mistune/directives/_fenced.py142
-rw-r--r--src/mistune/directives/_rst.py73
-rw-r--r--src/mistune/directives/admonition.py61
-rw-r--r--src/mistune/directives/image.py152
-rw-r--r--src/mistune/directives/include.py65
-rw-r--r--src/mistune/directives/toc.py105
-rw-r--r--src/mistune/helpers.py137
-rw-r--r--src/mistune/inline_parser.py390
-rw-r--r--src/mistune/list_parser.py250
-rw-r--r--src/mistune/markdown.py104
-rw-r--r--src/mistune/plugins/__init__.py38
-rw-r--r--src/mistune/plugins/abbr.py103
-rw-r--r--src/mistune/plugins/def_list.py135
-rw-r--r--src/mistune/plugins/footnotes.py153
-rw-r--r--src/mistune/plugins/formatting.py173
-rw-r--r--src/mistune/plugins/math.py57
-rw-r--r--src/mistune/plugins/ruby.py100
-rw-r--r--src/mistune/plugins/speedup.py44
-rw-r--r--src/mistune/plugins/spoiler.py80
-rw-r--r--src/mistune/plugins/table.py179
-rw-r--r--src/mistune/plugins/task_lists.py67
-rw-r--r--src/mistune/plugins/url.py23
-rw-r--r--src/mistune/renderers/__init__.py0
-rw-r--r--src/mistune/renderers/_list.py60
-rw-r--r--src/mistune/renderers/html.py151
-rw-r--r--src/mistune/renderers/markdown.py146
-rw-r--r--src/mistune/renderers/rst.py147
-rw-r--r--src/mistune/toc.py111
-rw-r--r--src/mistune/util.py81
35 files changed, 4372 insertions, 0 deletions
diff --git a/src/mistune/__init__.py b/src/mistune/__init__.py
new file mode 100644
index 0000000..4de95c5
--- /dev/null
+++ b/src/mistune/__init__.py
@@ -0,0 +1,75 @@
+"""
+ mistune
+ ~~~~~~~
+
+ A fast yet powerful Python Markdown parser with renderers and
+ plugins, compatible with sane CommonMark rules.
+
+ Documentation: https://mistune.lepture.com/
+"""
+
+from .markdown import Markdown
+from .core import BlockState, InlineState, BaseRenderer
+from .block_parser import BlockParser
+from .inline_parser import InlineParser
+from .renderers.html import HTMLRenderer
+from .util import escape, escape_url, safe_entity, unikey
+from .plugins import import_plugin
+
+
def create_markdown(escape=True, hard_wrap=False, renderer='html', plugins=None):
    """Build a reusable :class:`Markdown` instance.

    :param escape: Boolean. Passed to ``HTMLRenderer`` when ``renderer`` is
        ``'html'``; escape html.
    :param hard_wrap: Boolean. Break every new line into ``<br>``.
    :param renderer: ``'html'`` (the default) creates an ``HTMLRenderer``;
        any other value is handed to ``Markdown`` unchanged.
    :param plugins: List of plugins, each one resolved with
        ``import_plugin``. ``None`` is passed through as ``None``.

    Create the instance once and call it many times::

        markdown = create_markdown(
            escape=False,
            hard_wrap=True,
        )
        # re-use markdown function
        markdown('.... your text ...')
    """
    chosen_renderer = HTMLRenderer(escape=escape) if renderer == 'html' else renderer
    inline_parser = InlineParser(hard_wrap=hard_wrap)

    if plugins is None:
        loaded_plugins = None
    else:
        loaded_plugins = [import_plugin(name) for name in plugins]

    return Markdown(
        renderer=chosen_renderer,
        inline=inline_parser,
        plugins=loaded_plugins,
    )
+
+
# Ready-made converter built with ``escape=False`` and the strikethrough,
# footnotes, table and speedup plugins enabled.
html = create_markdown(
    escape=False,
    plugins=['strikethrough', 'footnotes', 'table', 'speedup']
)
+
+
# Markdown instances created by ``markdown()``, keyed by its options.
__cached_parsers = {}


def markdown(text, escape=True, renderer='html', plugins=None):
    """Convert ``text`` with a parser that is cached per option set.

    :param text: markdown source to convert.
    :param escape: forwarded to :func:`create_markdown`.
    :param renderer: forwarded to :func:`create_markdown`.
    :param plugins: iterable of plugins forwarded to :func:`create_markdown`.
    :return: whatever the ``Markdown`` instance returns for ``text``.
    """
    try:
        # A list of plugins is not hashable, so normalise it to a tuple
        # before it becomes part of the cache key.
        frozen_plugins = None if plugins is None else tuple(plugins)
        key = (escape, renderer, frozen_plugins)
        md = __cached_parsers.get(key)
    except TypeError:
        # An unhashable renderer or plugin object: convert without caching
        # instead of failing.
        md = create_markdown(escape=escape, renderer=renderer, plugins=plugins)
        return md(text)

    if md is None:
        md = create_markdown(escape=escape, renderer=renderer, plugins=plugins)
        # improve the speed for markdown parser creation
        __cached_parsers[key] = md
    return md(text)
+
+
# Public names exported by ``from mistune import *``.
__all__ = [
    'Markdown', 'HTMLRenderer',
    'BlockParser', 'BlockState', 'BaseRenderer',
    'InlineParser', 'InlineState',
    'escape', 'escape_url', 'safe_entity', 'unikey',
    'html', 'create_markdown', 'markdown',
]

# Package metadata; ``__version__`` is also printed by the command line tool.
__version__ = '3.0.0rc4'
__homepage__ = 'https://mistune.lepture.com/'
diff --git a/src/mistune/__main__.py b/src/mistune/__main__.py
new file mode 100644
index 0000000..053a379
--- /dev/null
+++ b/src/mistune/__main__.py
@@ -0,0 +1,124 @@
+import sys
+import argparse
+from .renderers.rst import RSTRenderer
+from .renderers.markdown import MarkdownRenderer
+from . import (
+ create_markdown,
+ __version__ as version
+)
+
+
def _md(args):
    """Create the Markdown instance described by the parsed CLI ``args``.

    Reads ``args.plugin``, ``args.renderer``, ``args.escape`` and
    ``args.hardwrap``. Without ``--plugin`` the default plugin set is used.
    """
    # default plugins apply when none were given on the command line
    plugins = args.plugin or ['strikethrough', 'footnotes', 'table', 'speedup']

    renderer_name = args.renderer
    if renderer_name == 'rst':
        renderer = RSTRenderer()
    elif renderer_name == 'markdown':
        renderer = MarkdownRenderer()
    else:
        # any other value (e.g. 'html') is resolved by create_markdown
        renderer = renderer_name

    options = {
        'escape': args.escape,
        'hard_wrap': args.hardwrap,
        'renderer': renderer,
        'plugins': plugins,
    }
    return create_markdown(**options)
+
+
+def _output(text, args):
+ if args.output:
+ with open(args.output, 'w') as f:
+ f.write(text)
+ else:
+ print(text)
+
+
# Description shown by ``python -m mistune --help``; ``cli()`` passes it to
# argparse together with ``RawDescriptionHelpFormatter``.
CMD_HELP = '''Mistune, a sane and fast python markdown parser.

Here are some use cases of the command line tool:

 $ python -m mistune -m "Hi **Markdown**"
 <p>Hi <strong>Markdown</strong></p>

 $ python -m mistune -f README.md
 <p>...

 $ cat README.md | python -m mistune
 <p>...
'''
+
+
def cli():
    """Run the ``python -m mistune`` command line tool.

    The input is taken from ``--message``; when neither ``--message`` nor
    ``--file`` is given it is read from a piped stdin. If there is still
    no message, ``--file`` is converted. With no input at all, an error is
    written to stderr and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        prog='python -m mistune',
        description=CMD_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '-m', '--message',
        help='the markdown message to convert',
    )
    parser.add_argument(
        '-f', '--file',
        help='the markdown file to convert',
    )
    parser.add_argument(
        '-p', '--plugin',
        metavar='NAME',
        # NOTE: the 'extend' action requires Python 3.8+
        action='extend',
        nargs='+',
        help='specify a plugin to use',
    )
    parser.add_argument(
        '--escape',
        action='store_true',
        help='turn on escape option',
    )
    parser.add_argument(
        '--hardwrap',
        action='store_true',
        help='turn on hardwrap option',
    )
    parser.add_argument(
        '-o', '--output',
        help='write the rendered result into file',
    )
    parser.add_argument(
        '-r', '--renderer',
        default='html',
        help='specify the output renderer',
    )
    parser.add_argument('--version', action='version', version='mistune ' + version)
    args = parser.parse_args()

    message = args.message
    if not message and not args.file:
        message = read_stdin()

    if message:
        text = _md(args)(message)
    elif args.file:
        text = _md(args).read(args.file)[0]
    else:
        # Diagnostics go to stderr so they never mix with rendered output.
        print('You MUST specify a message or file', file=sys.stderr)
        return sys.exit(1)

    _output(text, args)
+
+
def read_stdin():
    """Return everything piped into stdin, or ``None`` on a terminal."""
    if sys.stdin.isatty():
        # interactive terminal: nothing was piped in
        return None
    return sys.stdin.read()


if __name__ == '__main__':
    cli()
diff --git a/src/mistune/block_parser.py b/src/mistune/block_parser.py
new file mode 100644
index 0000000..1ed79ec
--- /dev/null
+++ b/src/mistune/block_parser.py
@@ -0,0 +1,486 @@
+import re
+from typing import Optional, List, Tuple
+from .util import (
+ unikey,
+ escape_url,
+ expand_tab,
+ expand_leading_tab,
+)
+from .core import Parser, BlockState
+from .helpers import (
+ LINK_LABEL,
+ HTML_TAGNAME,
+ HTML_ATTRIBUTES,
+ BLOCK_TAGS,
+ PRE_TAGS,
+ unescape_char,
+ parse_link_href,
+ parse_link_title,
+)
+from .list_parser import parse_list, LIST_PATTERN
+
# Strips up to four leading spaces from every line of an indented code block.
_INDENT_CODE_TRIM = re.compile(r'^ {1,4}', flags=re.M)
# Matches the optional closing run of ``#`` at the end of an AXT heading.
_AXT_HEADING_TRIM = re.compile(r'(\s+|^)#+\s*$')
# Strips the single optional space that follows a block quote marker.
_BLOCK_QUOTE_TRIM = re.compile(r'^ ?', flags=re.M)
# Matches the leading ``>`` marker (with its indentation) on each quote line.
_BLOCK_QUOTE_LEADING = re.compile(r'^ *>', flags=re.M)

# Text that ends with a blank line.
_LINE_BLANK_END = re.compile(r'\n[ \t]*\n$')
# Optional horizontal whitespace followed by a line break.
_BLANK_TO_LINE = re.compile(r'[ \t]*\n')

# Alternation of the block-level and pre-formatted HTML tag names.
_BLOCK_TAGS_PATTERN = '|'.join(BLOCK_TAGS) + '|' + '|'.join(PRE_TAGS)
# Remainder of an open tag (attributes and ``>``) that ends its line.
_OPEN_TAG_END = re.compile(HTML_ATTRIBUTES + r'[ \t]*>[ \t]*(?:\n|$)')
# Remainder of a closing tag (``>``) that ends its line.
_CLOSE_TAG_END = re.compile(r'[ \t]*>[ \t]*(?:\n|$)')
# One or more consecutive lines that each start with a ``>`` marker.
_STRICT_BLOCK_QUOTE = re.compile(r'( {0,3}>[^\n]*(?:\n|$))+')
+
+
class BlockParser(Parser):
    """Parse markdown source into block level tokens.

    Every key of ``SPECIFICATION`` is a rule name mapped to a regex snippet;
    ``__init__`` binds each rule to the ``parse_<name>`` method of this
    class. A parse method appends tokens to the given state and returns the
    position where parsing continues. A falsy return value rejects the
    match, in which case :meth:`parse` adds the current line to a paragraph.
    """
    BLANK_LINE = re.compile(r'(^[ \t\v\f]*\n)+', re.M)

    RAW_HTML = (
        r'^ {0,3}('
        r'</?' + HTML_TAGNAME + r'|'
        r'<!--|'  # comment
        r'<\?|'  # script
        r'<![A-Z]|'
        r'<!\[CDATA\[)'
    )

    BLOCK_HTML = (
        r'^ {0,3}(?:'
        r'(?:</?' + _BLOCK_TAGS_PATTERN + r'(?:[ \t]+|\n|$))'
        r'|<!--'  # comment
        r'|<\?'  # script
        r'|<![A-Z]'
        r'|<!\[CDATA\[)'
    )

    SPECIFICATION = {
        'blank_line': r'(^[ \t\v\f]*\n)+',
        'axt_heading': r'^ {0,3}(?P<axt_1>#{1,6})(?!#+)(?P<axt_2>[ \t]*|[ \t]+.*?)$',
        'setex_heading': r'^ {0,3}(?P<setext_1>=|-){1,}[ \t]*$',
        'fenced_code': (
            r'^(?P<fenced_1> {0,3})(?P<fenced_2>`{3,}|~{3,})'
            r'[ \t]*(?P<fenced_3>.*?)$'
        ),
        'indent_code': (
            r'^(?: {4}| *\t)[^\n]+(?:\n+|$)'
            r'((?:(?: {4}| *\t)[^\n]+(?:\n+|$))|\s)*'
        ),
        'thematic_break': r'^ {0,3}((?:-[ \t]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})$',
        'ref_link': r'^ {0,3}\[(?P<reflink_1>' + LINK_LABEL + r')\]:',
        'block_quote': r'^ {0,3}>(?P<quote_1>.*?)$',
        'list': LIST_PATTERN,
        'block_html': BLOCK_HTML,
        'raw_html': RAW_HTML,
    }

    DEFAULT_RULES = (
        'fenced_code',
        'indent_code',
        'axt_heading',
        'setex_heading',
        'thematic_break',
        'block_quote',
        'list',
        'ref_link',
        'raw_html',
        'blank_line',
    )

    def __init__(
        self,
        block_quote_rules: Optional[List[str]]=None,
        list_rules: Optional[List[str]]=None,
        max_nested_level: int=6
    ):
        """Create a block parser.

        :param block_quote_rules: rule names used inside block quotes,
            defaults to a copy of ``DEFAULT_RULES``.
        :param list_rules: rule names stored as ``self.list_rules``,
            defaults to a copy of ``DEFAULT_RULES``.
        :param max_nested_level: nesting depth limit used by
            :meth:`parse_block_quote`.
        """
        super(BlockParser, self).__init__()

        if block_quote_rules is None:
            block_quote_rules = list(self.DEFAULT_RULES)

        if list_rules is None:
            list_rules = list(self.DEFAULT_RULES)

        self.block_quote_rules = block_quote_rules
        self.list_rules = list_rules
        self.max_nested_level = max_nested_level
        # register default parse methods
        self._methods = {
            name: getattr(self, 'parse_' + name) for name in self.SPECIFICATION
        }

    def parse_blank_line(self, m: re.Match, state: BlockState) -> int:
        """Parse token for blank lines."""
        state.append_token({'type': 'blank_line'})
        return m.end()

    def parse_thematic_break(self, m: re.Match, state: BlockState) -> int:
        """Parse token for thematic break, e.g. ``<hr>`` tag in HTML."""
        state.append_token({'type': 'thematic_break'})
        # $ does not count '\n'
        return m.end() + 1

    def parse_indent_code(self, m: re.Match, state: BlockState) -> int:
        """Parse token for code block which is indented by 4 spaces."""
        # it is a part of the paragraph
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        code = m.group(0)
        code = expand_leading_tab(code)
        code = _INDENT_CODE_TRIM.sub('', code)
        code = code.strip('\n')
        state.append_token({'type': 'block_code', 'raw': code, 'style': 'indent'})
        return m.end()

    def parse_fenced_code(self, m: re.Match, state: BlockState) -> Optional[int]:
        """Parse token for fenced code block. A fenced code block is started with
        3 or more backtick(`) or tilde(~).

        An example of a fenced code block:

        .. code-block:: markdown

            ```python
            def markdown(text):
                return mistune.html(text)
            ```

        Returns ``None`` when the info string of a backtick fence contains
        a backtick.
        """
        spaces = m.group('fenced_1')
        marker = m.group('fenced_2')
        info = m.group('fenced_3')

        c = marker[0]
        if info and c == '`':
            # CommonMark Example 145
            # Info strings for backtick code blocks cannot contain backticks
            if info.find(c) != -1:
                return

        # the closing fence uses the same character and is at least as long
        _end = re.compile(
            r'^ {0,3}' + c + '{' + str(len(marker)) + r',}[ \t]*(?:\n|$)', re.M)
        cursor_start = m.end() + 1

        m2 = _end.search(state.src, cursor_start)
        if m2:
            code = state.src[cursor_start:m2.start()]
            end_pos = m2.end()
        else:
            # no closing fence: the code runs to the end of the source
            code = state.src[cursor_start:]
            end_pos = state.cursor_max

        if spaces and code:
            # remove the indentation of the opening fence from every line
            _trim_pattern = re.compile('^ {0,' + str(len(spaces)) + '}', re.M)
            code = _trim_pattern.sub('', code)

        token = {'type': 'block_code', 'raw': code, 'style': 'fenced', 'marker': marker}
        if info:
            info = unescape_char(info)
            token['attrs'] = {'info': info.strip()}

        state.append_token(token)
        return end_pos

    def parse_axt_heading(self, m: re.Match, state: BlockState) -> int:
        """Parse token for AXT heading. An AXT heading is started with 1 to 6
        symbol of ``#``."""
        level = len(m.group('axt_1'))
        text = m.group('axt_2').strip()
        # remove last #
        if text:
            text = _AXT_HEADING_TRIM.sub('', text)

        token = {'type': 'heading', 'text': text, 'attrs': {'level': level}, 'style': 'axt'}
        state.append_token(token)
        return m.end() + 1

    def parse_setex_heading(self, m: re.Match, state: BlockState) -> Optional[int]:
        """Parse token for setex style heading. A setex heading syntax looks like:

        .. code-block:: markdown

            H1 title
            ========

        The previous paragraph token is turned into the heading. Without
        a preceding paragraph the line is retried as a thematic break or
        a list.
        """
        last_token = state.last_token()
        if last_token and last_token['type'] == 'paragraph':
            level = 1 if m.group('setext_1') == '=' else 2
            last_token['type'] = 'heading'
            last_token['style'] = 'setext'
            last_token['attrs'] = {'level': level}
            return m.end() + 1

        sc = self.compile_sc(['thematic_break', 'list'])
        m = sc.match(state.src, state.cursor)
        if m:
            return self.parse_method(m, state)

    def parse_ref_link(self, m: re.Match, state: BlockState) -> Optional[int]:
        """Parse link references and save the link information into ``state.env``.

        Here is an example of a link reference:

        .. code-block:: markdown

            a [link][example]

            [example]: https://example.com "Optional title"

        This method will save the link reference into ``state.env`` as::

            state.env['ref_links']['example'] = {
                'url': 'https://example.com',
                'title': "Optional title",
            }

        An already defined key is kept; later definitions do not replace it.
        """
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        label = m.group('reflink_1')
        key = unikey(label)
        if not key:
            return

        href, href_pos = parse_link_href(state.src, m.end(), block=True)
        if href is None:
            return

        # a title may not extend past the next blank line
        _blank = self.BLANK_LINE.search(state.src, href_pos)
        if _blank:
            max_pos = _blank.start()
        else:
            max_pos = state.cursor_max

        title, title_pos = parse_link_title(state.src, href_pos, max_pos)
        if title_pos:
            # the title must be followed by the end of its line
            m = _BLANK_TO_LINE.match(state.src, title_pos)
            if m:
                title_pos = m.end()
            else:
                title_pos = None
                title = None

        if title_pos is None:
            # no usable title: the href must be followed by the end of its line
            m = _BLANK_TO_LINE.match(state.src, href_pos)
            if m:
                href_pos = m.end()
            else:
                href_pos = None
                href = None

        end_pos = title_pos or href_pos
        if not end_pos:
            return

        if key not in state.env['ref_links']:
            href = unescape_char(href)
            data = {'url': escape_url(href), 'label': label}
            if title:
                data['title'] = title
            state.env['ref_links'][key] = data
        return end_pos

    def extract_block_quote(
        self, m: re.Match, state: BlockState
    ) -> Tuple[str, Optional[int]]:
        """Extract text and cursor end position of a block quote.

        ``state.cursor`` is advanced past the quote. The second item of the
        result is ``None`` unless a following block was already parsed
        while looking for the end of the quote; in that case it is the
        position returned by that block's parse method.
        """

        # cleanup at first to detect if it is code block
        text = m.group('quote_1') + '\n'
        text = expand_leading_tab(text, 3)
        text = _BLOCK_QUOTE_TRIM.sub('', text)

        sc = self.compile_sc(['blank_line', 'indent_code', 'fenced_code'])
        require_marker = bool(sc.match(text))

        state.cursor = m.end() + 1

        end_pos = None
        if require_marker:
            m = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
            if m:
                quote = m.group(0)
                quote = _BLOCK_QUOTE_LEADING.sub('', quote)
                quote = expand_leading_tab(quote, 3)
                quote = _BLOCK_QUOTE_TRIM.sub('', quote)
                text += quote
                state.cursor = m.end()
        else:
            prev_blank_line = False
            break_sc = self.compile_sc([
                'blank_line', 'thematic_break', 'fenced_code',
                'list', 'block_html',
            ])
            while state.cursor < state.cursor_max:
                m = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
                if m:
                    quote = m.group(0)
                    quote = _BLOCK_QUOTE_LEADING.sub('', quote)
                    quote = expand_leading_tab(quote, 3)
                    quote = _BLOCK_QUOTE_TRIM.sub('', quote)
                    text += quote
                    state.cursor = m.end()
                    if not quote.strip():
                        prev_blank_line = True
                    else:
                        prev_blank_line = bool(_LINE_BLANK_END.search(quote))
                    continue

                if prev_blank_line:
                    # CommonMark Example 249
                    # because of laziness, a blank line is needed between
                    # a block quote and a following paragraph
                    break

                m = break_sc.match(state.src, state.cursor)
                if m:
                    end_pos = self.parse_method(m, state)
                    if end_pos:
                        break

                # lazy continuation line
                pos = state.find_line_end()
                line = state.get_text(pos)
                line = expand_leading_tab(line, 3)
                text += line
                state.cursor = pos

        # according to CommonMark Example 6, the second tab should be
        # treated as 4 spaces
        return expand_tab(text), end_pos

    def parse_block_quote(self, m: re.Match, state: BlockState) -> int:
        """Parse token for block quote. Here is an example of the syntax:

        .. code-block:: markdown

            > a block quote starts
            > with right arrows
        """
        text, end_pos = self.extract_block_quote(m, state)
        # scan children state
        child = state.child_state(text)
        if state.depth() >= self.max_nested_level - 1:
            # nesting limit reached: parse the content without the
            # block_quote rule
            rules = list(self.block_quote_rules)
            # custom rule lists may not contain 'block_quote' at all
            if 'block_quote' in rules:
                rules.remove('block_quote')
        else:
            rules = self.block_quote_rules

        self.parse(child, rules)
        token = {'type': 'block_quote', 'children': child.tokens}
        if end_pos:
            # the block that ended the quote is already in the token list,
            # so the quote goes in front of it
            state.prepend_token(token)
            return end_pos
        state.append_token(token)
        return state.cursor

    def parse_list(self, m: re.Match, state: BlockState) -> int:
        """Parse tokens for ordered and unordered list."""
        return parse_list(self, m, state)

    def parse_block_html(self, m: re.Match, state: BlockState) -> Optional[int]:
        """Parse a ``block_html`` match; delegates to :meth:`parse_raw_html`."""
        return self.parse_raw_html(m, state)

    def parse_raw_html(self, m: re.Match, state: BlockState) -> Optional[int]:
        """Parse token for a block of raw HTML.

        The matched opening marker selects how far the block extends.
        Returns ``None`` when the line is not a complete open or closing tag.

        NOTE(review): the numbered "rule" comments presumably refer to the
        HTML block start conditions of the CommonMark spec -- confirm.
        """
        marker = m.group(0).strip()

        # rule 2
        if marker == '<!--':
            return _parse_html_to_end(state, '-->', m.end())

        # rule 3
        if marker == '<?':
            return _parse_html_to_end(state, '?>', m.end())

        # rule 5
        if marker == '<![CDATA[':
            return _parse_html_to_end(state, ']]>', m.end())

        # rule 4
        if marker.startswith('<!'):
            return _parse_html_to_end(state, '>', m.end())

        close_tag = None
        open_tag = None
        if marker.startswith('</'):
            close_tag = marker[2:].lower()
            # rule 6
            if close_tag in BLOCK_TAGS:
                return _parse_html_to_newline(state, self.BLANK_LINE)
        else:
            open_tag = marker[1:].lower()
            # rule 1
            if open_tag in PRE_TAGS:
                end_tag = '</' + open_tag + '>'
                return _parse_html_to_end(state, end_tag, m.end())
            # rule 6
            if open_tag in BLOCK_TAGS:
                return _parse_html_to_newline(state, self.BLANK_LINE)

        # Blocks of type 7 may not interrupt a paragraph.
        end_pos = state.append_paragraph()
        if end_pos:
            return end_pos

        # rule 7
        start_pos = m.end()
        end_pos = state.find_line_end()
        if (open_tag and _OPEN_TAG_END.match(state.src, start_pos, end_pos)) or \
           (close_tag and _CLOSE_TAG_END.match(state.src, start_pos, end_pos)):
            return _parse_html_to_newline(state, self.BLANK_LINE)

    def parse(self, state: BlockState, rules: Optional[List[str]]=None) -> None:
        """Tokenize ``state.src`` from ``state.cursor`` to its end.

        :param state: block state to read from and append tokens to.
        :param rules: rule names to scan for; ``None`` uses ``self.rules``.
        """
        sc = self.compile_sc(rules)

        while state.cursor < state.cursor_max:
            m = sc.search(state.src, state.cursor)
            if not m:
                break

            # text in front of the match belongs to a paragraph
            end_pos = m.start()
            if end_pos > state.cursor:
                text = state.get_text(end_pos)
                state.add_paragraph(text)
                state.cursor = end_pos

            end_pos = self.parse_method(m, state)
            if end_pos:
                state.cursor = end_pos
            else:
                # the rule rejected the match: keep the line as paragraph text
                end_pos = state.find_line_end()
                text = state.get_text(end_pos)
                state.add_paragraph(text)
                state.cursor = end_pos

        if state.cursor < state.cursor_max:
            # nothing else matched: the remainder is paragraph text
            text = state.src[state.cursor:]
            state.add_paragraph(text)
            state.cursor = state.cursor_max
+
+
+def _parse_html_to_end(state, end_marker, start_pos):
+ marker_pos = state.src.find(end_marker, start_pos)
+ if marker_pos == -1:
+ text = state.src[state.cursor:]
+ end_pos = state.cursor_max
+ else:
+ text = state.get_text(marker_pos)
+ state.cursor = marker_pos
+ end_pos = state.find_line_end()
+ text += state.get_text(end_pos)
+
+ state.append_token({'type': 'block_html', 'raw': text})
+ return end_pos
+
+
+def _parse_html_to_newline(state, newline):
+ m = newline.search(state.src, state.cursor)
+ if m:
+ end_pos = m.start()
+ text = state.get_text(end_pos)
+ else:
+ text = state.src[state.cursor:]
+ end_pos = state.cursor_max
+
+ state.append_token({'type': 'block_html', 'raw': text})
+ return end_pos
diff --git a/src/mistune/core.py b/src/mistune/core.py
new file mode 100644
index 0000000..71db4dd
--- /dev/null
+++ b/src/mistune/core.py
@@ -0,0 +1,208 @@
+import re
+
# Matches the next line break, or the end of the text when there is none.
_LINE_END = re.compile(r'\n|$')


class BlockState:
    """The state to save block parser's cursor and tokens."""

    def __init__(self, parent=None):
        self.src = ''
        self.tokens = []

        # current cursor position and the length of ``src``
        self.cursor = 0
        self.cursor_max = 0

        # for list and block quote chain
        self.list_tight = True
        self.parent = parent

        # def references are shared with the parent state
        self.env = parent.env if parent else {'ref_links': {}}

    def child_state(self, src):
        """Return a new state for ``src`` whose parent is this state."""
        nested = self.__class__(self)
        nested.process(src)
        return nested

    def process(self, src):
        """Set the source text and record its length as ``cursor_max``."""
        self.src = src
        self.cursor_max = len(src)

    def find_line_end(self):
        """Return the position just after the line break that follows the
        cursor, or the end of the source when there is no line break."""
        return _LINE_END.search(self.src, self.cursor).end()

    def get_text(self, end_pos):
        """Return the source between the cursor and ``end_pos``."""
        return self.src[self.cursor:end_pos]

    def last_token(self):
        """Return the most recent token, or ``None`` without tokens."""
        if not self.tokens:
            return None
        return self.tokens[-1]

    def prepend_token(self, token):
        """Insert token before the last token."""
        position = len(self.tokens) - 1
        self.tokens.insert(position, token)

    def append_token(self, token):
        """Add token to the end of token list."""
        self.tokens.append(token)

    def add_paragraph(self, text):
        """Extend the trailing paragraph with ``text`` or start a new one."""
        previous = self.last_token()
        if not previous or previous['type'] != 'paragraph':
            self.tokens.append({'type': 'paragraph', 'text': text})
            return
        previous['text'] += text

    def append_paragraph(self):
        """Add the current line to a trailing paragraph token.

        Returns the end position of that line, or ``None`` when the last
        token is not a paragraph.
        """
        previous = self.last_token()
        if not previous or previous['type'] != 'paragraph':
            return None
        line_end = self.find_line_end()
        previous['text'] += self.get_text(line_end)
        return line_end

    def depth(self):
        """Return how many parent states are above this one."""
        level = 0
        ancestor = self.parent
        while ancestor:
            level += 1
            ancestor = ancestor.parent
        return level
+
+
class InlineState:
    """The state to save inline parser's tokens."""

    def __init__(self, env):
        self.env = env
        self.src = ''
        self.tokens = []
        # flags describing the inline element currently being parsed
        self.in_image = False
        self.in_link = False
        self.in_emphasis = False
        self.in_strong = False

    def prepend_token(self, token):
        """Insert token before the last token."""
        position = len(self.tokens) - 1
        self.tokens.insert(position, token)

    def append_token(self, token):
        """Add token to the end of token list."""
        self.tokens.append(token)

    def copy(self):
        """Create a copy of current state.

        The copy shares ``env`` and carries over the ``in_*`` flags; its
        ``src`` and ``tokens`` start empty.
        """
        clone = self.__class__(self.env)
        for flag in ('in_image', 'in_link', 'in_emphasis', 'in_strong'):
            setattr(clone, flag, getattr(self, flag))
        return clone
+
+
class Parser:
    """Base class of the rule based parsers.

    ``specification`` maps rule names to regex snippets, ``rules`` is the
    ordered list of active rule names and ``_methods`` maps rule names to
    the functions that handle a match.
    """
    sc_flag = re.M
    state_cls = BlockState

    SPECIFICATION = {}
    DEFAULT_RULES = []

    def __init__(self):
        # per-instance copies, so registering rules never changes the class
        self.specification = self.SPECIFICATION.copy()
        self.rules = list(self.DEFAULT_RULES)
        self._methods = {}

        # compiled scanners, keyed by the joined rule names
        self.__sc = {}

    def compile_sc(self, rules=None):
        """Return the compiled scanner for ``rules``.

        :param rules: list of rule names; ``None`` uses ``self.rules``.

        The scanner is one regex with a named group per rule. The cache key
        is built from the rule names, including the default rules, so a
        rule registered after the first compilation is picked up.
        """
        if rules is None:
            rules = self.rules
        key = '|'.join(rules)

        sc = self.__sc.get(key)
        if sc:
            return sc

        regex = '|'.join(r'(?P<%s>%s)' % (k, self.specification[k]) for k in rules)
        sc = re.compile(regex, self.sc_flag)
        self.__sc[key] = sc
        return sc

    def register(self, name, pattern, func, before=None):
        """Register a new rule to parse the token. This method is usually used to
        create a new plugin.

        :param name: name of the new grammar
        :param pattern: regex pattern in string
        :param func: the parsing function
        :param before: insert this rule before a built-in rule
        """
        self._methods[name] = lambda m, state: func(self, m, state)
        if pattern:
            self.specification[name] = pattern
            # scanners compiled with the previous pattern are outdated
            self.__sc.clear()
        if name not in self.rules:
            self.insert_rule(self.rules, name, before=before)

    def register_rule(self, name, pattern, func):
        """Always raise; kept to reject plugins written for older versions."""
        raise DeprecationWarning('This plugin is not compatible with mistune v3.')

    @staticmethod
    def insert_rule(rules, name, before=None):
        """Insert ``name`` into ``rules`` in front of ``before``.

        The name is appended when ``before`` is not given or not in the list.
        """
        if before:
            try:
                index = rules.index(before)
                rules.insert(index, name)
            except ValueError:
                rules.append(name)
        else:
            rules.append(name)

    def parse_method(self, m, state):
        """Call the function registered for the rule that produced ``m``."""
        func = self._methods[m.lastgroup]
        return func(m, state)
+
+
class BaseRenderer(object):
    """Base class of renderers: dispatches each token to the method named
    after its ``type``."""
    NAME = 'base'

    def __init__(self):
        # render functions added with ``register``
        self.__methods = {}

    def register(self, name, method):
        """Register a render method for the named token. For example::

            def render_wiki(renderer, key, title):
                return f'<a href="/wiki/{key}">{title}</a>'

            renderer.register('wiki', render_wiki)
        """
        def bound(*arg, **kwargs):
            # bind self into renderer method
            return method(self, *arg, **kwargs)

        self.__methods[name] = bound

    def _get_method(self, name):
        """Return the attribute called ``name``, falling back to the
        registered methods; raise ``AttributeError`` when neither exists."""
        try:
            return object.__getattribute__(self, name)
        except AttributeError:
            registered = self.__methods.get(name)
            if registered:
                return registered
            raise AttributeError('No renderer "{!r}"'.format(name))

    def render_token(self, token, state):
        """Render one token with the method selected by ``token['type']``."""
        render = self._get_method(token['type'])
        return render(token, state)

    def iter_tokens(self, tokens, state):
        """Yield the rendered result of every token, in order."""
        for item in tokens:
            yield self.render_token(item, state)

    def render_tokens(self, tokens, state):
        """Render all tokens and join the results into one string."""
        pieces = self.iter_tokens(tokens, state)
        return ''.join(pieces)

    def __call__(self, tokens, state):
        return self.render_tokens(tokens, state)
diff --git a/src/mistune/directives/__init__.py b/src/mistune/directives/__init__.py
new file mode 100644
index 0000000..660c4c8
--- /dev/null
+++ b/src/mistune/directives/__init__.py
@@ -0,0 +1,31 @@
+from ._base import DirectiveParser, BaseDirective, DirectivePlugin
+from ._rst import RSTDirective
+from ._fenced import FencedDirective
+from .admonition import Admonition
+from .toc import TableOfContents
+from .include import Include
+from .image import Image, Figure
+
+
class RstDirective(RSTDirective):  # pragma: no cover
    """Deprecated alias of ``RSTDirective``; creating it emits a
    ``DeprecationWarning``."""

    def __init__(self, plugins):
        super().__init__(plugins)
        import warnings
        message = "'RstDirective' is deprecated, please use 'RSTDirective' instead."
        # stacklevel=2 attributes the warning to the code creating the instance
        warnings.warn(message, DeprecationWarning, stacklevel=2)
+
+
# Public names exported by ``from mistune.directives import *``. The
# deprecated ``RstDirective`` alias is not listed here.
__all__ = [
    'DirectiveParser',
    'BaseDirective',
    'DirectivePlugin',
    'RSTDirective',
    'FencedDirective',
    'Admonition',
    'TableOfContents',
    'Include',
    'Image', 'Figure',
]
diff --git a/src/mistune/directives/_base.py b/src/mistune/directives/_base.py
new file mode 100644
index 0000000..ad326c6
--- /dev/null
+++ b/src/mistune/directives/_base.py
@@ -0,0 +1,121 @@
+import re
+
+
class DirectiveParser:
    """Helpers that read the parts of a directive from a regex match.

    Subclasses implement ``parse_type``, ``parse_title`` and
    ``parse_content`` for their directive syntax.
    """
    name = 'directive'

    @staticmethod
    def parse_type(m: re.Match):
        """Return the directive type of the match (implemented by subclasses)."""
        raise NotImplementedError()

    @staticmethod
    def parse_title(m: re.Match):
        """Return the directive title of the match (implemented by subclasses)."""
        raise NotImplementedError()

    @staticmethod
    def parse_content(m: re.Match):
        """Return the directive content of the match (implemented by subclasses)."""
        raise NotImplementedError()

    @classmethod
    def parse_tokens(cls, block, text, state):
        """Parse ``text`` as nested block content and return its tokens.

        Once the nesting limit of ``block`` is reached, this directive's own
        rule is left out so it cannot nest any deeper.
        """
        if state.depth() >= block.max_nested_level - 1 and cls.name in block.rules:
            rules = list(block.rules)
            rules.remove(cls.name)
        else:
            rules = block.rules
        child = state.child_state(text)
        block.parse(child, rules)
        return child.tokens

    @staticmethod
    def parse_options(m: re.Match):
        """Return the ``:key: value`` options of the match as a list of
        ``(key, value)`` tuples, in source order.

        Reads the ``options`` group of ``m``. A line without a closing colon
        yields the whole text as key and an empty value.
        """
        text = m.group('options')
        if not text.strip():
            return []

        options = []
        for line in re.split(r'\n+', text):
            # drop the indentation and the leading colon
            line = line.strip()[1:]
            if not line:
                continue
            # partition keeps the key intact when the second colon is missing
            k, _, v = line.partition(':')
            options.append((k, v.strip()))
        return options
+
+
class BaseDirective:
    """Base class of a directive syntax.

    It holds the directive plugins and dispatches a matched directive to
    the function registered for its type.
    """
    parser = DirectiveParser
    directive_pattern = None

    def __init__(self, plugins):
        self._methods = {}
        self.__plugins = plugins

    def register(self, name, fn):
        """Register ``fn`` as the handler of directive type ``name``."""
        self._methods[name] = fn

    def parse_method(self, block, m, state):
        """Run the handler for the matched directive and append its result.

        A ``ValueError`` raised by the handler, or an unknown directive
        type, produces a ``block_error`` token instead. A list result is
        appended token by token. The handler result is returned.
        """
        handler = self._methods.get(self.parser.parse_type(m))
        if not handler:
            # unknown directive type: keep the raw text as an error token
            token = {
                'type': 'block_error',
                'raw': m.group(0),
            }
        else:
            try:
                token = handler(block, m, state)
            except ValueError as e:
                token = {'type': 'block_error', 'raw': str(e)}

        produced = token if isinstance(token, list) else [token]
        for tok in produced:
            state.append_token(tok)
        return token

    def parse_directive(self, block, m, state):
        """Parse a matched directive (implemented by subclasses)."""
        raise NotImplementedError()

    def register_block_parser(self, md, before=None):
        """Register ``parse_directive`` as a block rule of ``md``."""
        md.block.register(
            self.parser.name,
            self.directive_pattern,
            self.parse_directive,
            before=before,
        )

    def __call__(self, md):
        # give every plugin this syntax' parser, then let it install itself
        for plugin in self.__plugins:
            plugin.parser = self.parser
            plugin(self, md)
+
+
class DirectivePlugin:
    """Base class of a directive plugin.

    ``parser`` is assigned by the directive that installs the plugin; the
    ``parse_*`` helpers forward to it. Subclasses implement ``parse`` and
    ``__call__``.
    """

    def __init__(self):
        self.parser = None

    def parse_options(self, m: re.Match):
        """Return the options of the match, read by the assigned parser."""
        return self.parser.parse_options(m)

    def parse_type(self, m: re.Match):
        """Return the directive type of the match."""
        return self.parser.parse_type(m)

    def parse_title(self, m: re.Match):
        """Return the directive title of the match."""
        return self.parser.parse_title(m)

    def parse_content(self, m: re.Match):
        """Return the directive content of the match."""
        return self.parser.parse_content(m)

    def parse_tokens(self, block, text, state):
        """Parse ``text`` as nested block content and return its tokens."""
        return self.parser.parse_tokens(block, text, state)

    def parse(self, block, m, state):
        """Build the token for a matched directive (implemented by subclasses)."""
        raise NotImplementedError()

    def __call__(self, directive, md=None):
        """Install the plugin (implemented by subclasses).

        A directive invokes its plugins as ``plugin(directive, md)``. ``md``
        is optional only so that the former one-argument call still ends in
        ``NotImplementedError``.
        """
        raise NotImplementedError()
diff --git a/src/mistune/directives/_fenced.py b/src/mistune/directives/_fenced.py
new file mode 100644
index 0000000..818f130
--- /dev/null
+++ b/src/mistune/directives/_fenced.py
@@ -0,0 +1,142 @@
+import re
+from ._base import DirectiveParser, BaseDirective
+
+__all__ = ['FencedDirective']
+
+
# An info string that starts with ``{directive-type}``.
_type_re = re.compile(r'^ *\{[a-zA-Z0-9_-]+\}')
# Text between the fences: ``{type} title``, then ``:key: value`` option
# lines, then the content.
_directive_re = re.compile(
    r'\{(?P<type>[a-zA-Z0-9_-]+)\} *(?P<title>[^\n]*)(?:\n|$)'
    r'(?P<options>(?:\:[a-zA-Z0-9_-]+\: *[^\n]*\n+)*)'
    r'\n*(?P<text>(?:[^\n]*\n+)*)'
)
+
+
class FencedParser(DirectiveParser):
    """Reads the parts of a fenced directive from the named groups of the
    match."""
    name = 'fenced_directive'

    @staticmethod
    def parse_type(m: re.Match):
        """Return the ``type`` group."""
        return m['type']

    @staticmethod
    def parse_title(m: re.Match):
        """Return the ``title`` group."""
        return m['title']

    @staticmethod
    def parse_content(m: re.Match):
        """Return the ``text`` group."""
        return m['text']
+
+
class FencedDirective(BaseDirective):
    """A **fenced** style of directive looks like a fenced code block, it is
    inspired by markdown-it-docutils. The syntax looks like:

    .. code-block:: text

        ```{directive-type} title
        :option-key: option value
        :option-key: option value

        content text here
        ```

    To use ``FencedDirective``, developers can add it into plugin list in
    the :class:`Markdown` instance:

    .. code-block:: python

        import mistune
        from mistune.directives import FencedDirective, Admonition

        md = mistune.create_markdown(plugins=[
            # ...
            FencedDirective([Admonition()]),
        ])

    FencedDirective is using >= 3 backticks or tildes for the fenced
    syntax. Developers can change it to other characters, e.g. colon:

    .. code-block:: python

        directive = FencedDirective([Admonition()], ':')

    And then the directive syntax would look like:

    .. code-block:: text

        ::::{note} Nesting directives
        You can nest directives by ensuring the start and end fence matching
        the length. For instance, in this example, the admonition is started
        with 4 colons, then it should end with 4 colons.

        You can nest another admonition with other length of colons except 4.

        :::{tip} Longer outermost fence
        It would be better that you put longer markers for the outer fence,
        and shorter markers for the inner fence. In this example, we put 4
        colons outside, and 3 colons inside.
        :::
        ::::

    :param plugins: list of directive plugins
    :param markers: characters to determine the fence, default is backtick
                    and tilde
    """
    parser = FencedParser

    def __init__(self, plugins, markers='`~'):
        super(FencedDirective, self).__init__(plugins)
        self.markers = markers
        _marker_pattern = '|'.join(re.escape(c) for c in markers)
        self.directive_pattern = (
            r'^(?P<fenced_directive_mark>(?:' + _marker_pattern + r'){3,})'
            r'\{[a-zA-Z0-9_-]+\}'
        )

    def _process_directive(self, block, marker, start, state):
        """Parse the directive whose opening fence ``marker`` begins at
        ``start``.

        Returns the position after the closing fence (or the end of the
        source when the fence is never closed), or ``None`` when the fenced
        text is not a directive.
        """
        mlen = len(marker)
        cursor_start = start + len(marker)

        # The closing fence repeats the first marker character at least as
        # often as the opening fence. The character is escaped because
        # custom markers may be regex metacharacters.
        _end_pattern = (
            r'^ {0,3}' + re.escape(marker[0]) + '{' + str(mlen) + r',}'
            r'[ \t]*(?:\n|$)'
        )
        _end_re = re.compile(_end_pattern, re.M)

        _end_m = _end_re.search(state.src, cursor_start)
        if _end_m:
            text = state.src[cursor_start:_end_m.start()]
            end_pos = _end_m.end()
        else:
            text = state.src[cursor_start:]
            end_pos = state.cursor_max

        m = _directive_re.match(text)
        if not m:
            return

        self.parse_method(block, m, state)
        return end_pos

    def parse_directive(self, block, m, state):
        """Handle a match of ``directive_pattern``."""
        marker = m.group('fenced_directive_mark')
        return self._process_directive(block, marker, m.start(), state)

    def parse_fenced_code(self, block, m, state):
        """Replacement for the ``fenced_code`` rule.

        A fence whose info string does not start with ``{type}``, or one
        found at the nesting limit, is parsed as a normal code block.
        """
        info = m.group('fenced_3')
        if not info or not _type_re.match(info):
            return block.parse_fenced_code(m, state)

        if state.depth() >= block.max_nested_level:
            return block.parse_fenced_code(m, state)

        marker = m.group('fenced_2')
        return self._process_directive(block, marker, m.start(), state)

    def __call__(self, md):
        super(FencedDirective, self).__call__(md)
        if self.markers == '`~':
            # same fence characters as code blocks: replace the handler of
            # the existing fenced_code rule
            md.block.register('fenced_code', None, self.parse_fenced_code)
        else:
            self.register_block_parser(md, 'fenced_code')
diff --git a/src/mistune/directives/_rst.py b/src/mistune/directives/_rst.py
new file mode 100644
index 0000000..6e054cf
--- /dev/null
+++ b/src/mistune/directives/_rst.py
@@ -0,0 +1,73 @@
+import re
+from ._base import DirectiveParser, BaseDirective
+
+__all__ = ['RSTDirective']
+
+
+# Matches one whole RST-style directive:
+#
+#   ..<spaces>TYPE:: TITLE
+#     :option: value        (zero or more option lines)
+#
+#     content               (zero or more content lines)
+#
+# Group 1 captures the spaces after the two dots. Option and content lines
+# must be indented by the width of ".." (two spaces) plus group 1, with up
+# to three extra spaces allowed. ``RSTParser.parse_content`` strips exactly
+# ``len(group 1) + 2`` characters from each content line, so requiring less
+# indentation here would cut into the text itself.
+_directive_re = re.compile(
+    r'\.\.( +)(?P<type>[a-zA-Z0-9_-]+)\:\: *(?P<title>[^\n]*)(?:\n|$)'
+    r'(?P<options>(?:  \1 {0,3}\:[a-zA-Z0-9_-]+\: *[^\n]*\n+)*)'
+    r'\n*(?P<text>(?:  \1 {0,3}[^\n]*\n+)*)'
+)
+
+
+class RSTParser(DirectiveParser):
+    """Extract the parts of a directive from a ``_directive_re`` match."""
+    name = 'rst_directive'
+
+    @staticmethod
+    def parse_type(m: re.Match):
+        """Return the directive type (the ``type`` group)."""
+        return m.group('type')
+
+    @staticmethod
+    def parse_title(m: re.Match):
+        """Return the text after ``::`` on the first line (``title`` group)."""
+        return m.group('title')
+
+    @staticmethod
+    def parse_content(m: re.Match):
+        """Return the directive body with its indentation removed.
+
+        Each line of the ``text`` group loses ``len(group 1) + 2`` leading
+        characters: the width of ``..`` plus the spaces that followed it.
+        The result always ends with a newline.
+        """
+        text = m.group('text')
+        leading = len(m.group(1)) + 2
+        return '\n'.join(line[leading:] for line in text.splitlines()) + '\n'
+
+
+class RSTDirective(BaseDirective):
+    """Directive syntax modelled on reStructuredText.
+
+    The syntax is flexible enough to build many custom features on top of
+    it. A directive looks like:
+
+    .. code-block:: text
+
+        .. directive-type:: directive value
+           :option-key: option value
+           :option-key: option value
+
+           content text here
+
+    To use ``RSTDirective``, add it to the plugin list of the
+    :class:`Markdown` instance:
+
+    .. code-block:: python
+
+        import mistune
+        from mistune.directives import RSTDirective, Admonition
+
+        md = mistune.create_markdown(plugins=[
+            # ...
+            RSTDirective([Admonition()]),
+        ])
+    """
+    parser = RSTParser
+    directive_pattern = r'^\.\. +[a-zA-Z0-9_-]+\:\:'
+
+    def parse_directive(self, block, m, state):
+        """Parse the directive starting at ``state.cursor``.
+
+        The incoming match only covers the start pattern, so the complete
+        directive is matched again with ``_directive_re``. Returns the end
+        position of that match, or ``None`` when it does not match.
+        """
+        found = _directive_re.match(state.src, state.cursor)
+        if found:
+            self.parse_method(block, found, state)
+            return found.end()
+
+    def __call__(self, md):
+        """Install the directive and register its block parser on ``md``."""
+        super(RSTDirective, self).__call__(md)
+        self.register_block_parser(md)
diff --git a/src/mistune/directives/admonition.py b/src/mistune/directives/admonition.py
new file mode 100644
index 0000000..b380611
--- /dev/null
+++ b/src/mistune/directives/admonition.py
@@ -0,0 +1,61 @@
+from ._base import DirectivePlugin
+
+
+class Admonition(DirectivePlugin):
+    """Directive plugin for admonition blocks such as ``note`` or ``warning``.
+
+    Every name in ``SUPPORTED_NAMES`` is registered as its own directive type.
+    """
+    SUPPORTED_NAMES = {
+        "attention", "caution", "danger", "error", "hint",
+        "important", "note", "tip", "warning",
+    }
+
+    def parse(self, block, m, state):
+        """Build the ``admonition`` token for a matched directive.
+
+        The token has two children: an ``admonition_title`` (the directive
+        title, or the capitalised directive name when no title is given)
+        and an ``admonition_content`` holding the parsed body. The optional
+        ``class`` option is copied into the token attrs.
+        """
+        name = self.parse_type(m)
+        attrs = {'name': name}
+        options = dict(self.parse_options(m))
+        if 'class' in options:
+            attrs['class'] = options['class']
+
+        title = self.parse_title(m)
+        if not title:
+            title = name.capitalize()
+
+        content = self.parse_content(m)
+        children = [
+            {
+                'type': 'admonition_title',
+                'text': title,
+            },
+            {
+                'type': 'admonition_content',
+                'children': self.parse_tokens(block, content, state),
+            }
+        ]
+        return {
+            'type': 'admonition',
+            'children': children,
+            'attrs': attrs,
+        }
+
+    def __call__(self, directive, md):
+        """Register the directive names and, for HTML output, the renderers."""
+        for name in self.SUPPORTED_NAMES:
+            directive.register(name, self.parse)
+
+        # A Markdown instance may have no renderer at all (it then returns
+        # tokens), so check for one before reading ``NAME``.
+        if md.renderer and md.renderer.NAME == 'html':
+            md.renderer.register('admonition', render_admonition)
+            md.renderer.register('admonition_title', render_admonition_title)
+            md.renderer.register('admonition_content', render_admonition_content)
+
+
+def render_admonition(self, text, name, **attrs):
+    """Render an ``admonition`` token as a ``<section>`` element.
+
+    :param text: already rendered children (title and content)
+    :param name: directive name, used as a CSS class
+    :param attrs: remaining token attrs; ``class`` adds extra CSS classes
+    """
+    # local import: this module has no HTML-escaping helper in scope
+    from html import escape as _escape_attr
+
+    out = '<section class="admonition ' + name
+    _cls = attrs.get('class')
+    if _cls:
+        # the ``class`` option is document-controlled text; escape it so it
+        # cannot terminate the attribute and inject markup
+        out += ' ' + _escape_attr(_cls)
+    return out + '">\n' + text + '</section>\n'
+
+
+def render_admonition_title(self, text):
+    """Wrap the rendered title in ``<p class="admonition-title">``."""
+    parts = ['<p class="admonition-title">', text, '</p>\n']
+    return ''.join(parts)
+
+
+def render_admonition_content(self, text):
+ # The content children are already rendered; they are emitted without any
+ # wrapper element.
+ return text
diff --git a/src/mistune/directives/image.py b/src/mistune/directives/image.py
new file mode 100644
index 0000000..5d9d40a
--- /dev/null
+++ b/src/mistune/directives/image.py
@@ -0,0 +1,152 @@
+import re
+from ._base import DirectivePlugin
+from ..util import escape as escape_text, escape_url
+
+__all__ = ['Image', 'Figure']
+
+# Loose numeric check for the ``width``/``height`` options. The pattern is
+# anchored at the start only, so a value merely has to BEGIN with a number;
+# whatever follows the number is not validated here.
+_num_re = re.compile(r'^\d+(?:\.\d*)?')
+# The only values accepted for the ``align`` option.
+_allowed_aligns = ["top", "middle", "bottom", "left", "center", "right"]
+
+
+def _parse_attrs(options):
+    """Pick the image attributes out of a directive's options.
+
+    ``alt`` is copied as is, ``align`` only when it is one of
+    ``_allowed_aligns``, ``height``/``width`` only when they start with a
+    number, and ``target`` is passed through ``escape_url``.
+    """
+    attrs = {}
+    if 'alt' in options:
+        attrs['alt'] = options['alt']
+
+    align = options.get('align')
+    if align and align in _allowed_aligns:
+        attrs['align'] = align
+
+    # height before width, keeping the insertion order of the result
+    for key in ('height', 'width'):
+        size = options.get(key)
+        if size and _num_re.match(size):
+            attrs[key] = size
+
+    if 'target' in options:
+        attrs['target'] = escape_url(options['target'])
+    return attrs
+
+
+class Image(DirectivePlugin):
+    """``image`` directive: the title is the image source, options are attrs."""
+    NAME = 'image'
+
+    def parse(self, block, m, state):
+        """Return a ``block_image`` token for the matched directive."""
+        options = dict(self.parse_options(m))
+        attrs = _parse_attrs(options)
+        attrs['src'] = self.parse_title(m)
+        return {'type': 'block_image', 'attrs': attrs}
+
+    def __call__(self, directive, md):
+        """Register the directive and, for HTML output, its renderer."""
+        directive.register(self.NAME, self.parse)
+        # A Markdown instance may have no renderer at all (it then returns
+        # tokens), so check for one before reading ``NAME``.
+        if md.renderer and md.renderer.NAME == 'html':
+            md.renderer.register('block_image', render_block_image)
+
+
+def render_block_image(self, src: str, alt=None, width=None, height=None, **attrs):
+    """Render a ``block_image`` token as HTML.
+
+    :param src: image URL (the directive title)
+    :param alt: alternative text
+    :param width: all-digit values become a ``width`` attribute, anything
+        else goes into the inline ``style``
+    :param height: handled like ``width``
+    :param attrs: may hold ``align`` (adds an ``align-*`` class) and
+        ``target`` (wraps the image in a link instead of a ``<div>``)
+    """
+    # ``src`` is document-controlled text. Run it through the renderer's
+    # URL filter and escape it so it cannot break out of the attribute;
+    # ``target`` below is treated the same way.
+    img = '<img src="' + escape_text(self.safe_url(src)) + '"'
+    style = ''
+    if alt:
+        img += ' alt="' + escape_text(alt) + '"'
+    if width:
+        if width.isdigit():
+            img += ' width="' + width + '"'
+        else:
+            style += 'width:' + width + ';'
+    if height:
+        if height.isdigit():
+            img += ' height="' + height + '"'
+        else:
+            style += 'height:' + height + ';'
+    if style:
+        img += ' style="' + escape_text(style) + '"'
+
+    img += ' />'
+
+    _cls = 'block-image'
+    align = attrs.get('align')
+    if align:
+        _cls += ' align-' + align
+
+    target = attrs.get('target')
+    if target:
+        href = escape_text(self.safe_url(target))
+        outer = '<a class="' + _cls + '" href="' + href + '">'
+        return outer + img + '</a>\n'
+    else:
+        return '<div class="' + _cls + '">' + img + '</div>\n'
+
+
+class Figure(DirectivePlugin):
+    """``figure`` directive: an image plus optional caption and legend."""
+    NAME = 'figure'
+
+    def parse_directive_content(self, block, m, state):
+        """Parse the directive body into caption and legend tokens.
+
+        The first parsed token is the caption (a ``paragraph`` is retyped
+        to ``figcaption``); any further tokens are wrapped in one
+        ``legend`` token. Returns ``None`` when there is nothing to add.
+        """
+        content = self.parse_content(m)
+        if not content:
+            return
+
+        tokens = self.parse_tokens(block, content, state)
+        if not tokens:
+            # the body produced no tokens, so there is no caption to take
+            return
+
+        caption = tokens[0]
+        if caption['type'] == 'paragraph':
+            caption['type'] = 'figcaption'
+            children = [caption]
+            if len(tokens) > 1:
+                children.append({
+                    'type': 'legend',
+                    'children': tokens[1:]
+                })
+            return children
+
+    def parse(self, block, m, state):
+        """Return the ``figure`` token for the matched directive.
+
+        ``align`` is moved from the image attrs to the figure attrs;
+        ``figwidth`` and ``figclass`` are copied from the options.
+        """
+        options = dict(self.parse_options(m))
+        image_attrs = _parse_attrs(options)
+        image_attrs['src'] = self.parse_title(m)
+
+        align = image_attrs.pop('align', None)
+        fig_attrs = {}
+        if align:
+            fig_attrs['align'] = align
+        for k in ['figwidth', 'figclass']:
+            if k in options:
+                fig_attrs[k] = options[k]
+
+        children = [{'type': 'block_image', 'attrs': image_attrs}]
+        content = self.parse_directive_content(block, m, state)
+        if content:
+            children.extend(content)
+        return {
+            'type': 'figure',
+            'attrs': fig_attrs,
+            'children': children,
+        }
+
+    def __call__(self, directive, md):
+        """Register the directive and, for HTML output, its renderers."""
+        directive.register(self.NAME, self.parse)
+
+        # A Markdown instance may have no renderer at all (it then returns
+        # tokens), so check for one before reading ``NAME``.
+        if md.renderer and md.renderer.NAME == 'html':
+            md.renderer.register('figure', render_figure)
+            md.renderer.register('block_image', render_block_image)
+            md.renderer.register('figcaption', render_figcaption)
+            md.renderer.register('legend', render_legend)
+
+
+def render_figure(self, text, align=None, figwidth=None, figclass=None):
+    """Render a ``figure`` token as a ``<figure>`` element.
+
+    :param text: already rendered children
+    :param align: adds an ``align-*`` class (validated during parsing)
+    :param figwidth: becomes an inline ``width`` style
+    :param figclass: extra CSS class(es)
+    """
+    _cls = 'figure'
+    if align:
+        _cls += ' align-' + align
+    if figclass:
+        # option values are document-controlled text: escape them so they
+        # cannot terminate the attribute
+        _cls += ' ' + escape_text(figclass)
+
+    html = '<figure class="' + _cls + '"'
+    if figwidth:
+        html += ' style="width:' + escape_text(figwidth) + '"'
+    return html + '>\n' + text + '</figure>\n'
+
+
+def render_figcaption(self, text):
+    """Wrap the rendered caption in a ``<figcaption>`` element."""
+    return ''.join(['<figcaption>', text, '</figcaption>\n'])
+
+
+def render_legend(self, text):
+    """Wrap the rendered legend blocks in ``<div class="legend">``."""
+    return ''.join(['<div class="legend">\n', text, '</div>\n'])
diff --git a/src/mistune/directives/include.py b/src/mistune/directives/include.py
new file mode 100644
index 0000000..d2180ba
--- /dev/null
+++ b/src/mistune/directives/include.py
@@ -0,0 +1,65 @@
+import os
+from ._base import DirectivePlugin
+
+
+class Include(DirectivePlugin):
+    """``include`` directive: insert another file into the document.
+
+    The including document's path must be available as
+    ``state.env['__file__']``; the directive title is resolved relative to
+    that file's directory.
+    """
+
+    def parse(self, block, m, state):
+        """Read the referenced file and return token(s) for it.
+
+        * Markdown files (``.md``, ``.markdown``, ``.mkd``) are parsed with
+          a fresh state and their tokens are returned.
+        * HTML files become one ``block_html`` token.
+        * Anything else becomes an ``include`` token carrying the text.
+
+        Every failure is reported as a ``block_error`` token rather than an
+        exception.
+        """
+        source_file = state.env.get('__file__')
+        if not source_file:
+            return {'type': 'block_error', 'raw': 'Missing source file'}
+
+        encoding = 'utf-8'
+        options = self.parse_options(m)
+        if options:
+            attrs = dict(options)
+            if 'encoding' in attrs:
+                encoding = attrs['encoding']
+        else:
+            attrs = {}
+
+        relpath = self.parse_title(m)
+        # NOTE(review): ``relpath`` is not restricted to the source
+        # directory, so "../" segments and absolute paths are followed.
+        # Confirm that documents rendered with this plugin are trusted.
+        dest = os.path.join(os.path.dirname(source_file), relpath)
+        dest = os.path.normpath(dest)
+
+        # normalise both sides so that e.g. "./a.md" still counts as self
+        if dest == os.path.normpath(source_file):
+            return {
+                'type': 'block_error',
+                'raw': 'Could not include self: ' + relpath,
+            }
+
+        if not os.path.isfile(dest):
+            return {
+                'type': 'block_error',
+                'raw': 'Could not find file: ' + relpath,
+            }
+
+        with open(dest, 'rb') as f:
+            content = f.read()
+
+        try:
+            content = content.decode(encoding)
+        except (LookupError, UnicodeDecodeError):
+            # unknown ``encoding`` option, or bytes invalid in that encoding
+            return {
+                'type': 'block_error',
+                'raw': 'Could not decode file: ' + relpath,
+            }
+
+        ext = os.path.splitext(relpath)[1]
+        if ext in {'.md', '.markdown', '.mkd'}:
+            new_state = block.state_cls()
+            new_state.env['__file__'] = dest
+            new_state.process(content)
+            block.parse(new_state)
+            return new_state.tokens
+
+        elif ext in {'.html', '.xhtml', '.htm'}:
+            return {'type': 'block_html', 'raw': content}
+
+        attrs['filepath'] = dest
+        return {
+            'type': 'include',
+            'raw': content,
+            'attrs': attrs,
+        }
+
+    def __call__(self, directive, md):
+        """Register the directive and, for HTML output, its renderer."""
+        directive.register('include', self.parse)
+        if md.renderer and md.renderer.NAME == 'html':
+            md.renderer.register('include', render_html_include)
+
+
+def render_html_include(renderer, text, **attrs):
+    """Render an ``include`` token as a ``<pre>`` block.
+
+    ``text`` is the included file's content exactly as read from disk, so
+    it is HTML-escaped here; otherwise ``<`` or ``&`` in the file would be
+    interpreted as markup instead of being displayed.
+    """
+    from html import escape as _escape_html
+
+    body = _escape_html(text, quote=False)
+    return '<pre class="directive-include">\n' + body + '</pre>\n'
diff --git a/src/mistune/directives/toc.py b/src/mistune/directives/toc.py
new file mode 100644
index 0000000..4084f43
--- /dev/null
+++ b/src/mistune/directives/toc.py
@@ -0,0 +1,105 @@
+"""
+ TOC directive
+ ~~~~~~~~~~~~~
+
+ The TOC directive syntax looks like::
+
+ .. toc:: Title
+ :min-level: 1
+ :max-level: 3
+
+ "Title", "min-level", and "max-level" option can be empty. "min-level"
+ and "max-level" are integers >= 1 and <= 6, which define the allowed
+ heading levels writers want to include in the table of contents.
+"""
+
+from ._base import DirectivePlugin
+from ..toc import normalize_toc_item, render_toc_ul
+
+
+class TableOfContents(DirectivePlugin):
+    """``toc`` directive: lists the document's headings.
+
+    :param min_level: lowest heading level a ``toc`` may ask for
+    :param max_level: highest heading level a ``toc`` may ask for
+    """
+
+    def __init__(self, min_level=1, max_level=3):
+        self.min_level = min_level
+        self.max_level = max_level
+
+    def generate_heading_id(self, token, index):
+        """Return the id for the heading at 0-based ``index`` (``toc_1``...)."""
+        return f'toc_{index + 1}'
+
+    def parse(self, block, m, state):
+        """Return the ``toc`` token for the matched directive.
+
+        :raises ValueError: when ``min-level``/``max-level`` are not
+            integers, fall outside the configured bounds, or are inverted
+        """
+        title = self.parse_title(m)
+        options = self.parse_options(m)
+
+        # values used when the directive carries no options
+        collapse = False
+        min_level = self.min_level
+        max_level = self.max_level
+
+        if options:
+            opts = dict(options)
+            collapse = 'collapse' in opts
+            min_level = _normalize_level(opts, 'min-level', self.min_level)
+            max_level = _normalize_level(opts, 'max-level', self.max_level)
+            if min_level < self.min_level:
+                raise ValueError(f'"min-level" option MUST be >= {self.min_level}')
+            if max_level > self.max_level:
+                raise ValueError(f'"max-level" option MUST be <= {self.max_level}')
+            if min_level > max_level:
+                raise ValueError('"min-level" option MUST be less than "max-level" option')
+
+        return {
+            'type': 'toc',
+            'text': title or '',
+            'attrs': {
+                'min_level': min_level,
+                'max_level': max_level,
+                'collapse': collapse,
+            },
+        }
+
+    def toc_hook(self, md, state):
+        """Before rendering: give headings ids and fill every ``toc`` token.
+
+        Only the tokens directly in ``state.tokens`` are looked at. Nothing
+        is changed when the document has no ``toc`` token.
+        """
+        sections = []
+        headings = []
+        for tok in state.tokens:
+            kind = tok['type']
+            if kind == 'toc':
+                sections.append(tok)
+            elif kind == 'heading':
+                headings.append(tok)
+
+        if not sections:
+            return
+
+        # adding ID for each heading
+        toc_items = []
+        for position, heading in enumerate(headings):
+            heading['attrs']['id'] = self.generate_heading_id(heading, position)
+            toc_items.append(normalize_toc_item(md, heading))
+
+        for sec in sections:
+            sec_attrs = sec['attrs']
+            lowest = sec_attrs['min_level']
+            highest = sec_attrs['max_level']
+            sec_attrs['toc'] = [
+                item for item in toc_items if lowest <= item[0] <= highest
+            ]
+
+    def __call__(self, directive, md):
+        """Register directive, hook and renderer; HTML renderer only."""
+        if not (md.renderer and md.renderer.NAME == 'html'):
+            return
+        # only works with HTML renderer
+        directive.register('toc', self.parse)
+        md.before_render_hooks.append(self.toc_hook)
+        md.renderer.register('toc', render_html_toc)
+
+
+def render_html_toc(renderer, title, collapse=False, **attrs):
+    """Render a ``toc`` token as a ``<details>`` element.
+
+    :param title: summary text; defaults to "Table of Contents" when empty
+    :param collapse: when false, the element is rendered ``open``
+    :param attrs: must contain ``toc``, the items passed to
+        ``render_toc_ul``
+    """
+    if not title:
+        title = 'Table of Contents'
+    content = render_toc_ul(attrs['toc'])
+
+    html = '<details class="toc"'
+    if not collapse:
+        html += ' open'
+    html += '>\n<summary>' + title + '</summary>\n'
+    return html + content + '</details>\n'
+
+
+def _normalize_level(options, name, default):
+    """Return option ``name`` as an integer.
+
+    A missing or empty option yields ``default``.
+
+    :raises ValueError: when the option value cannot be converted
+    """
+    raw = options.get(name)
+    if raw:
+        try:
+            return int(raw)
+        except (ValueError, TypeError):
+            raise ValueError(f'"{name}" option MUST be integer')
+    return default
diff --git a/src/mistune/helpers.py b/src/mistune/helpers.py
new file mode 100644
index 0000000..04c1df1
--- /dev/null
+++ b/src/mistune/helpers.py
@@ -0,0 +1,137 @@
+import re
+import string
+from .util import escape_url
+
+# Zero-width check that the previous character is not a backslash, followed
+# by any number of backslash PAIRS. It is placed in front of a character
+# that must not itself be backslash-escaped.
+PREVENT_BACKSLASH = r'(?<!\\)(?:\\\\)*'
+# Character class of every ASCII punctuation character.
+PUNCTUATION = r'[' + re.escape(string.punctuation) + r']'
+
+# Body of a link label: up to 500 items, each either a backslash escape or
+# a character other than a backslash or square bracket.
+LINK_LABEL = r'(?:[^\\\[\]]|\\.){0,500}'
+
+# Optional blanks (with at most one newline) leading up to "<".
+LINK_BRACKET_START = re.compile(r'[ \t]*\n?[ \t]*<')
+# "<destination>"; group 1 is the part between the angle brackets.
+LINK_BRACKET_RE = re.compile(r'<([^<>\n\\\x00]*)>')
+# Bare destination at block level: a run of non-whitespace characters.
+LINK_HREF_BLOCK_RE = re.compile(r'[ \t]*\n?[ \t]*([^\s]+)(?:\s|$)')
+# Bare destination inside "(...)": ends at a blank or at an unescaped ")".
+LINK_HREF_INLINE_RE = re.compile(
+ r'[ \t]*\n?[ \t]*([^ \t\n]*?)(?:[ \t\n]|'
+ r'(?:' + PREVENT_BACKSLASH + r'\)))'
+)
+
+# Link title in double or single quotes, preceded by whitespace. Group 1
+# includes the surrounding quotes.
+LINK_TITLE_RE = re.compile(
+ r'[ \t\n]+('
+ r'"(?:\\' + PUNCTUATION + r'|[^"\x00])*"|' # "title"
+ r"'(?:\\" + PUNCTUATION + r"|[^'\x00])*'" # 'title'
+ r')'
+)
+# Closing parenthesis of an inline link, with optional leading whitespace.
+PAREN_END_RE = re.compile(r'\s*\)')
+
+# Building blocks for HTML tag patterns: a tag name, and zero or more
+# attributes with optional unquoted, single-quoted or double-quoted values.
+HTML_TAGNAME = r'[A-Za-z][A-Za-z0-9-]*'
+HTML_ATTRIBUTES = (
+ r'(?:\s+[A-Za-z_:][A-Za-z0-9_.:-]*'
+ r'(?:\s*=\s*(?:[^ !"\'=<>`]+|\'[^\']*?\'|"[^\"]*?"))?)*'
+)
+
+# Tag names treated as block-level HTML.
+# NOTE(review): presumably used by the block parser's HTML rules - confirm.
+BLOCK_TAGS = (
+ 'address', 'article', 'aside', 'base', 'basefont', 'blockquote',
+ 'body', 'caption', 'center', 'col', 'colgroup', 'dd', 'details',
+ 'dialog', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
+ 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3',
+ 'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'iframe',
+ 'legend', 'li', 'link', 'main', 'menu', 'menuitem', 'meta', 'nav',
+ 'noframes', 'ol', 'optgroup', 'option', 'p', 'param', 'section',
+ 'source', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead',
+ 'title', 'tr', 'track', 'ul'
+)
+PRE_TAGS = ('pre', 'script', 'style', 'textarea')
+
+# Private patterns used by the helper functions in this module.
+_INLINE_LINK_LABEL_RE = re.compile(LINK_LABEL + r'\]')
+_INLINE_SQUARE_BRACKET_RE = re.compile(PREVENT_BACKSLASH + r'[\[\]]')
+_ESCAPE_CHAR_RE = re.compile(r'\\(' + PUNCTUATION + r')')
+
+
+def unescape_char(text):
+    """Drop the backslash in front of every escaped punctuation character."""
+    return _ESCAPE_CHAR_RE.sub(lambda found: found.group(1), text)
+
+
+def parse_link_text(src, pos):
+    """Scan for the ``]`` that closes link text starting at ``pos``.
+
+    ``pos`` points just after the opening ``[``. Brackets found by
+    ``_INLINE_SQUARE_BRACKET_RE`` adjust the nesting depth; the text ends
+    where the depth returns to zero.
+
+    :returns: ``(text, position after the closing bracket)``, or
+        ``(None, None)`` when the text is never closed
+    """
+    depth = 1
+    begin = pos
+    limit = len(src)
+
+    while pos < limit:
+        found = _INLINE_SQUARE_BRACKET_RE.search(src, pos)
+        if not found:
+            break
+
+        pos = found.end()
+        if found.group(0) != ']':
+            depth += 1
+            continue
+
+        depth -= 1
+        if depth == 0:
+            return src[begin:pos - 1], pos
+
+    return None, None
+
+
+def parse_link_label(src, start_pos):
+    """Match a link label that ends with ``]`` at ``start_pos``.
+
+    :returns: ``(label without the closing bracket, end position)``, or
+        ``(None, None)`` when there is no match
+    """
+    found = _INLINE_LINK_LABEL_RE.match(src, start_pos)
+    if not found:
+        return None, None
+    matched_text = found.group(0)
+    return matched_text[:-1], found.end()
+
+
+def parse_link_href(src, start_pos, block=False):
+    """Parse a link destination beginning at ``start_pos``.
+
+    An angle-bracketed destination is tried first; once a ``<`` has been
+    seen, a bare destination is no longer considered.
+
+    :param block: use the block-level pattern instead of the inline one
+    :returns: ``(href, position to continue from)`` or ``(None, None)``
+    """
+    bracket_start = LINK_BRACKET_START.match(src, start_pos)
+    if bracket_start:
+        # step back onto the "<" so the whole "<...>" can be matched
+        bracketed = LINK_BRACKET_RE.match(src, bracket_start.end() - 1)
+        if not bracketed:
+            return None, None
+        return bracketed.group(1), bracketed.end()
+
+    href_re = LINK_HREF_BLOCK_RE if block else LINK_HREF_INLINE_RE
+    found = href_re.match(src, start_pos)
+    if not found:
+        return None, None
+
+    href = found.group(1)
+    end_pos = found.end()
+    # The match normally ends one character past the href; report the
+    # position of that character, except when a block-level match stopped
+    # right on the href's last character.
+    if block and src[end_pos - 1] == href[-1]:
+        return href, end_pos
+    return href, end_pos - 1
+
+
+def parse_link_title(src, start_pos, max_pos):
+    """Parse a quoted link title between ``start_pos`` and ``max_pos``.
+
+    :returns: ``(title without quotes and with escapes removed, end
+        position)``, or ``(None, None)`` when there is no title
+    """
+    found = LINK_TITLE_RE.match(src, start_pos, max_pos)
+    if not found:
+        return None, None
+    quoted = found.group(1)
+    return unescape_char(quoted[1:-1]), found.end()
+
+
+def parse_link(src, pos):
+    """Parse ``destination "title")``, the inside of an inline link.
+
+    :param pos: position just after the opening parenthesis
+    :returns: ``(attrs, position after the closing parenthesis)`` where
+        ``attrs`` has ``url`` and, if present, ``title``; ``(None, None)``
+        when the link is malformed
+    """
+    href, href_end = parse_link_href(src, pos)
+    if href is None:
+        return None, None
+
+    title, title_end = parse_link_title(src, href_end, len(src))
+    resume_at = title_end if title_end else href_end
+    closing = PAREN_END_RE.match(src, resume_at)
+    if not closing:
+        return None, None
+
+    attrs = {'url': escape_url(unescape_char(href))}
+    if title:
+        attrs['title'] = title
+    return attrs, closing.end()
diff --git a/src/mistune/inline_parser.py b/src/mistune/inline_parser.py
new file mode 100644
index 0000000..0375a74
--- /dev/null
+++ b/src/mistune/inline_parser.py
@@ -0,0 +1,390 @@
+import re
+from typing import Optional, List, Dict, Any
+from .core import Parser, InlineState
+from .util import (
+ escape,
+ escape_url,
+ unikey,
+)
+from .helpers import (
+ PREVENT_BACKSLASH,
+ PUNCTUATION,
+ HTML_TAGNAME,
+ HTML_ATTRIBUTES,
+ unescape_char,
+ parse_link,
+ parse_link_label,
+ parse_link_text,
+)
+
+# Closing parenthesis with optional leading whitespace.
+# NOTE(review): not referenced anywhere else in this module's visible code.
+PAREN_END_RE = re.compile(r'\s*\)')
+
+# <user@example.com> style automatic e-mail link.
+AUTO_EMAIL = (
+ r'''<[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9]'''
+ r'(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?'
+ r'(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>'
+)
+
+# Alternation of every raw inline HTML construct that is recognised.
+INLINE_HTML = (
+ r'<' + HTML_TAGNAME + HTML_ATTRIBUTES + r'\s*/?>|' # open tag
+ r'</' + HTML_TAGNAME + r'\s*>|' # close tag
+ r'<!--(?!>|->)(?:(?!--)[\s\S])+?(?<!-)-->|' # comment
+ r'<\?[\s\S]+?\?>|' # script like <?php?>
+ r'<![A-Z][\s\S]+?>|' # doctype
+ r'<!\[CDATA[\s\S]+?\]\]>' # cdata
+)
+
+# Closing-delimiter patterns, keyed by the opening emphasis marker. Each one
+# requires a non-space character (or an escaped marker) right before the
+# closing run and no further marker character right after it; the "_"
+# variants additionally require a word boundary.
+EMPHASIS_END_RE = {
+ '*': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\\*|[^\s*])\*(?!\*)'),
+ '_': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\_|[^\s_])_(?!_)\b'),
+
+ '**': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\\*|[^\s*])\*\*(?!\*)'),
+ '__': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\_|[^\s_])__(?!_)\b'),
+
+ '***': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\\*|[^\s*])\*\*\*(?!\*)'),
+ '___': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\_|[^\s_])___(?!_)\b'),
+}
+
+
+class InlineParser(Parser):
+ # Inline-level parser. Every rule in SPECIFICATION has a ``parse_<rule>``
+ # method that receives the regex match of the rule's start pattern plus
+ # the InlineState, appends tokens to the state and returns the position
+ # to continue from. When a method returns a falsy value, ``parse`` emits
+ # the single character at the match start as plain text and moves on.
+ sc_flag = 0
+ state_cls = InlineState
+
+ #: linebreak leaves two spaces at the end of line
+ STD_LINEBREAK = r'(?:\\| {2,})\n\s*'
+
+ #: every new line becomes <br>
+ HARD_LINEBREAK = r' *\n\s*'
+
+ # we only need to find the start pattern of an inline token
+ SPECIFICATION = {
+ # e.g. \`, \$
+ 'escape': r'(?:\\' + PUNCTUATION + ')+',
+
+ # `code, ```code
+ 'codespan': r'`{1,}',
+
+ # *w, **w, _w, __w
+ 'emphasis': r'\*{1,3}(?=[^\s*])|\b_{1,3}(?=[^\s_])',
+
+ # [link], ![img]
+ 'link': r'!?\[',
+
+ # <https://example.com>. regex copied from commonmark.js
+ 'auto_link': r'<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>',
+ 'auto_email': AUTO_EMAIL,
+
+ 'inline_html': INLINE_HTML,
+
+ 'linebreak': STD_LINEBREAK,
+ 'softbreak': HARD_LINEBREAK,
+
+ # "prec_" entries are start-only patterns used by precedence_scan to
+ # spot a construct that may take precedence; the prefix is stripped
+ # there to find the real rule.
+ 'prec_auto_link': r'<[A-Za-z][A-Za-z\d.+-]{1,31}:',
+ 'prec_inline_html': r'</?' + HTML_TAGNAME + r'|<!|<\?',
+ }
+ DEFAULT_RULES = (
+ 'escape',
+ 'codespan',
+ 'emphasis',
+ 'link',
+ 'auto_link',
+ 'auto_email',
+ 'inline_html',
+ 'linebreak',
+ )
+
+ def __init__(self, hard_wrap=False):
+ # :param hard_wrap: when true, the ``linebreak`` rule uses
+ # HARD_LINEBREAK; otherwise a separate ``softbreak`` rule is added.
+ # NOTE(review): ``self.specification`` and ``self.rules`` are not set in
+ # this class; presumably Parser.__init__ creates them from
+ # SPECIFICATION and DEFAULT_RULES - confirm.
+ super(InlineParser, self).__init__()
+
+ self.hard_wrap = hard_wrap
+ # lazy add linebreak
+ if hard_wrap:
+ self.specification['linebreak'] = self.HARD_LINEBREAK
+ else:
+ self.rules.append('softbreak')
+
+ # rule name -> bound parse method, looked up by precedence_scan
+ self._methods = {
+ name: getattr(self, 'parse_' + name) for name in self.rules
+ }
+
+ def parse_escape(self, m: re.Match, state: InlineState) -> int:
+ # Emit the escaped punctuation, minus its backslashes, as a text token.
+ text = m.group(0)
+ text = unescape_char(text)
+ state.append_token({
+ 'type': 'text',
+ 'raw': text,
+ })
+ return m.end()
+
+ def parse_link(self, m: re.Match, state: InlineState) -> Optional[int]:
+ # Parse a link or image starting at "[" / "![". Returns the position
+ # after the construct, or None when it is not a valid link.
+ pos = m.end()
+
+ marker = m.group(0)
+ is_image = marker[0] == '!'
+ # no image inside an image and no link inside a link: the marker is
+ # emitted as literal text
+ if is_image and state.in_image:
+ state.append_token({'type': 'text', 'raw': marker})
+ return pos
+ elif not is_image and state.in_link:
+ state.append_token({'type': 'text', 'raw': marker})
+ return pos
+
+ # try the stricter label form first, then bracket-balanced link text
+ text = None
+ label, end_pos = parse_link_label(state.src, pos)
+ if label is None:
+ text, end_pos = parse_link_text(state.src, pos)
+ if text is None:
+ return
+
+ if text is None:
+ text = label
+
+ # text that is not a valid label needs "(" or "[" to follow it
+ if end_pos >= len(state.src) and label is None:
+ return
+
+ # give way to a construct that starts inside the text and reaches
+ # past its end
+ rules = ['codespan', 'prec_auto_link', 'prec_inline_html']
+ prec_pos = self.precedence_scan(m, state, end_pos, rules)
+ if prec_pos:
+ return prec_pos
+
+ if end_pos < len(state.src):
+ c = state.src[end_pos]
+ if c == '(':
+ # standard link [text](<url> "title")
+ attrs, pos2 = parse_link(state.src, end_pos + 1)
+ if pos2:
+ token = self.__parse_link_token(is_image, text, attrs, state)
+ state.append_token(token)
+ return pos2
+
+ elif c == '[':
+ # standard ref link [text][label]
+ label2, pos2 = parse_link_label(state.src, end_pos + 1)
+ if pos2:
+ end_pos = pos2
+ if label2:
+ label = label2
+
+ if label is None:
+ return
+
+ # reference links are resolved against state.env['ref_links']
+ ref_links = state.env.get('ref_links')
+ if not ref_links:
+ return
+
+ key = unikey(label)
+ env = ref_links.get(key)
+ if env:
+ attrs = {'url': env['url'], 'title': env.get('title')}
+ token = self.__parse_link_token(is_image, text, attrs, state)
+ token['ref'] = key
+ token['label'] = label
+ state.append_token(token)
+ return end_pos
+
+ def __parse_link_token(self, is_image, text, attrs, state):
+ # Build the image/link token; ``text`` is rendered in a copied state
+ # with in_image/in_link set so that nested ones are not parsed.
+ new_state = state.copy()
+ new_state.src = text
+ if is_image:
+ new_state.in_image = True
+ token = {
+ 'type': 'image',
+ 'children': self.render(new_state),
+ 'attrs': attrs,
+ }
+ else:
+ new_state.in_link = True
+ token = {
+ 'type': 'link',
+ 'children': self.render(new_state),
+ 'attrs': attrs,
+ }
+ return token
+
+ def parse_auto_link(self, m: re.Match, state: InlineState) -> int:
+ # <scheme:...> autolink; kept as plain text when already inside a link.
+ text = m.group(0)
+ pos = m.end()
+ if state.in_link:
+ self.process_text(text, state)
+ return pos
+
+ # strip the angle brackets
+ text = text[1:-1]
+ self._add_auto_link(text, text, state)
+ return pos
+
+ def parse_auto_email(self, m: re.Match, state: InlineState) -> int:
+ # <user@host> autolink; kept as plain text when already inside a link.
+ text = m.group(0)
+ pos = m.end()
+ if state.in_link:
+ self.process_text(text, state)
+ return pos
+
+ text = text[1:-1]
+ url = 'mailto:' + text
+ self._add_auto_link(url, text, state)
+ return pos
+
+ def _add_auto_link(self, url, text, state):
+ # Append a link token whose only child is the literal ``text``.
+ state.append_token({
+ 'type': 'link',
+ 'children': [{'type': 'text', 'raw': text}],
+ 'attrs': {'url': escape_url(url)},
+ })
+
+ def parse_emphasis(self, m: re.Match, state: InlineState) -> int:
+ # Parse *em*, **strong** and ***both***, and the "_" equivalents.
+ pos = m.end()
+
+ marker = m.group(0)
+ mlen = len(marker)
+ # no emphasis inside emphasis and no strong inside strong: the marker
+ # is emitted as literal text
+ if mlen == 1 and state.in_emphasis:
+ state.append_token({'type': 'text', 'raw': marker})
+ return pos
+ elif mlen == 2 and state.in_strong:
+ state.append_token({'type': 'text', 'raw': marker})
+ return pos
+
+ _end_re = EMPHASIS_END_RE[marker]
+ m1 = _end_re.search(state.src, pos)
+ if not m1:
+ # no closing delimiter: the marker is literal text
+ state.append_token({'type': 'text', 'raw': marker})
+ return pos
+
+ end_pos = m1.end()
+ text = state.src[pos:end_pos-mlen]
+
+ # give way to a construct that starts inside the span and reaches
+ # past its end
+ prec_pos = self.precedence_scan(m, state, end_pos)
+ if prec_pos:
+ return prec_pos
+
+ new_state = state.copy()
+ new_state.src = text
+ if mlen == 1:
+ new_state.in_emphasis = True
+ children = self.render(new_state)
+ state.append_token({'type': 'emphasis', 'children': children})
+ elif mlen == 2:
+ new_state.in_strong = True
+ children = self.render(new_state)
+ state.append_token({'type': 'strong', 'children': children})
+ else:
+ # three markers: strong nested inside emphasis
+ new_state.in_emphasis = True
+ new_state.in_strong = True
+
+ children = [{
+ 'type': 'strong',
+ 'children': self.render(new_state)
+ }]
+ state.append_token({
+ 'type': 'emphasis',
+ 'children': children,
+ })
+ return end_pos
+
+ def parse_codespan(self, m: re.Match, state: InlineState) -> int:
+ # Parse a code span delimited by equal-length backtick runs.
+ marker = m.group(0)
+ # require same marker with same length at end
+
+ pattern = re.compile(r'(.*?(?:[^`]))' + marker + r'(?!`)', re.S)
+
+ pos = m.end()
+ m = pattern.match(state.src, pos)
+ if m:
+ end_pos = m.end()
+ code = m.group(1)
+ # Line endings are treated like spaces
+ code = code.replace('\n', ' ')
+ # one space is stripped from each side when both sides have one and
+ # the content is not all spaces
+ if len(code.strip()):
+ if code.startswith(' ') and code.endswith(' '):
+ code = code[1:-1]
+ state.append_token({'type': 'codespan', 'raw': escape(code)})
+ return end_pos
+ else:
+ # no closing run: the opening backticks are literal text
+ state.append_token({'type': 'text', 'raw': marker})
+ return pos
+
+ def parse_linebreak(self, m: re.Match, state: InlineState) -> int:
+ state.append_token({'type': 'linebreak'})
+ return m.end()
+
+ def parse_softbreak(self, m: re.Match, state: InlineState) -> int:
+ state.append_token({'type': 'softbreak'})
+ return m.end()
+
+ def parse_inline_html(self, m: re.Match, state: InlineState) -> int:
+ # Emit raw inline HTML; an opening <a> turns state.in_link on and a
+ # closing </a> turns it off again.
+ end_pos = m.end()
+ html = m.group(0)
+ state.append_token({'type': 'inline_html', 'raw': html})
+ if html.startswith(('<a ', '<a>', '<A ', '<A>')):
+ state.in_link = True
+ elif html.startswith(('</a ', '</a>', '</A ', '</A>')):
+ state.in_link = False
+ return end_pos
+
+ def process_text(self, text: str, state: InlineState):
+ # Append ``text`` as a plain text token.
+ state.append_token({'type': 'text', 'raw': text})
+
+ def parse(self, state: InlineState) -> List[Dict[str, Any]]:
+ # Tokenise ``state.src`` from left to right and return state.tokens.
+ pos = 0
+ sc = self.compile_sc()
+ while pos < len(state.src):
+ m = sc.search(state.src, pos)
+ if not m:
+ break
+
+ # text between the previous token and this match is plain text
+ end_pos = m.start()
+ if end_pos > pos:
+ hole = state.src[pos:end_pos]
+ self.process_text(hole, state)
+
+ new_pos = self.parse_method(m, state)
+ if not new_pos:
+ # move cursor 1 character forward
+ pos = end_pos + 1
+ hole = state.src[end_pos:pos]
+ self.process_text(hole, state)
+ else:
+ pos = new_pos
+
+ if pos == 0:
+ # special case, just pure text
+ self.process_text(state.src, state)
+ elif pos < len(state.src):
+ self.process_text(state.src[pos:], state)
+ return state.tokens
+
+ def precedence_scan(self, m: re.Match, state: InlineState, end_pos: int, rules=None):
+ # Look between the end of ``m`` and ``end_pos`` for a construct from
+ # ``rules`` that begins there and extends to or beyond ``end_pos``.
+ # If one exists, emit the text from the start of ``m`` up to it plus
+ # that construct's tokens, and return its end position; otherwise
+ # return None and leave ``state`` untouched.
+ if rules is None:
+ rules = ['codespan', 'link', 'prec_auto_link', 'prec_inline_html']
+
+ mark_pos = m.end()
+ sc = self.compile_sc(rules)
+ m1 = sc.search(state.src, mark_pos, end_pos)
+ if not m1:
+ return
+
+ # re-match with the full rule, without the end_pos limit
+ rule_name = m1.lastgroup.replace('prec_', '')
+ sc = self.compile_sc([rule_name])
+ m2 = sc.match(state.src, m1.start())
+ if not m2:
+ return
+
+ # run the rule on a copied state so nothing is emitted unless it wins
+ func = self._methods[rule_name]
+ new_state = state.copy()
+ new_state.src = state.src
+ m2_pos = func(m2, new_state)
+ if not m2_pos or m2_pos < end_pos:
+ return
+
+ raw_text = state.src[m.start():m2.start()]
+ state.append_token({'type': 'text', 'raw': raw_text})
+ for token in new_state.tokens:
+ state.append_token(token)
+ return m2_pos
+
+ def render(self, state: InlineState):
+ # Parse ``state`` and return its token list.
+ self.parse(state)
+ return state.tokens
+
+ def __call__(self, s, env):
+ # Parse the string ``s`` in a fresh state created from ``env``.
+ state = self.state_cls(env)
+ state.src = s
+ return self.render(state)
diff --git a/src/mistune/list_parser.py b/src/mistune/list_parser.py
new file mode 100644
index 0000000..b5ff866
--- /dev/null
+++ b/src/mistune/list_parser.py
@@ -0,0 +1,250 @@
+import re
+from .core import BlockState
+from .util import (
+ strip_end,
+ expand_tab,
+ expand_leading_tab,
+)
+# because list is complex, split list parser in a new file
+
+# First line of a list item:
+# list_1: up to three spaces of indentation
+# list_2: bullet ("*", "+", "-") or 1-9 digits followed by "." or ")"
+# list_3: the rest of the line; empty/blank, or a blank followed by text
+LIST_PATTERN = (
+ r'^(?P<list_1> {0,3})'
+ r'(?P<list_2>[\*\+-]|\d{1,9}[.)])'
+ r'(?P<list_3>[ \t]*|[ \t].+)$'
+)
+
+# Leading spaces (group 1) followed by the first non-whitespace character.
+_LINE_HAS_TEXT = re.compile(r'( *)\S')
+
+
+def parse_list(block, m: re.Match, state: BlockState) -> int:
+ """Parse tokens for ordered and unordered list."""
+ # ``m`` is a LIST_PATTERN match for the first item. Returns the position
+ # to continue block parsing from.
+ text = m.group('list_3')
+ if not text.strip():
+ # Example 285
+ # an empty list item cannot interrupt a paragraph
+ end_pos = state.append_paragraph()
+ if end_pos:
+ return end_pos
+
+ marker = m.group('list_2')
+ # ordered markers are digits plus a delimiter, so longer than one char
+ ordered = len(marker) > 1
+ depth = state.depth()
+ token = {
+ 'type': 'list',
+ 'children': [],
+ 'tight': True,
+ 'bullet': marker[-1],
+ 'attrs': {
+ 'depth': depth,
+ 'ordered': ordered,
+ },
+ }
+ if ordered:
+ start = int(marker[:-1])
+ if start != 1:
+ # Example 304
+ # we allow only lists starting with 1 to interrupt paragraphs
+ end_pos = state.append_paragraph()
+ if end_pos:
+ return end_pos
+ token['attrs']['start'] = start
+
+ # continue after the newline that ends the first line
+ state.cursor = m.end() + 1
+ groups = (m.group('list_1'), marker, text)
+
+ # at the nesting limit, item content is parsed without the list rule
+ if depth >= block.max_nested_level - 1:
+ rules = list(block.list_rules)
+ rules.remove('list')
+ else:
+ rules = block.list_rules
+
+ # each call parses one item and returns the groups of the next item's
+ # first line, or None when the list is finished
+ bullet = _get_list_bullet(marker[-1])
+ while groups:
+ groups = _parse_list_item(block, bullet, groups, token, state, rules)
+
+ # ``_end_pos``/``_tok_index`` are set by _parse_list_item when another
+ # block rule ended the list and already appended its own token; the list
+ # token is then inserted in front of that token.
+ end_pos = token.pop('_end_pos', None)
+ _transform_tight_list(token)
+ if end_pos:
+ index = token.pop('_tok_index')
+ state.tokens.insert(index, token)
+ return end_pos
+
+ state.append_token(token)
+ return state.cursor
+
+
+def _transform_tight_list(token):
+    """Retype paragraphs of a tight list to ``block_text``, in place.
+
+    Applies to every item of ``token`` and recurses into nested lists.
+    Loose lists are left untouched, including the lists nested in them.
+    """
+    if not token['tight']:
+        return
+
+    # reset tight list item
+    for list_item in token['children']:
+        for child in list_item['children']:
+            kind = child['type']
+            if kind == 'paragraph':
+                child['type'] = 'block_text'
+            elif kind == 'list':
+                _transform_tight_list(child)
+
+
+def _parse_list_item(block, bullet, groups, token, state, rules):
+ # Parse one list item and append it to ``token['children']``.
+ #
+ # :param bullet: regex for this list's marker (from _get_list_bullet)
+ # :param groups: (indent, marker, rest of line) of the item's first line
+ # :param token: the list token being built; ``tight`` may be cleared and
+ # ``_tok_index``/``_end_pos`` set on it
+ # :param rules: block rules used to parse the item's content
+ # :returns: the groups of the next item's first line, or None when no
+ # further item follows
+ spaces, marker, text = groups
+
+ leading_width = len(spaces) + len(marker)
+ text, continue_width = _compile_continue_width(text, leading_width)
+ item_pattern = _compile_list_item_pattern(bullet, leading_width)
+ # block starts that end the current item when they appear on a line that
+ # is not indented enough to continue it
+ pairs = [
+ ('thematic_break', block.specification['thematic_break']),
+ ('fenced_code', block.specification['fenced_code']),
+ ('axt_heading', block.specification['axt_heading']),
+ ('block_quote', block.specification['block_quote']),
+ ('block_html', block.specification['block_html']),
+ ('list', block.specification['list']),
+ ]
+ if leading_width < 3:
+ # replaces the first "3" in every pattern with the leading width
+ # NOTE(review): presumably the "{0,3}" indent allowance - confirm that
+ # each of these patterns has it as its first "3"
+ _repl_w = str(leading_width)
+ pairs = [(n, p.replace('3', _repl_w, 1)) for n, p in pairs]
+
+ # sibling items are tested right after thematic_break
+ pairs.insert(1, ('list_item', item_pattern))
+ regex = '|'.join(r'(?P<%s>(?<=\n)%s)' % pair for pair in pairs)
+ sc = re.compile(regex, re.M)
+
+ # ``src`` collects the raw continuation lines of this item
+ src = ''
+ next_group = None
+ prev_blank_line = False
+ pos = state.cursor
+
+ continue_space = ' ' * continue_width
+ while pos < state.cursor_max:
+ pos = state.find_line_end()
+ line = state.get_text(pos)
+ if block.BLANK_LINE.match(line):
+ src += '\n'
+ prev_blank_line = True
+ state.cursor = pos
+ continue
+
+ line = expand_leading_tab(line)
+ if line.startswith(continue_space):
+ if prev_blank_line and not text and not src.strip():
+ # Example 280
+ # A list item can begin with at most one blank line
+ break
+
+ src += line
+ prev_blank_line = False
+ state.cursor = pos
+ continue
+
+ m = sc.match(state.src, state.cursor)
+ if m:
+ tok_type = m.lastgroup
+ if tok_type == 'list_item':
+ # a sibling item after a blank line makes the list loose
+ if prev_blank_line:
+ token['tight'] = False
+ next_group = (
+ m.group('listitem_1'),
+ m.group('listitem_2'),
+ m.group('listitem_3')
+ )
+ state.cursor = m.end() + 1
+ break
+ # another block rule: let it parse; if it succeeds, remember where
+ # its token sits so parse_list can insert the list before it
+ tok_index = len(state.tokens)
+ end_pos = block.parse_method(m, state)
+ if end_pos:
+ token['_tok_index'] = tok_index
+ token['_end_pos'] = end_pos
+ break
+
+ if prev_blank_line and not line.startswith(continue_space):
+ # not a continue line, and previous line is blank
+ break
+
+ # otherwise the line is appended to the item as it is
+ src += line
+ state.cursor = pos
+
+ text += _clean_list_item_text(src, continue_width)
+ child = state.child_state(strip_end(text))
+
+ block.parse(child, rules)
+
+ if token['tight'] and _is_loose_list(child.tokens):
+ token['tight'] = False
+
+ token['children'].append({
+ 'type': 'list_item',
+ 'children': child.tokens,
+ })
+ if next_group:
+ return next_group
+
+
+def _get_list_bullet(c):
+    """Return the regex that matches sibling markers for delimiter ``c``.
+
+    ``.`` and ``)`` stand for ordered markers (digits plus the delimiter);
+    ``*`` and ``+`` are returned escaped; anything else yields ``-``.
+    """
+    patterns = {
+        '.': r'\d{0,9}\.',
+        ')': r'\d{0,9}\)',
+        '*': r'\*',
+        '+': r'\+',
+    }
+    return patterns.get(c, '-')
+
+
+def _compile_list_item_pattern(bullet, leading_width):
+    """Build the pattern for the first line of a sibling list item.
+
+    The allowed indentation is ``leading_width`` spaces, capped at 3. The
+    groups ``listitem_1``..``listitem_3`` hold indent, marker and rest.
+    """
+    indent_limit = min(leading_width, 3)
+    return ''.join([
+        r'^(?P<listitem_1> {0,', str(indent_limit), '})',
+        r'(?P<listitem_2>', bullet, ')',
+        r'(?P<listitem_3>[ \t]*|[ \t][^\n]+)$',
+    ])
+
+
+def _compile_continue_width(text, leading_width):
+    """Work out how far continuation lines of a list item must be indented.
+
+    :param text: the rest of the item's first line, after the marker
+    :param leading_width: width of the indentation plus the marker
+    :returns: ``(text, continue_width)``; ``text`` is the first line's
+        content with the marker gap removed and a newline appended, or an
+        empty string when the line holds no text
+    """
+    text = expand_leading_tab(text, 3)
+    text = expand_tab(text)
+
+    m2 = _LINE_HAS_TEXT.match(text)
+    if m2:
+        # indent code, startswith 5 spaces
+        # Only the first space belongs to the marker then; the remaining
+        # four stay in the text so that it parses as indented code.
+        if text.startswith('     '):
+            space_width = 1
+        else:
+            space_width = len(m2.group(1))
+
+        text = text[space_width:] + '\n'
+    else:
+        # blank first line: content starts one column after the marker
+        space_width = 1
+        text = ''
+
+    continue_width = leading_width + space_width
+    return text, continue_width
+
+
+def _clean_list_item_text(src, continue_width):
+    """Strip the item indentation from the collected continuation lines.
+
+    A line that begins with ``continue_width`` spaces loses that prefix and
+    then has its tabs expanded with ``expand_tab``; every other line is
+    kept unchanged.
+    """
+    trim_space = ' ' * continue_width
+    prefix_len = len(trim_space)
+
+    def _dedent(line):
+        if not line.startswith(trim_space):
+            return line
+        return expand_tab(line[prefix_len:])
+
+    return '\n'.join(_dedent(line) for line in src.split('\n'))
+
+
+def _is_loose_list(tokens):
+    """Tell whether an item's tokens make the enclosing list loose.
+
+    Returns ``True`` at the first ``blank_line`` token or at the second
+    ``paragraph`` token; returns ``None`` otherwise.
+    """
+    seen_paragraph = False
+    for tok in tokens:
+        kind = tok['type']
+        if kind == 'blank_line':
+            return True
+        if kind == 'paragraph':
+            if seen_paragraph:
+                return True
+            seen_paragraph = True
diff --git a/src/mistune/markdown.py b/src/mistune/markdown.py
new file mode 100644
index 0000000..c814a59
--- /dev/null
+++ b/src/mistune/markdown.py
@@ -0,0 +1,104 @@
+from typing import Optional
+from .core import BlockState
+from .block_parser import BlockParser
+from .inline_parser import InlineParser
+
+
+class Markdown:
+    """Convert markdown text into HTML or another output format.
+
+    Here is an example with the HTMLRenderer::
+
+        from mistune import HTMLRenderer
+
+        md = Markdown(renderer=HTMLRenderer(escape=False))
+        md('hello **world**')
+
+    :param renderer: a renderer to convert parsed tokens
+    :param block: block level syntax parser
+    :param inline: inline level syntax parser
+    :param plugins: mistune plugins to use
+    """
+    def __init__(self, renderer=None, block=None, inline=None, plugins=None):
+        self.renderer = renderer
+        self.block = BlockParser() if block is None else block
+        self.inline = InlineParser() if inline is None else inline
+        self.before_parse_hooks = []
+        self.before_render_hooks = []
+        self.after_render_hooks = []
+
+        # a plugin is a callable that receives this instance
+        for plugin in plugins or ():
+            plugin(self)
+
+    def use(self, plugin):
+        """Apply a single plugin callable to this instance."""
+        plugin(self)
+
+    def render_state(self, state: BlockState):
+        """Render the tokens held by ``state``.
+
+        :returns: the renderer's output, or the list of processed tokens
+                  when no renderer is configured
+        """
+        data = self._iter_render(state.tokens, state)
+        if not self.renderer:
+            return list(data)
+        return self.renderer(data, state)
+
+    def _iter_render(self, tokens, state):
+        """Yield each token after resolving its children.
+
+        Tokens with ``children`` are processed recursively; tokens with
+        ``text`` have it replaced by the inline parser's output.
+        """
+        for tok in tokens:
+            if 'children' in tok:
+                tok['children'] = list(self._iter_render(tok['children'], state))
+            elif 'text' in tok:
+                raw = tok.pop('text')
+                # process inline text
+                tok['children'] = self.inline(raw.strip(), state.env)
+            yield tok
+
+    def parse(self, s: str, state: Optional[BlockState]=None):
+        """Parse and convert the given markdown string. If renderer is None,
+        the returned **result** will be parsed markdown tokens.
+
+        :param s: markdown string
+        :param state: instance of BlockState
+        :returns: result, state
+        """
+        if state is None:
+            state = self.block.state_cls()
+
+        # normalize line separator
+        s = s.replace('\r\n', '\n').replace('\r', '\n')
+        state.process(s)
+
+        for before_parse in self.before_parse_hooks:
+            before_parse(self, state)
+
+        self.block.parse(state)
+
+        for before_render in self.before_render_hooks:
+            before_render(self, state)
+
+        result = self.render_state(state)
+
+        # each after-render hook may replace the result
+        for after_render in self.after_render_hooks:
+            result = after_render(self, result, state)
+        return result, state
+
+    def read(self, filepath, encoding='utf-8', state=None):
+        """Read a file, decode it and parse its content.
+
+        The path is stored in ``state.env['__file__']`` before parsing.
+
+        :param filepath: path of the markdown file
+        :param encoding: encoding used to decode the file bytes
+        :param state: instance of BlockState
+        :returns: result, state
+        """
+        if state is None:
+            state = self.block.state_cls()
+
+        state.env['__file__'] = filepath
+        with open(filepath, 'rb') as f:
+            raw = f.read()
+
+        return self.parse(raw.decode(encoding), state)
+
+    def __call__(self, s: str):
+        """Parse ``s`` and return only the result; ``None`` is treated
+        as a single newline."""
+        source = '\n' if s is None else s
+        return self.parse(source)[0]
diff --git a/src/mistune/plugins/__init__.py b/src/mistune/plugins/__init__.py
new file mode 100644
index 0000000..a79d727
--- /dev/null
+++ b/src/mistune/plugins/__init__.py
@@ -0,0 +1,38 @@
+from importlib import import_module
+
+# Short plugin names mapped to the dotted path of the plugin function.
+_plugins = {
+    'speedup': 'mistune.plugins.speedup.speedup',
+    'strikethrough': 'mistune.plugins.formatting.strikethrough',
+    'mark': 'mistune.plugins.formatting.mark',
+    'insert': 'mistune.plugins.formatting.insert',
+    'superscript': 'mistune.plugins.formatting.superscript',
+    'subscript': 'mistune.plugins.formatting.subscript',
+    'footnotes': 'mistune.plugins.footnotes.footnotes',
+    'table': 'mistune.plugins.table.table',
+    'url': 'mistune.plugins.url.url',
+    'abbr': 'mistune.plugins.abbr.abbr',
+    'def_list': 'mistune.plugins.def_list.def_list',
+    'math': 'mistune.plugins.math.math',
+    'ruby': 'mistune.plugins.ruby.ruby',
+    'task_lists': 'mistune.plugins.task_lists.task_lists',
+    'spoiler': 'mistune.plugins.spoiler.spoiler',
+}
+# Plugin functions already resolved, keyed by the name they were requested with.
+_cached_modules = {}
+
+
+def import_plugin(name):
+    """Resolve a plugin to its callable.
+
+    :param name: a plugin callable (returned as is), a short name listed
+                 in ``_plugins``, or a dotted ``module.function`` path
+    :returns: the plugin callable
+    :raises ValueError: when a string name has no dot and is not a
+                        known short name
+    """
+    # Callables are never cached, so test for them before touching the
+    # cache: a callable object without ``__hash__`` would otherwise make
+    # the dict lookup raise TypeError.
+    if callable(name):
+        return name
+
+    if name in _cached_modules:
+        return _cached_modules[name]
+
+    module_path, func_name = _plugins.get(name, name).rsplit(".", 1)
+
+    module = import_module(module_path)
+    plugin = getattr(module, func_name)
+    _cached_modules[name] = plugin
+    return plugin
diff --git a/src/mistune/plugins/abbr.py b/src/mistune/plugins/abbr.py
new file mode 100644
index 0000000..1b45790
--- /dev/null
+++ b/src/mistune/plugins/abbr.py
@@ -0,0 +1,103 @@
+import re
+import types
+from ..util import escape
+from ..helpers import PREVENT_BACKSLASH
+
+__all__ = ['abbr']
+
+# https://michelf.ca/projects/php-markdown/extra/#abbr
+REF_ABBR = (
+ r'^ {0,3}\*\[(?P<abbr_key>[^\]]+)'+ PREVENT_BACKSLASH + r'\]:'
+ r'(?P<abbr_text>(?:[ \t]*\n(?: {3,}|\t)[^\n]+)|(?:[^\n]*))$'
+)
+
+
+def parse_ref_abbr(block, m, state):
+    """Record an abbreviation definition in ``state.env['ref_abbrs']``.
+
+    :param block: block parser (unused)
+    :param m: match providing ``abbr_key`` and ``abbr_text``
+    :param state: block state
+    :returns: position just past the matched definition
+    """
+    definitions = state.env.get('ref_abbrs') or {}
+    definitions[m.group('abbr_key')] = m.group('abbr_text').strip()
+    state.env['ref_abbrs'] = definitions
+    # abbr definition can split paragraph
+    state.append_token({'type': 'blank_line'})
+    return m.end() + 1
+
+
+def process_text(inline, text, state):
+    """Append ``text`` to ``state``, wrapping known abbreviations.
+
+    Installed as a replacement for the inline parser's ``process_text``.
+    Without any abbreviation definitions the text is appended as one
+    plain ``text`` token.
+
+    :param inline: inline parser the function is bound to (unused)
+    :param text: plain text to process
+    :param state: inline state receiving the tokens
+    """
+    ref = state.env.get('ref_abbrs')
+    if not ref:
+        return state.append_token({'type': 'text', 'raw': text})
+
+    # merge with a preceding text token so an abbreviation split across
+    # two chunks can still be found
+    if state.tokens:
+        last = state.tokens[-1]
+        if last['type'] == 'text':
+            state.tokens.pop()
+            text = last['raw'] + text
+
+    abbrs_re = state.env.get('abbrs_re')
+    if not abbrs_re:
+        # Regex alternation takes the first alternative that matches, so
+        # longer keys must come first; otherwise "HTML" would shadow
+        # "HTML5".
+        keys = sorted(ref, key=len, reverse=True)
+        abbrs_re = re.compile(r'|'.join(re.escape(k) for k in keys))
+        state.env['abbrs_re'] = abbrs_re
+
+    pos = 0
+    while pos < len(text):
+        m = abbrs_re.search(text, pos)
+        if not m:
+            break
+
+        end_pos = m.start()
+        if end_pos > pos:
+            # plain text between the previous position and this match
+            hole = text[pos:end_pos]
+            state.append_token({'type': 'text', 'raw': hole})
+
+        label = m.group(0)
+        state.append_token({
+            'type': 'abbr',
+            'children': [{'type': 'text', 'raw': label}],
+            'attrs': {'title': ref[label]}
+        })
+        pos = m.end()
+
+    if pos == 0:
+        # special case, just pure text
+        state.append_token({'type': 'text', 'raw': text})
+    elif pos < len(text):
+        state.append_token({'type': 'text', 'raw': text[pos:]})
+
+
+def render_abbr(renderer, text, title):
+    """Render an ``<abbr>`` element; the title attribute is escaped and
+    omitted when ``title`` is empty."""
+    opening = '<abbr title="' + escape(title) + '">' if title else '<abbr>'
+    return opening + text + '</abbr>'
+
+
+def abbr(md):
+    """A mistune plugin that adds abbreviations, following the spec at
+    https://michelf.ca/projects/php-markdown/extra/#abbr
+
+    Here is an example:
+
+    .. code-block:: text
+
+        The HTML specification
+        is maintained by the W3C.
+
+        *[HTML]: Hyper Text Markup Language
+        *[W3C]: World Wide Web Consortium
+
+    It will be converted into HTML:
+
+    .. code-block:: html
+
+        The <abbr title="Hyper Text Markup Language">HTML</abbr> specification
+        is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.
+
+    :param md: Markdown instance
+    """
+    md.block.register('ref_abbr', REF_ABBR, parse_ref_abbr, before='paragraph')
+    # replace process_text
+    md.inline.process_text = types.MethodType(process_text, md.inline)
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        renderer.register('abbr', render_abbr)
diff --git a/src/mistune/plugins/def_list.py b/src/mistune/plugins/def_list.py
new file mode 100644
index 0000000..3675641
--- /dev/null
+++ b/src/mistune/plugins/def_list.py
@@ -0,0 +1,135 @@
+import re
+from ..util import strip_end
+
+__all__ = ['def_list']
+
+# https://michelf.ca/projects/php-markdown/extra/#def-list
+
+DEF_PATTERN = (
+ r'^(?P<def_list_head>(?:[^\n]+\n)+?)'
+ r'\n?(?:'
+ r'\:[ \t]+.*\n'
+ r'(?:[^\n]+\n)*' # lazy continue line
+ r'(?:(?:[ \t]*\n)*[ \t]+[^\n]+\n)*'
+ r'(?:[ \t]*\n)*'
+ r')+'
+)
+DEF_RE = re.compile(DEF_PATTERN, re.M)
+DD_START_RE = re.compile(r'^:[ \t]+', re.M)
+TRIM_RE = re.compile(r'^ {0,4}', re.M)
+HAS_BLANK_LINE_RE = re.compile(r'\n[ \t]*\n$')
+
+
+def parse_def_list(block, m, state):
+    """Parse consecutive definition items into one ``def_list`` token.
+
+    :param block: block parser
+    :param m: match of the first definition item
+    :param state: block state
+    :returns: position after the last item consumed
+    """
+    children = []
+    # keep consuming items for as long as another one follows directly
+    while m:
+        children.extend(_parse_def_item(block, m))
+        pos = m.end()
+        m = DEF_RE.match(state.src, pos)
+
+    state.append_token({
+        'type': 'def_list',
+        'children': children,
+    })
+    return pos
+
+
+def _parse_def_item(block, m):
+    """Yield the tokens of one definition item.
+
+    Yields one ``def_list_head`` token per head line, then one
+    ``def_list_item`` token per definition introduced by a leading colon.
+
+    :param block: block parser
+    :param m: match of the definition item
+    """
+    head = m.group('def_list_head')
+    yield from (
+        {'type': 'def_list_head', 'text': line}
+        for line in head.splitlines()
+    )
+
+    src = m.group(0)
+    head_len = len(head)
+
+    first = DD_START_RE.search(src, head_len)
+    start = first.start()
+    # exactly one newline between head and first definition marks it loose
+    loose = src[head_len:start] == '\n'
+    while True:
+        following = DD_START_RE.search(src, start + 1)
+        stop = following.start() if following else len(src)
+        # replace the leading colon with a space to keep column alignment
+        text = src[start:stop].replace(':', ' ', 1)
+        children = _process_text(block, text, loose)
+        yield {
+            'type': 'def_list_item',
+            'children': children,
+        }
+        if not following:
+            return
+        # a trailing blank line makes the next definition loose
+        loose = bool(HAS_BLANK_LINE_RE.search(text))
+        start = stop
+
+
+def _process_text(block, text, loose):
+    """Parse the body of one definition into block tokens.
+
+    :param block: block parser
+    :param text: definition text, indent not yet trimmed
+    :param loose: when false, a lone paragraph becomes ``block_text``
+    :returns: list of block tokens
+    """
+    trimmed = TRIM_RE.sub('', text)
+    state = block.state_cls()
+    state.process(strip_end(trimmed))
+    # use default list rules
+    block.parse(state, block.list_rules)
+    tokens = state.tokens
+    only_paragraph = len(tokens) == 1 and tokens[0]['type'] == 'paragraph'
+    if only_paragraph and not loose:
+        tokens[0]['type'] = 'block_text'
+    return tokens
+
+
+def render_def_list(renderer, text):
+    """Wrap the rendered items in a ``<dl>`` element."""
+    return ''.join(('<dl>\n', text, '</dl>\n'))
+
+
+def render_def_list_head(renderer, text):
+    """Render a definition term as a ``<dt>`` element."""
+    return ''.join(('<dt>', text, '</dt>\n'))
+
+
+def render_def_list_item(renderer, text):
+    """Render a definition body as a ``<dd>`` element."""
+    return ''.join(('<dd>', text, '</dd>\n'))
+
+
+def def_list(md):
+    """A mistune plugin that adds definition lists, following the spec at
+    https://michelf.ca/projects/php-markdown/extra/#def-list
+
+    Here is an example:
+
+    .. code-block:: text
+
+        Apple
+        : Pomaceous fruit of plants of the genus Malus in
+          the family Rosaceae.
+
+        Orange
+        : The fruit of an evergreen tree of the genus Citrus.
+
+    It will be converted into HTML:
+
+    .. code-block:: html
+
+        <dl>
+        <dt>Apple</dt>
+        <dd>Pomaceous fruit of plants of the genus Malus in
+        the family Rosaceae.</dd>
+
+        <dt>Orange</dt>
+        <dd>The fruit of an evergreen tree of the genus Citrus.</dd>
+        </dl>
+
+    :param md: Markdown instance
+    """
+    md.block.register('def_list', DEF_PATTERN, parse_def_list, before='paragraph')
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        for name, func in (
+            ('def_list', render_def_list),
+            ('def_list_head', render_def_list_head),
+            ('def_list_item', render_def_list_item),
+        ):
+            renderer.register(name, func)
diff --git a/src/mistune/plugins/footnotes.py b/src/mistune/plugins/footnotes.py
new file mode 100644
index 0000000..2e10704
--- /dev/null
+++ b/src/mistune/plugins/footnotes.py
@@ -0,0 +1,153 @@
+import re
+from ..core import BlockState
+from ..util import unikey
+from ..helpers import LINK_LABEL
+
+__all__ = ['footnotes']
+
+_PARAGRAPH_SPLIT = re.compile(r'\n{2,}')
+# https://michelf.ca/projects/php-markdown/extra/#footnotes
+REF_FOOTNOTE = (
+ r'^(?P<footnote_lead> {0,3})'
+ r'\[\^(?P<footnote_key>' + LINK_LABEL + r')]:[ \t]'
+ r'(?P<footnote_text>[^\n]*(?:\n+|$)'
+ r'(?:(?P=footnote_lead) {1,3}(?! )[^\n]*\n+)*'
+ r')'
+)
+
+INLINE_FOOTNOTE = r'\[\^(?P<footnote_key>' + LINK_LABEL + r')\]'
+
+
+def parse_inline_footnote(inline, m: re.Match, state):
+    """Parse an inline footnote marker.
+
+    A marker whose key has a definition becomes a ``footnote_ref`` token
+    numbered by order of first use; any other marker is kept as text.
+
+    :param inline: inline parser (unused)
+    :param m: match providing ``footnote_key``
+    :param state: inline state
+    :returns: position after the marker
+    """
+    key = unikey(m.group('footnote_key'))
+    ref = state.env.get('ref_footnotes')
+    if not ref or key not in ref:
+        state.append_token({'type': 'text', 'raw': m.group(0)})
+        return m.end()
+
+    notes = state.env.get('footnotes') or []
+    if key not in notes:
+        notes.append(key)
+    state.env['footnotes'] = notes
+    state.append_token({
+        'type': 'footnote_ref',
+        'raw': key,
+        'attrs': {'index': notes.index(key) + 1}
+    })
+    return m.end()
+
+
+def parse_ref_footnote(block, m: re.Match, state: BlockState):
+    """Store a footnote definition in ``state.env['ref_footnotes']``.
+
+    The first definition of a key wins; later ones are ignored.
+
+    :returns: position after the definition
+    """
+    definitions = state.env.get('ref_footnotes') or {}
+    key = unikey(m.group('footnote_key'))
+    definitions.setdefault(key, m.group('footnote_text'))
+    state.env['ref_footnotes'] = definitions
+    return m.end()
+
+
+def parse_footnote_item(block, key: str, index: int, state: BlockState):
+    """Build the ``footnote_item`` token for one referenced footnote.
+
+    When the definition continues past its first line, the indent of
+    the first non-empty continuation line is removed from every line
+    and the text is split into paragraphs on blank lines.
+
+    :param block: block parser (unused)
+    :param key: normalized footnote key
+    :param index: 1-based footnote number
+    :param state: block state holding ``ref_footnotes``
+    :returns: a ``footnote_item`` token
+    """
+    text = state.env.get('ref_footnotes')[key]
+
+    # first non-empty line after the opening line, if any
+    second_line = next((ln for ln in text.splitlines()[1:] if ln), None)
+
+    if second_line:
+        indent = len(second_line) - len(second_line.lstrip())
+        dedent = re.compile(r'^ {' + str(indent) + r',}', flags=re.M)
+        paragraphs = _PARAGRAPH_SPLIT.split(dedent.sub('', text).strip())
+    else:
+        paragraphs = [text.strip()]
+
+    children = [{'type': 'paragraph', 'text': p} for p in paragraphs]
+    return {
+        'type': 'footnote_item',
+        'children': children,
+        'attrs': {'key': key, 'index': index}
+    }
+
+
+def md_footnotes_hook(md, result: str, state: BlockState):
+    """After-render hook that appends the rendered footnotes section.
+
+    :param md: Markdown instance
+    :param result: output rendered so far
+    :param state: block state of the finished parse
+    :returns: ``result`` unchanged when no footnote was referenced,
+              otherwise ``result`` followed by the footnotes output
+    """
+    notes = state.env.get('footnotes')
+    if not notes:
+        return result
+
+    items = [
+        parse_footnote_item(md.block, key, number, state)
+        for number, key in enumerate(notes, start=1)
+    ]
+    # render the footnotes through a fresh state holding only that token
+    footnote_state = BlockState()
+    footnote_state.tokens = [{'type': 'footnotes', 'children': items}]
+    return result + md.render_state(footnote_state)
+
+
+def render_footnote_ref(renderer, key: str, index: int):
+    """Render the superscript link that points at footnote ``index``."""
+    number = str(index)
+    return ''.join((
+        '<sup class="footnote-ref" id="fnref-', number, '">',
+        '<a href="#fn-', number, '">', number, '</a></sup>',
+    ))
+
+
+def render_footnotes(renderer, text: str):
+    """Wrap the rendered footnote items in the footnotes section."""
+    return ''.join(('<section class="footnotes">\n<ol>\n', text, '</ol>\n</section>\n'))
+
+
+def render_footnote_item(renderer, text: str, key: str, index: int):
+    """Render one footnote as a list item with a back-reference link.
+
+    The link is placed inside the closing paragraph when the rendered
+    text ends with ``</p>``; otherwise it is appended after the text, so
+    content that does not end in a paragraph is never truncated.
+
+    :param renderer: renderer instance (unused)
+    :param text: rendered footnote content
+    :param key: footnote key (unused)
+    :param index: 1-based footnote number
+    :returns: the ``<li>`` markup
+    """
+    i = str(index)
+    back = '<a href="#fnref-' + i + '" class="footnote">&#8617;</a>'
+    text = text.rstrip()
+    if text.endswith('</p>'):
+        text = text[:-4] + back + '</p>'
+    else:
+        text += back
+    return '<li id="fn-' + i + '">' + text + '</li>\n'
+
+
+def footnotes(md):
+    """A mistune plugin that adds footnotes, following the spec at
+    https://michelf.ca/projects/php-markdown/extra/#footnotes
+
+    Here is an example:
+
+    .. code-block:: text
+
+        That's some text with a footnote.[^1]
+
+        [^1]: And that's the footnote.
+
+    It will be converted into HTML:
+
+    .. code-block:: html
+
+        <p>That's some text with a footnote.<sup class="footnote-ref" id="fnref-1"><a href="#fn-1">1</a></sup></p>
+        <section class="footnotes">
+        <ol>
+        <li id="fn-1"><p>And that's the footnote.<a href="#fnref-1" class="footnote">&#8617;</a></p></li>
+        </ol>
+        </section>
+
+    :param md: Markdown instance
+    """
+    md.inline.register('footnote', INLINE_FOOTNOTE, parse_inline_footnote, before='link')
+    md.block.register('ref_footnote', REF_FOOTNOTE, parse_ref_footnote, before='ref_link')
+    md.after_render_hooks.append(md_footnotes_hook)
+
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        for name, func in (
+            ('footnote_ref', render_footnote_ref),
+            ('footnote_item', render_footnote_item),
+            ('footnotes', render_footnotes),
+        ):
+            renderer.register(name, func)
diff --git a/src/mistune/plugins/formatting.py b/src/mistune/plugins/formatting.py
new file mode 100644
index 0000000..57e5def
--- /dev/null
+++ b/src/mistune/plugins/formatting.py
@@ -0,0 +1,173 @@
+import re
+from ..helpers import PREVENT_BACKSLASH
+
+__all__ = ["strikethrough", "mark", "insert", "superscript", "subscript"]
+
+_STRIKE_END = re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\~|[^\s~])~~(?!~)')
+_MARK_END = re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\=|[^\s=])==(?!=)')
+_INSERT_END = re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\\^|[^\s^])\^\^(?!\^)')
+
+SUPERSCRIPT_PATTERN = r'\^(?:' + PREVENT_BACKSLASH + r'\\\^|\S|\\ )+?\^'
+SUBSCRIPT_PATTERN = r'~(?:' + PREVENT_BACKSLASH + r'\\~|\S|\\ )+?~'
+
+
+def parse_strikethrough(inline, m, state):
+    """Parse ``~~text~~`` into a ``strikethrough`` token."""
+    return _parse_to_end(inline, m, state, tok_type='strikethrough', end_pattern=_STRIKE_END)
+
+
+def render_strikethrough(renderer, text):
+    """Render struck-through text as a ``<del>`` element."""
+    return ''.join(('<del>', text, '</del>'))
+
+
+def parse_mark(inline, m, state):
+    """Parse ``==text==`` into a ``mark`` token."""
+    return _parse_to_end(inline, m, state, tok_type='mark', end_pattern=_MARK_END)
+
+
+def render_mark(renderer, text):
+    """Render highlighted text as a ``<mark>`` element."""
+    return ''.join(('<mark>', text, '</mark>'))
+
+
+def parse_insert(inline, m, state):
+    """Parse ``^^text^^`` into an ``insert`` token."""
+    return _parse_to_end(inline, m, state, tok_type='insert', end_pattern=_INSERT_END)
+
+
+def render_insert(renderer, text):
+    """Render inserted text as an ``<ins>`` element."""
+    return ''.join(('<ins>', text, '</ins>'))
+
+
+def parse_superscript(inline, m, state):
+    """Parse ``^text^`` into a ``superscript`` token."""
+    return _parse_script(inline, m, state, tok_type='superscript')
+
+
+def render_superscript(renderer, text):
+    """Render superscript text as a ``<sup>`` element."""
+    return ''.join(('<sup>', text, '</sup>'))
+
+
+def parse_subscript(inline, m, state):
+    """Parse ``~text~`` into a ``subscript`` token."""
+    return _parse_script(inline, m, state, tok_type='subscript')
+
+
+def render_subscript(renderer, text):
+    """Render subscript text as a ``<sub>`` element."""
+    return ''.join(('<sub>', text, '</sub>'))
+
+
+def _parse_to_end(inline, m, state, tok_type, end_pattern):
+    """Parse a span that runs from an opening marker to its closing one.
+
+    :param inline: inline parser used to render the inner text
+    :param m: match of the opening marker
+    :param state: inline state
+    :param tok_type: type of the token to append
+    :param end_pattern: compiled regex locating the two-character
+                        closing marker
+    :returns: position after the closing marker, or ``None`` when no
+              closing marker is found
+    """
+    start = m.end()
+    closing = end_pattern.search(state.src, start)
+    if closing is None:
+        return None
+
+    end_pos = closing.end()
+    # leave out the two characters of the closing marker
+    inner_text = state.src[start:end_pos - 2]
+    inner_state = state.copy()
+    inner_state.src = inner_text
+    children = inline.render(inner_state)
+    state.append_token({'type': tok_type, 'children': children})
+    return end_pos
+
+
+def _parse_script(inline, m, state, tok_type):
+    """Parse a superscript or subscript span that was matched whole.
+
+    The one-character delimiters are dropped and backslash-escaped
+    spaces become plain spaces before the inner text is rendered.
+
+    :returns: position after the span
+    """
+    matched = m.group(0)
+    inner_state = state.copy()
+    inner_state.src = matched[1:-1].replace('\\ ', ' ')
+    children = inline.render(inner_state)
+    state.append_token({'type': tok_type, 'children': children})
+    return m.end()
+
+
+def strikethrough(md):
+    """A mistune plugin that adds strikethrough. The syntax comes from
+    GitHub flavored Markdown and is supported by many parsers:
+
+    .. code-block:: text
+
+        ~~This was mistaken text~~
+
+    It will be converted into HTML:
+
+    .. code-block:: html
+
+        <del>This was mistaken text</del>
+
+    :param md: Markdown instance
+    """
+    md.inline.register('strikethrough', r'~~(?=[^\s~])', parse_strikethrough, before='link')
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        renderer.register('strikethrough', render_strikethrough)
+
+
+def mark(md):
+    """A mistune plugin that adds the ``<mark>`` tag. Spec defined at
+    https://facelessuser.github.io/pymdown-extensions/extensions/mark/:
+
+    .. code-block:: text
+
+        ==mark me== ==mark \\=\\= equal==
+
+    :param md: Markdown instance
+    """
+    md.inline.register('mark', r'==(?=[^\s=])', parse_mark, before='link')
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        renderer.register('mark', render_mark)
+
+
+def insert(md):
+    """A mistune plugin that adds the ``<ins>`` tag. Spec defined at
+    https://facelessuser.github.io/pymdown-extensions/extensions/caret/#insert:
+
+    .. code-block:: text
+
+        ^^insert me^^
+
+    :param md: Markdown instance
+    """
+    md.inline.register('insert', r'\^\^(?=[^\s\^])', parse_insert, before='link')
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        renderer.register('insert', render_insert)
+
+
+def superscript(md):
+    """A mistune plugin that adds the ``<sup>`` tag. Spec defined at
+    https://pandoc.org/MANUAL.html#superscripts-and-subscripts:
+
+    .. code-block:: text
+
+        2^10^ is 1024.
+
+    :param md: Markdown instance
+    """
+    md.inline.register(
+        'superscript',
+        SUPERSCRIPT_PATTERN,
+        parse_superscript,
+        before='linebreak',
+    )
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        renderer.register('superscript', render_superscript)
+
+
+def subscript(md):
+    """A mistune plugin that adds the ``<sub>`` tag. Spec defined at
+    https://pandoc.org/MANUAL.html#superscripts-and-subscripts:
+
+    .. code-block:: text
+
+        H~2~O is a liquid.
+
+    :param md: Markdown instance
+    """
+    md.inline.register(
+        'subscript',
+        SUBSCRIPT_PATTERN,
+        parse_subscript,
+        before='linebreak',
+    )
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        renderer.register('subscript', render_subscript)
diff --git a/src/mistune/plugins/math.py b/src/mistune/plugins/math.py
new file mode 100644
index 0000000..805105e
--- /dev/null
+++ b/src/mistune/plugins/math.py
@@ -0,0 +1,57 @@
+__all__ = ['math', 'math_in_quote', 'math_in_list']
+
+BLOCK_MATH_PATTERN = r'^ {0,3}\$\$[ \t]*\n(?P<math_text>[\s\S]+?)\n\$\$[ \t]*$'
+# The opening ``$`` must not be followed by whitespace and the closing
+# ``$`` must not be preceded by it. The closing check has to be a
+# lookbehind: a lookahead placed right before ``\$`` can never fail.
+INLINE_MATH_PATTERN = r'\$(?!\s)(?P<math_text>.+?)(?<!\s)\$'
+
+
+def parse_block_math(block, m, state):
+    """Append a ``block_math`` token holding the text between the ``$$``
+    fences and return the position just past the match."""
+    state.append_token({'type': 'block_math', 'raw': m.group('math_text')})
+    return m.end() + 1
+
+
+def parse_inline_math(inline, m, state):
+    """Append an ``inline_math`` token holding the text between the ``$``
+    delimiters and return the position after the match."""
+    state.append_token({'type': 'inline_math', 'raw': m.group('math_text')})
+    return m.end()
+
+
+def render_block_math(renderer, text):
+    """Render block math inside ``<div class="math">``.
+
+    ``text`` is the math source stored by the parser, so ``&``, ``<`` and
+    ``>`` are escaped to keep it from being read as markup.
+
+    :param renderer: renderer instance (unused)
+    :param text: math source
+    :returns: the HTML markup
+    """
+    from html import escape as html_escape
+
+    return '<div class="math">$$\n' + html_escape(text, quote=False) + '\n$$</div>\n'
+
+
+def render_inline_math(renderer, text):
+    """Render inline math inside ``<span class="math">``.
+
+    ``text`` is the math source stored by the parser, so ``&``, ``<`` and
+    ``>`` are escaped to keep it from being read as markup.
+
+    :param renderer: renderer instance (unused)
+    :param text: math source
+    :returns: the HTML markup
+    """
+    from html import escape as html_escape
+
+    return r'<span class="math">\(' + html_escape(text, quote=False) + r'\)</span>'
+
+
+def math(md):
+    """A mistune plugin that adds math. The syntax is shared by many
+    markdown extensions:
+
+    .. code-block:: text
+
+        Block math is surrounded by $$:
+
+        $$
+        f(a)=f(b)
+        $$
+
+        Inline math is surrounded by `$`, such as $f(a)=f(b)$
+
+    :param md: Markdown instance
+    """
+    md.block.register('block_math', BLOCK_MATH_PATTERN, parse_block_math, before='list')
+    md.inline.register('inline_math', INLINE_MATH_PATTERN, parse_inline_math, before='link')
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        for name, func in (
+            ('block_math', render_block_math),
+            ('inline_math', render_inline_math),
+        ):
+            renderer.register(name, func)
+
+
+def math_in_quote(md):
+    """Enable the block math rule inside block quotes.
+
+    :param md: Markdown instance
+    """
+    block = md.block
+    block.insert_rule(block.block_quote_rules, 'block_math', before='list')
+
+
+def math_in_list(md):
+    """Enable the block math rule inside lists.
+
+    :param md: Markdown instance
+    """
+    block = md.block
+    block.insert_rule(block.list_rules, 'block_math', before='list')
diff --git a/src/mistune/plugins/ruby.py b/src/mistune/plugins/ruby.py
new file mode 100644
index 0000000..eabc037
--- /dev/null
+++ b/src/mistune/plugins/ruby.py
@@ -0,0 +1,100 @@
+import re
+from ..util import unikey
+from ..helpers import parse_link, parse_link_label
+
+
+RUBY_PATTERN = r'\[(?:\w+\(\w+\))+\]'
+_ruby_re = re.compile(RUBY_PATTERN)
+
+
+def parse_ruby(inline, m, state):
+    """Parse one or more adjacent ruby groups such as ``[rb(rt)]``.
+
+    Groups that are directly followed by another ruby group are appended
+    straight away. The last group may turn out to be the text of a link,
+    in which case its tokens become the link's children.
+
+    :param inline: inline parser
+    :param m: match of the first ruby group
+    :param state: inline state
+    :returns: position after everything consumed
+    """
+    while True:
+        # drop the leading "[" and the trailing ")]"
+        body = m.group(0)[1:-2]
+        tokens = [
+            {'type': 'ruby', 'raw': rb, 'attrs': {'rt': rt}}
+            for rb, rt in (item.split('(') for item in body.split(')'))
+        ]
+        end_pos = m.end()
+
+        following = _ruby_re.match(state.src, end_pos)
+        if not following:
+            break
+        for tok in tokens:
+            state.append_token(tok)
+        m = following
+
+    # repeat link logic
+    if end_pos < len(state.src):
+        link_pos = _parse_ruby_link(inline, state, end_pos, tokens)
+        if link_pos:
+            return link_pos
+
+    for tok in tokens:
+        state.append_token(tok)
+    return end_pos
+
+
+def _parse_ruby_link(inline, state, pos, tokens):
+    """Try to read a link target right after a ruby group.
+
+    :param inline: inline parser (unused)
+    :param state: inline state
+    :param pos: index of the character following the ruby group
+    :param tokens: ruby tokens that would become the link text
+    :returns: position after the link target, or ``None`` when the text
+              at ``pos`` is not a link target
+    """
+    marker = state.src[pos]
+    if marker == '(':
+        # standard link [text](<url> "title")
+        attrs, link_pos = parse_link(state.src, pos + 1)
+        if not link_pos:
+            return None
+        state.append_token({
+            'type': 'link',
+            'children': tokens,
+            'attrs': attrs,
+        })
+        return link_pos
+
+    if marker != '[':
+        return None
+
+    # standard ref link [text][label]
+    label, link_pos = parse_link_label(state.src, pos + 1)
+    if not (label and link_pos):
+        return None
+
+    env = state.env['ref_links'].get(unikey(label))
+    if env:
+        state.append_token({
+            'type': 'link',
+            'children': tokens,
+            'attrs': {'url': env['url'], 'title': env.get('title')},
+        })
+    else:
+        # unknown label: keep the ruby tokens and the label as plain text
+        for tok in tokens:
+            state.append_token(tok)
+        state.append_token({
+            'type': 'text',
+            'raw': '[' + label + ']',
+        })
+    return link_pos
+
+
+def render_ruby(renderer, text, rt):
+    """Render one ruby pair: base ``text`` annotated with ``rt``."""
+    return ''.join(('<ruby><rb>', text, '</rb><rt>', rt, '</rt></ruby>'))
+
+
+def ruby(md):
+    """A mistune plugin that adds the ``<ruby>`` tag. The syntax is
+    defined at https://lepture.com/en/2022/markdown-ruby-markup:
+
+    .. code-block:: text
+
+        [漢字(ㄏㄢˋㄗˋ)]
+        [漢(ㄏㄢˋ)字(ㄗˋ)]
+
+        [漢字(ㄏㄢˋㄗˋ)][link]
+        [漢字(ㄏㄢˋㄗˋ)](/url "title")
+
+        [link]: /url "title"
+
+    :param md: Markdown instance
+    """
+    md.inline.register('ruby', RUBY_PATTERN, parse_ruby, before='link')
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        renderer.register('ruby', render_ruby)
diff --git a/src/mistune/plugins/speedup.py b/src/mistune/plugins/speedup.py
new file mode 100644
index 0000000..784022c
--- /dev/null
+++ b/src/mistune/plugins/speedup.py
@@ -0,0 +1,44 @@
+import re
+import string
+
+# because mismatch is too slow, add parsers for paragraph and text
+
+HARD_LINEBREAK_RE = re.compile(r' *\n\s*')
+PARAGRAPH = (
+ # start with none punctuation, not number, not whitespace
+ r'(?:^[^\s\d' + re.escape(string.punctuation) + r'][^\n]*\n)+'
+)
+
+__all__ = ['speedup']
+
+
+
+def parse_text(inline, m, state):
+    """Hand a run of plain text to ``inline.process_text``.
+
+    Every newline, together with the spaces before it and the whitespace
+    after it, is reduced to a single newline first.
+
+    :returns: position after the matched text
+    """
+    matched = m.group(0)
+    inline.process_text(HARD_LINEBREAK_RE.sub('\n', matched), state)
+    return m.end()
+
+
+def parse_paragraph(block, m, state):
+    """Add the whole matched text to ``state`` as a paragraph and return
+    the position after it."""
+    state.add_paragraph(m.group(0))
+    return m.end()
+
+
+def speedup(md):
+    """Increase the speed of parsing paragraph and inline text.
+
+    Registers a ``paragraph`` block rule and a ``text`` inline rule; the
+    text rule consumes characters up to the next place another inline
+    rule could start.
+
+    :param md: Markdown instance
+    """
+    md.block.register('paragraph', PARAGRAPH, parse_paragraph)
+
+    punc = r'\\><!\[_*`~\^\$='
+    # lookahead alternatives that end a run of plain text
+    stops = [r'[' + punc + r']']
+    if 'url_link' in md.inline.rules:
+        stops.append('https?:')
+    stops.append(r' *\n' if md.inline.hard_wrap else r' {2,}\n')
+    stops.append(r'$')
+
+    text_pattern = r'[\s\S]+?(?=' + '|'.join(stops) + r')'
+    md.inline.register('text', text_pattern, parse_text)
diff --git a/src/mistune/plugins/spoiler.py b/src/mistune/plugins/spoiler.py
new file mode 100644
index 0000000..2931d2b
--- /dev/null
+++ b/src/mistune/plugins/spoiler.py
@@ -0,0 +1,80 @@
+import re
+
+__all__ = ['spoiler']
+
+_BLOCK_SPOILER_START = re.compile(r'^ {0,3}! ?', re.M)
+_BLOCK_SPOILER_MATCH = re.compile(r'^( {0,3}![^\n]*\n)+$')
+
+INLINE_SPOILER_PATTERN = r'>!\s*(?P<spoiler_text>.+?)\s*!<'
+
+
+def parse_block_spoiler(block, m, state):
+    """Parse a block quote, turning it into a spoiler when appropriate.
+
+    A quote at nesting depth zero whose every line starts with ``!``
+    becomes a ``block_spoiler`` token; any other quote stays a
+    ``block_quote``.
+
+    :param block: block parser
+    :param m: match of the block quote start
+    :param state: block state
+    :returns: the end position reported by ``extract_block_quote`` when
+              it is truthy, otherwise ``state.cursor``
+    """
+    text, end_pos = block.extract_block_quote(m, state)
+    if not text.endswith('\n'):
+        # ensure it endswith \n to make sure
+        # _BLOCK_SPOILER_MATCH.match works
+        text += '\n'
+
+    depth = state.depth()
+    is_spoiler = not depth and bool(_BLOCK_SPOILER_MATCH.match(text))
+    if is_spoiler:
+        text = _BLOCK_SPOILER_START.sub('', text)
+    tok_type = 'block_spoiler' if is_spoiler else 'block_quote'
+
+    # scan children state
+    child = state.child_state(text)
+    rules = block.block_quote_rules
+    if state.depth() >= block.max_nested_level - 1:
+        # nesting limit reached: stop matching further block quotes
+        rules = list(rules)
+        rules.remove('block_quote')
+
+    block.parse(child, rules)
+    token = {'type': tok_type, 'children': child.tokens}
+    if not end_pos:
+        state.append_token(token)
+        return state.cursor
+    state.prepend_token(token)
+    return end_pos
+
+
+def parse_inline_spoiler(inline, m, state):
+    """Parse ``>! text !<`` into an ``inline_spoiler`` token whose
+    children are the rendered inner text.
+
+    :returns: position after the spoiler
+    """
+    inner_state = state.copy()
+    inner_state.src = m.group('spoiler_text')
+    children = inline.render(inner_state)
+    state.append_token({'type': 'inline_spoiler', 'children': children})
+    return m.end()
+
+
+def render_block_spoiler(renderer, text):
+    """Wrap rendered block content in ``<div class="spoiler">``."""
+    return ''.join(('<div class="spoiler">\n', text, '</div>\n'))
+
+
+def render_inline_spoiler(renderer, text):
+    """Wrap rendered inline content in ``<span class="spoiler">``."""
+    return ''.join(('<span class="spoiler">', text, '</span>'))
+
+
+def spoiler(md):
+    """A mistune plugin that adds block and inline spoilers. The syntax
+    is inspired by stackexchange:
+
+    .. code-block:: text
+
+        Block level spoiler looks like block quote, but with `>!`:
+
+        >! this is spoiler
+        >!
+        >! the content will be hidden
+
+        Inline spoiler is surrounded by `>!` and `!<`, such as >! hide me !<.
+
+    :param md: Markdown instance
+    """
+    # reset block quote parser with block spoiler parser
+    md.block.register('block_quote', None, parse_block_spoiler)
+    md.inline.register('inline_spoiler', INLINE_SPOILER_PATTERN, parse_inline_spoiler)
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        for name, func in (
+            ('block_spoiler', render_block_spoiler),
+            ('inline_spoiler', render_inline_spoiler),
+        ):
+            renderer.register(name, func)
diff --git a/src/mistune/plugins/table.py b/src/mistune/plugins/table.py
new file mode 100644
index 0000000..d3bc4c2
--- /dev/null
+++ b/src/mistune/plugins/table.py
@@ -0,0 +1,179 @@
+import re
+from ..helpers import PREVENT_BACKSLASH
+
+# https://michelf.ca/projects/php-markdown/extra/#table
+
+__all__ = ['table', 'table_in_quote', 'table_in_list']
+
+
+TABLE_PATTERN = (
+ r'^ {0,3}\|(?P<table_head>.+)\|[ \t]*\n'
+ r' {0,3}\|(?P<table_align> *[-:]+[-| :]*)\|[ \t]*\n'
+ r'(?P<table_body>(?: {0,3}\|.*\|[ \t]*(?:\n|$))*)\n*'
+)
+NP_TABLE_PATTERN = (
+ r'^ {0,3}(?P<nptable_head>\S.*\|.*)\n'
+ r' {0,3}(?P<nptable_align>[-:]+ *\|[-| :]*)\n'
+ r'(?P<nptable_body>(?:.*\|.*(?:\n|$))*)\n*'
+)
+
+TABLE_CELL = re.compile(r'^ {0,3}\|(.+)\|[ \t]*$')
+CELL_SPLIT = re.compile(r' *' + PREVENT_BACKSLASH + r'\| *')
+ALIGN_CENTER = re.compile(r'^ *:-+: *$')
+ALIGN_LEFT = re.compile(r'^ *:-+ *$')
+ALIGN_RIGHT = re.compile(r'^ *-+: *$')
+
+
+def parse_table(block, m, state):
+    """Parse a table whose rows are wrapped in leading and trailing pipes.
+
+    :param block: block parser (unused)
+    :param m: match providing ``table_head``, ``table_align`` and
+              ``table_body``
+    :param state: block state
+    :returns: position after the table, or ``None`` when the header or
+              any row does not fit the alignment columns
+    """
+    pos = m.end()
+    thead, aligns = _process_thead(m.group('table_head'), m.group('table_align'))
+    if not thead:
+        return None
+
+    rows = []
+    for line in m.group('table_body').splitlines():
+        cell_match = TABLE_CELL.match(line)
+        if not cell_match:  # pragma: no cover
+            return None
+        row = _process_row(cell_match.group(1), aligns)
+        if not row:
+            return None
+        rows.append(row)
+
+    tbody = {'type': 'table_body', 'children': rows}
+    state.append_token({'type': 'table', 'children': [thead, tbody]})
+    return pos
+
+
+def parse_nptable(block, m, state):
+    """Parse a table written without leading and trailing pipes.
+
+    :param block: block parser (unused)
+    :param m: match providing ``nptable_head``, ``nptable_align`` and
+              ``nptable_body``
+    :param state: block state
+    :returns: position after the table, or ``None`` when the header or
+              any row does not fit the alignment columns
+    """
+    thead, aligns = _process_thead(m.group('nptable_head'), m.group('nptable_align'))
+    if not thead:
+        return None
+
+    rows = []
+    for line in m.group('nptable_body').splitlines():
+        row = _process_row(line, aligns)
+        if not row:
+            return None
+        rows.append(row)
+
+    tbody = {'type': 'table_body', 'children': rows}
+    state.append_token({'type': 'table', 'children': [thead, tbody]})
+    return m.end()
+
+
+def _process_thead(header, align):
+    """Build the table head token and the per-column alignments.
+
+    :param header: header row text
+    :param align: delimiter row text
+    :returns: ``(thead, aligns)`` where ``aligns`` holds ``'center'``,
+              ``'left'``, ``'right'`` or ``None`` per column, or
+              ``(None, None)`` when the two rows differ in cell count
+    """
+    headers = CELL_SPLIT.split(header)
+    specs = CELL_SPLIT.split(align)
+    if len(headers) != len(specs):
+        return None, None
+
+    # checked in this order; the first pattern that matches wins
+    detectors = (
+        (ALIGN_CENTER, 'center'),
+        (ALIGN_LEFT, 'left'),
+        (ALIGN_RIGHT, 'right'),
+    )
+    aligns = [
+        next((name for pattern, name in detectors if pattern.match(spec)), None)
+        for spec in specs
+    ]
+
+    cells = [
+        {
+            'type': 'table_cell',
+            'text': cell.strip(),
+            'attrs': {'align': cell_align, 'head': True}
+        }
+        for cell, cell_align in zip(headers, aligns)
+    ]
+    return {'type': 'table_head', 'children': cells}, aligns
+
+
+def _process_row(text, aligns):
+    """Build a ``table_row`` token from one body row.
+
+    :param text: row text
+    :param aligns: per-column alignments from the header
+    :returns: the row token, or ``None`` when the cell count differs
+              from the number of columns
+    """
+    cells = CELL_SPLIT.split(text)
+    if len(cells) != len(aligns):
+        return None
+
+    children = [
+        {
+            'type': 'table_cell',
+            'text': cell.strip(),
+            'attrs': {'align': cell_align, 'head': False}
+        }
+        for cell, cell_align in zip(cells, aligns)
+    ]
+    return {'type': 'table_row', 'children': children}
+
+
+def render_table(renderer, text):
+    """Wrap the rendered head and body in a ``<table>`` element."""
+    return ''.join(('<table>\n', text, '</table>\n'))
+
+
+def render_table_head(renderer, text):
+    """Wrap the rendered header cells in ``<thead>`` and a single row."""
+    return ''.join(('<thead>\n<tr>\n', text, '</tr>\n</thead>\n'))
+
+
+def render_table_body(renderer, text):
+    """Wrap the rendered rows in a ``<tbody>`` element."""
+    return ''.join(('<tbody>\n', text, '</tbody>\n'))
+
+
+def render_table_row(renderer, text):
+    """Wrap the rendered cells in a ``<tr>`` element."""
+    return ''.join(('<tr>\n', text, '</tr>\n'))
+
+
+def render_table_cell(renderer, text, align=None, head=False):
+    """Render one table cell.
+
+    :param renderer: renderer instance (unused)
+    :param text: rendered cell content
+    :param align: value for ``text-align``, or a falsy value for none
+    :param head: emit ``<th>`` when true, ``<td>`` otherwise
+    :returns: the cell markup
+    """
+    tag = 'th' if head else 'td'
+    style = ' style="text-align:' + align + '"' if align else ''
+    return ' <' + tag + style + '>' + text + '</' + tag + '>\n'
+
+
+def table(md):
+    """A mistune plugin that adds tables, following the spec at
+    https://michelf.ca/projects/php-markdown/extra/#table
+
+    Here is an example:
+
+    .. code-block:: text
+
+        First Header  | Second Header
+        ------------- | -------------
+        Content Cell  | Content Cell
+        Content Cell  | Content Cell
+
+    :param md: Markdown instance
+    """
+    md.block.register('table', TABLE_PATTERN, parse_table, before='paragraph')
+    md.block.register('nptable', NP_TABLE_PATTERN, parse_nptable, before='paragraph')
+
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        for name, func in (
+            ('table', render_table),
+            ('table_head', render_table_head),
+            ('table_body', render_table_body),
+            ('table_row', render_table_row),
+            ('table_cell', render_table_cell),
+        ):
+            renderer.register(name, func)
+
+
+def table_in_quote(md):
+    """Enable both table rules inside block quotes.
+
+    :param md: Markdown instance
+    """
+    for rule in ('table', 'nptable'):
+        md.block.insert_rule(md.block.block_quote_rules, rule, before='paragraph')
+
+
+def table_in_list(md):
+    """Enable both table rules inside lists.
+
+    :param md: Markdown instance
+    """
+    for rule in ('table', 'nptable'):
+        md.block.insert_rule(md.block.list_rules, rule, before='paragraph')
diff --git a/src/mistune/plugins/task_lists.py b/src/mistune/plugins/task_lists.py
new file mode 100644
index 0000000..8571c32
--- /dev/null
+++ b/src/mistune/plugins/task_lists.py
@@ -0,0 +1,67 @@
+import re
+
+__all__ = ['task_lists']
+
+
+# Matches a task marker at the very start of the text -- ``[ ]``, ``[x]`` or
+# ``[X]`` -- together with the whitespace that follows it. Group 1 captures
+# the bracketed marker itself.
+TASK_LIST_ITEM = re.compile(r'^(\[[ xX]\])\s+')
+
+
+def task_lists_hook(md, state):
+    """Before-render hook: rewrite task-style list items in ``state.tokens``.
+
+    :param md: Markdown instance (not used here)
+    :param state: state object whose ``tokens`` are rewritten in place
+    :returns: the same token list
+    """
+    tokens = state.tokens
+    return _rewrite_all_list_items(tokens)
+
+
+def render_task_list_item(renderer, text, checked=False):
+    """Render a task list item as ``<li>`` with a disabled checkbox.
+
+    When the item content starts with ``<p>`` the checkbox is placed just
+    inside that paragraph; otherwise it is put in front of the content.
+
+    :param renderer: renderer instance (not used here)
+    :param text: rendered content of the list item
+    :param checked: whether the checkbox carries the ``checked`` attribute
+    """
+    closing = ' checked/>' if checked else '/>'
+    checkbox = '<input class="task-list-item-checkbox" type="checkbox" disabled' + closing
+
+    opening = '<p>'
+    if text.startswith(opening):
+        body = opening + checkbox + text[len(opening):]
+    else:
+        body = checkbox + text
+
+    return '<li class="task-list-item">' + body + '</li>\n'
+
+
+def task_lists(md):
+    """A mistune plugin to support task lists, as defined by GitHub
+    flavored Markdown and commonly supported by other parsers:
+
+    .. code-block:: text
+
+        - [ ] unchecked task
+        - [x] checked task
+
+    The rewrite runs as a before-render hook. The HTML render function is
+    only registered when the renderer's ``NAME`` is ``'html'``.
+
+    :param md: Markdown instance
+    """
+    md.before_render_hooks.append(task_lists_hook)
+
+    renderer = md.renderer
+    if renderer and renderer.NAME == 'html':
+        renderer.register('task_list_item', render_task_list_item)
+
+
+def _rewrite_all_list_items(tokens):
+    """Walk the token tree and rewrite every ``list_item`` token in place.
+
+    Tokens that carry ``children`` are descended into recursively.
+
+    :returns: the same ``tokens`` list that was passed in
+    """
+    for token in tokens:
+        if token['type'] == 'list_item':
+            _rewrite_list_item(token)
+        if 'children' not in token:
+            continue
+        _rewrite_all_list_items(token['children'])
+    return tokens
+
+
+def _rewrite_list_item(tok):
+    """Turn a list item into a ``task_list_item`` when it starts with a marker.
+
+    Only the ``text`` of the first child is inspected. On a match the marker
+    is removed from that text, the token type is changed and
+    ``attrs['checked']`` records whether the marker was anything but ``[ ]``.
+    Items without children or without a marker are left untouched.
+    """
+    children = tok['children']
+    if not children:
+        return
+
+    head = children[0]
+    raw = head.get('text', '')
+    found = TASK_LIST_ITEM.match(raw)
+    if found is None:
+        return
+
+    head['text'] = raw[found.end():]
+    tok['type'] = 'task_list_item'
+    tok['attrs'] = {'checked': found.group(1) != '[ ]'}
diff --git a/src/mistune/plugins/url.py b/src/mistune/plugins/url.py
new file mode 100644
index 0000000..d6f2251
--- /dev/null
+++ b/src/mistune/plugins/url.py
@@ -0,0 +1,23 @@
+from ..util import escape_url
+
+__all__ = ['url']
+
+# A bare ``http://`` or ``https://`` URL: runs up to whitespace or ``<``, and
+# the final character may not be one of ``< . , : ; " ' ) ]`` or whitespace.
+URL_LINK_PATTERN = r'''https?:\/\/[^\s<]+[^<.,:;"')\]\s]'''
+
+
+def parse_url_link(inline, m, state):
+    """Handle a bare URL matched by ``URL_LINK_PATTERN``.
+
+    Inside an existing link (``state.in_link``) the URL is processed as plain
+    text; otherwise a ``link`` token pointing at the escaped URL is appended.
+
+    :param inline: inline parser instance
+    :param m: regex match for the URL
+    :param state: inline state receiving the token
+    :returns: end position of the match
+    """
+    matched = m.group(0)
+    end = m.end()
+    if not state.in_link:
+        state.append_token({
+            'type': 'link',
+            'children': [{'type': 'text', 'raw': matched}],
+            'attrs': {'url': escape_url(matched)},
+        })
+    else:
+        inline.process_text(matched, state)
+    return end
+
+
+def url(md):
+    """A mistune plugin that turns bare ``http(s)://`` URLs into links.
+
+    :param md: Markdown instance
+    """
+    inline_parser = md.inline
+    inline_parser.register('url_link', URL_LINK_PATTERN, parse_url_link)
diff --git a/src/mistune/renderers/__init__.py b/src/mistune/renderers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/mistune/renderers/__init__.py
diff --git a/src/mistune/renderers/_list.py b/src/mistune/renderers/_list.py
new file mode 100644
index 0000000..0a18639
--- /dev/null
+++ b/src/mistune/renderers/_list.py
@@ -0,0 +1,60 @@
+from ..util import strip_end
+
+
+def render_list(renderer, token, state) -> str:
+    """Render a ``list`` token back to marker-prefixed text.
+
+    A top-level list has its trailing whitespace collapsed to one newline.
+    A nested list (one carrying a ``parent`` entry) is returned as is when
+    the parent is tight, and with an extra newline when it is loose.
+    """
+    if token['attrs']['ordered']:
+        items = _render_ordered_list(renderer, token, state)
+    else:
+        items = _render_unordered_list(renderer, token, state)
+    text = ''.join(items)
+
+    parent = token.get('parent')
+    if not parent:
+        return strip_end(text) + '\n'
+    return text if parent['tight'] else text + '\n'
+
+
+def _render_list_item(renderer, parent, item, state):
+    """Render one list item with its marker and indented continuation lines.
+
+    :param renderer: object providing ``render_token(token, state)``
+    :param parent: dict describing the enclosing list; ``leading`` is the
+        marker text put in front of the first line. The dict is also attached
+        to nested ``list`` tokens as their ``parent``.
+    :param item: list item token; its ``children`` are rendered in order,
+        skipping ``blank_line`` tokens
+    :param state: passed through to ``render_token``
+    :returns: the marker followed by the item text; later non-empty lines are
+        indented by the width of the marker
+    """
+    leading = parent['leading']
+    rendered = []
+    for tok in item['children']:
+        if tok['type'] == 'list':
+            # lets the nested list know whether its container is tight
+            tok['parent'] = parent
+        elif tok['type'] == 'blank_line':
+            continue
+        rendered.append(renderer.render_token(tok, state))
+
+    lines = ''.join(rendered).splitlines()
+    # An item with no renderable content yields no lines at all; emit the
+    # bare marker instead of failing on ``lines[0]``.
+    first = lines[0] if lines else ''
+    prefix = ' ' * len(leading)
+
+    out = [leading, first, '\n']
+    for line in lines[1:]:
+        out.append(prefix + line + '\n' if line else '\n')
+    return ''.join(out)
+
+
+def _render_ordered_list(renderer, token, state):
+    """Yield the rendered items of an ordered list.
+
+    Numbering begins at ``attrs['start']`` (1 when absent) and increases by
+    one per item; each marker is the number followed by the list's
+    ``bullet`` and a space.
+    """
+    first_number = token['attrs'].get('start', 1)
+    for number, item in enumerate(token['children'], first_number):
+        parent = {
+            'leading': str(number) + token['bullet'] + ' ',
+            'tight': token['tight'],
+        }
+        yield _render_list_item(renderer, parent, item, state)
+
+
+def _render_unordered_list(renderer, token, state):
+    """Yield the rendered items of a bullet list.
+
+    Every item shares one parent description: the list's ``bullet`` plus a
+    space as marker, and the list's ``tight`` flag.
+    """
+    marker = token['bullet'] + ' '
+    parent = {'leading': marker, 'tight': token['tight']}
+    for child in token['children']:
+        yield _render_list_item(renderer, parent, child, state)
diff --git a/src/mistune/renderers/html.py b/src/mistune/renderers/html.py
new file mode 100644
index 0000000..c458a4a
--- /dev/null
+++ b/src/mistune/renderers/html.py
@@ -0,0 +1,151 @@
+from ..core import BaseRenderer
+from ..util import escape as escape_text, striptags, safe_entity
+
+
+class HTMLRenderer(BaseRenderer):
+    """Render Markdown tokens as HTML.
+
+    :param escape: when true, text and raw HTML are escaped on output
+    :param allow_harmful_protocols: ``True`` switches URL filtering off;
+        otherwise an optional collection of URL prefixes (compared against
+        the lower-cased URL) that ``safe_url`` lets through
+    """
+    NAME = 'html'
+    HARMFUL_PROTOCOLS = ('javascript:', 'vbscript:', 'file:', 'data:')
+    GOOD_DATA_PROTOCOLS = (
+        'data:image/gif;',
+        'data:image/png;',
+        'data:image/jpeg;',
+        'data:image/webp;',
+    )
+
+    def __init__(self, escape=True, allow_harmful_protocols=None):
+        super().__init__()
+        self._escape = escape
+        self._allow_harmful_protocols = allow_harmful_protocols
+
+    def render_token(self, token, state):
+        """Render one token using the v2-compatible calling convention.
+
+        The render method receives the token's ``raw`` value, or else its
+        rendered children, as the first argument (no positional argument
+        when the token has neither) and the ``attrs`` as keyword arguments.
+        """
+        method = self._get_method(token['type'])
+        kwargs = token.get('attrs') or {}
+
+        if 'raw' in token:
+            return method(token['raw'], **kwargs)
+        if 'children' in token:
+            return method(self.render_tokens(token['children'], state), **kwargs)
+        return method(**kwargs)
+
+    def safe_url(self, url: str) -> str:
+        """Return ``url`` unless it uses a harmful protocol.
+
+        Used when rendering links and images. URLs starting with one of
+        ``HARMFUL_PROTOCOLS`` are replaced by ``#harmful-link``, except for
+        the image data URLs in ``GOOD_DATA_PROTOCOLS`` and anything allowed
+        through ``allow_harmful_protocols``.
+        """
+        allowed = self._allow_harmful_protocols
+        if allowed is True:
+            return url
+
+        lowered = url.lower()
+        if allowed and lowered.startswith(tuple(allowed)):
+            return url
+
+        is_harmful = lowered.startswith(self.HARMFUL_PROTOCOLS)
+        if is_harmful and not lowered.startswith(self.GOOD_DATA_PROTOCOLS):
+            return '#harmful-link'
+        return url
+
+    def text(self, text: str) -> str:
+        return escape_text(text) if self._escape else safe_entity(text)
+
+    def emphasis(self, text: str) -> str:
+        return f'<em>{text}</em>'
+
+    def strong(self, text: str) -> str:
+        return f'<strong>{text}</strong>'
+
+    def link(self, text: str, url: str, title=None) -> str:
+        attributes = 'href="' + self.safe_url(url) + '"'
+        if title:
+            attributes += ' title="' + safe_entity(title) + '"'
+        return '<a ' + attributes + '>' + text + '</a>'
+
+    def image(self, text: str, url: str, title=None) -> str:
+        src = self.safe_url(url)
+        # the alt text is the rendered children with any markup removed
+        alt = escape_text(striptags(text))
+        attributes = 'src="' + src + '" alt="' + alt + '"'
+        if title:
+            attributes += ' title="' + safe_entity(title) + '"'
+        return '<img ' + attributes + ' />'
+
+    def codespan(self, text: str) -> str:
+        return f'<code>{text}</code>'
+
+    def linebreak(self) -> str:
+        return '<br />\n'
+
+    def softbreak(self) -> str:
+        return '\n'
+
+    def inline_html(self, html: str) -> str:
+        return escape_text(html) if self._escape else html
+
+    def paragraph(self, text: str) -> str:
+        return f'<p>{text}</p>\n'
+
+    def heading(self, text: str, level: int, **attrs) -> str:
+        tag = 'h' + str(level)
+        _id = attrs.get('id')
+        id_attr = ' id="' + _id + '"' if _id else ''
+        return '<' + tag + id_attr + '>' + text + '</' + tag + '>\n'
+
+    def blank_line(self) -> str:
+        return ''
+
+    def thematic_break(self) -> str:
+        return '<hr />\n'
+
+    def block_text(self, text: str) -> str:
+        return text
+
+    def block_code(self, code: str, info=None) -> str:
+        """Render a code block; the first word of ``info`` becomes the
+        ``language-*`` class."""
+        class_attr = ''
+        if info is not None:
+            info = safe_entity(info.strip())
+        if info:
+            class_attr = ' class="language-' + info.split(None, 1)[0] + '"'
+        return '<pre><code' + class_attr + '>' + escape_text(code) + '</code></pre>\n'
+
+    def block_quote(self, text: str) -> str:
+        return f'<blockquote>\n{text}</blockquote>\n'
+
+    def block_html(self, html: str) -> str:
+        if not self._escape:
+            return html + '\n'
+        return '<p>' + escape_text(html) + '</p>\n'
+
+    def block_error(self, text: str) -> str:
+        return f'<div class="error"><pre>{text}</pre></div>\n'
+
+    def list(self, text: str, ordered: bool, **attrs) -> str:
+        if not ordered:
+            return '<ul>\n' + text + '</ul>\n'
+        start = attrs.get('start')
+        start_attr = '' if start is None else ' start="' + str(start) + '"'
+        return '<ol' + start_attr + '>\n' + text + '</ol>\n'
+
+    def list_item(self, text: str) -> str:
+        return f'<li>{text}</li>\n'
diff --git a/src/mistune/renderers/markdown.py b/src/mistune/renderers/markdown.py
new file mode 100644
index 0000000..78334bc
--- /dev/null
+++ b/src/mistune/renderers/markdown.py
@@ -0,0 +1,146 @@
+import re
+from typing import Dict, Any
+from textwrap import indent
+from ._list import render_list
+from ..core import BaseRenderer, BlockState
+from ..util import strip_end
+
+# Runs of backticks and/or tildes at the start of any line (multi-line mode).
+# Used to spot text inside a code block that could be mistaken for a fence.
+fenced_re = re.compile(r'^(?:`|~)+', re.M)
+
+
+class MarkdownRenderer(BaseRenderer):
+    """A renderer to re-format Markdown text."""
+    NAME = 'markdown'
+
+    def __call__(self, tokens, state: BlockState):
+        """Render ``tokens`` and append the reference link definitions."""
+        out = self.render_tokens(tokens, state)
+        # special handle for line breaks
+        out += '\n\n'.join(self.render_referrences(state)) + '\n'
+        return strip_end(out)
+
+    def render_referrences(self, state: BlockState):
+        """Yield one ``[label]: url "title"`` line per entry in
+        ``state.env['ref_links']``; the title part is omitted when empty."""
+        ref_links = state.env['ref_links']
+        for key in ref_links:
+            attrs = ref_links[key]
+            text = '[' + attrs['label'] + ']: ' + attrs['url']
+            title = attrs.get('title')
+            if title:
+                text += ' "' + title + '"'
+            yield text
+
+    def render_children(self, token, state: BlockState):
+        children = token['children']
+        return self.render_tokens(children, state)
+
+    def text(self, token: Dict[str, Any], state: BlockState) -> str:
+        return token['raw']
+
+    def emphasis(self, token: Dict[str, Any], state: BlockState) -> str:
+        return '*' + self.render_children(token, state) + '*'
+
+    def strong(self, token: Dict[str, Any], state: BlockState) -> str:
+        return '**' + self.render_children(token, state) + '**'
+
+    def link(self, token: Dict[str, Any], state: BlockState) -> str:
+        """Render a link as a reference link, an autolink or an inline link.
+
+        A token with a ``label`` becomes ``[text][label]``. A link whose text
+        equals its URL (or its URL minus ``mailto:``) and has no title becomes
+        ``<text>``. Everything else becomes ``[text](url "title")``, with the
+        URL wrapped in ``<>`` when it contains parentheses.
+        """
+        label = token.get('label')
+        text = self.render_children(token, state)
+        out = '[' + text + ']'
+        if label:
+            return out + '[' + label + ']'
+
+        attrs = token['attrs']
+        url = attrs['url']
+        title = attrs.get('title')
+        if text == url and not title:
+            return '<' + text + '>'
+        elif 'mailto:' + text == url and not title:
+            return '<' + text + '>'
+
+        out += '('
+        if '(' in url or ')' in url:
+            out += '<' + url + '>'
+        else:
+            out += url
+        if title:
+            out += ' "' + title + '"'
+        return out + ')'
+
+    def image(self, token: Dict[str, Any], state: BlockState) -> str:
+        return '!' + self.link(token, state)
+
+    def codespan(self, token: Dict[str, Any], state: BlockState) -> str:
+        return '`' + token['raw'] + '`'
+
+    def linebreak(self, token: Dict[str, Any], state: BlockState) -> str:
+        # A Markdown hard line break needs at least two trailing spaces; a
+        # single space before the newline is read back as a soft break.
+        return '  \n'
+
+    def softbreak(self, token: Dict[str, Any], state: BlockState) -> str:
+        return '\n'
+
+    def blank_line(self, token: Dict[str, Any], state: BlockState) -> str:
+        return ''
+
+    def inline_html(self, token: Dict[str, Any], state: BlockState) -> str:
+        return token['raw']
+
+    def paragraph(self, token: Dict[str, Any], state: BlockState) -> str:
+        text = self.render_children(token, state)
+        return text + '\n\n'
+
+    def heading(self, token: Dict[str, Any], state: BlockState) -> str:
+        level = token['attrs']['level']
+        marker = '#' * level
+        text = self.render_children(token, state)
+        return marker + ' ' + text + '\n\n'
+
+    def thematic_break(self, token: Dict[str, Any], state: BlockState) -> str:
+        return '***\n\n'
+
+    def block_text(self, token: Dict[str, Any], state: BlockState) -> str:
+        return self.render_children(token, state) + '\n'
+
+    def block_code(self, token: Dict[str, Any], state: BlockState) -> str:
+        """Render a code block as a fenced block.
+
+        The token's own ``marker`` is reused when present; otherwise a fence
+        is chosen from the code content.
+        """
+        attrs = token.get('attrs', {})
+        info = attrs.get('info', '')
+        code = token['raw']
+        if code and code[-1] != '\n':
+            code += '\n'
+
+        marker = token.get('marker')
+        if not marker:
+            marker = _get_fenced_marker(code)
+        return marker + info + '\n' + code + marker + '\n\n'
+
+    def block_quote(self, token: Dict[str, Any], state: BlockState) -> str:
+        """Render a block quote with ``>`` in front of every line.
+
+        Blank lines inside the quote get a bare ``>`` as well. Leaving them
+        unmarked would end the quote there, so a quote holding several blocks
+        would be read back as several separate quotes.
+        """
+        content = self.render_children(token, state).rstrip('\n')
+        quoted = '\n'.join(
+            '> ' + line if line else '>'
+            for line in content.split('\n')
+        )
+        return quoted + '\n\n'
+
+    def block_html(self, token: Dict[str, Any], state: BlockState) -> str:
+        return token['raw'] + '\n\n'
+
+    def block_error(self, token: Dict[str, Any], state: BlockState) -> str:
+        return ''
+
+    def list(self, token: Dict[str, Any], state: BlockState) -> str:
+        return render_list(self, token, state)
+
+
+def _get_fenced_marker(code):
+    """Choose a fence that no line inside ``code`` can close early.
+
+    Lines of ``code`` that start with backticks or tildes are collected with
+    ``fenced_re``. Without any such line, or with tildes only, three
+    backticks are used. With backticks only, three tildes are used. When
+    both occur, the fence is a backtick run one longer than the longest
+    backtick run found, but never shorter than three characters.
+
+    :param code: text of the code block
+    :returns: the fence string
+    """
+    found = fenced_re.findall(code)
+    if not found:
+        return '```'
+
+    # a run is classified by its first character
+    ticks = [len(run) for run in found if run[0] == '`']
+    waves = [len(run) for run in found if run[0] != '`']
+
+    if not ticks:
+        return '```'
+
+    if not waves:
+        return '~~~'
+
+    # A fence must be at least three characters long; one or two backticks
+    # would not open a code block at all.
+    return '`' * max(3, max(ticks) + 1)
diff --git a/src/mistune/renderers/rst.py b/src/mistune/renderers/rst.py
new file mode 100644
index 0000000..fa12c21
--- /dev/null
+++ b/src/mistune/renderers/rst.py
@@ -0,0 +1,147 @@
+from typing import Dict, Any
+from textwrap import indent
+from ._list import render_list
+from ..core import BaseRenderer, BlockState
+from ..util import strip_end
+
+
+class RSTRenderer(BaseRenderer):
+    """A renderer for converting Markdown to ReST."""
+    NAME = 'rst'
+
+    #: underline character used for each heading level
+    HEADING_MARKERS = {
+        1: '=',
+        2: '-',
+        3: '~',
+        4: '^',
+        5: '"',
+        6: "'",
+    }
+    #: prefix of the substitution names generated for inline images
+    INLINE_IMAGE_PREFIX = 'img-'
+
+    def iter_tokens(self, tokens, state):
+        """Yield each rendered token, skipping blank lines.
+
+        Every rendered token gets a ``prev`` entry pointing at the previous
+        non-blank token (``None`` for the first one).
+        """
+        previous = None
+        for token in tokens:
+            if token['type'] == 'blank_line':
+                continue
+            token['prev'] = previous
+            previous = token
+            yield self.render_token(token, state)
+
+    def __call__(self, tokens, state: BlockState):
+        """Render ``tokens`` and append the inline image definitions."""
+        state.env['inline_images'] = []
+        body = self.render_tokens(tokens, state)
+        # special handle for line breaks
+        references = '\n\n'.join(self.render_referrences(state))
+        return strip_end(body + references + '\n')
+
+    def render_referrences(self, state: BlockState):
+        """Yield one substitution definition per collected inline image."""
+        for index, token in enumerate(state.env['inline_images']):
+            attrs = token['attrs']
+            alt = self.render_children(token, state)
+            ident = self.INLINE_IMAGE_PREFIX + str(index)
+            yield '.. |' + ident + '| image:: ' + attrs['url'] + '\n :alt: ' + alt
+
+    def render_children(self, token, state: BlockState):
+        return self.render_tokens(token['children'], state)
+
+    def text(self, token: Dict[str, Any], state: BlockState) -> str:
+        # a bare ``|`` is escaped so it is not read as substitution markup
+        return token['raw'].replace('|', r'\|')
+
+    def emphasis(self, token: Dict[str, Any], state: BlockState) -> str:
+        inner = self.render_children(token, state)
+        return '*' + inner + '*'
+
+    def strong(self, token: Dict[str, Any], state: BlockState) -> str:
+        inner = self.render_children(token, state)
+        return '**' + inner + '**'
+
+    def link(self, token: Dict[str, Any], state: BlockState) -> str:
+        attrs = token['attrs']
+        label = self.render_children(token, state)
+        return '`' + label + ' <' + attrs['url'] + '>`__'
+
+    def image(self, token: Dict[str, Any], state: BlockState) -> str:
+        """Record the image for later definition and return its ``|img-N|``
+        substitution reference."""
+        collected: list = state.env['inline_images']
+        position = len(collected)
+        collected.append(token)
+        return '|' + self.INLINE_IMAGE_PREFIX + str(position) + '|'
+
+    def codespan(self, token: Dict[str, Any], state: BlockState) -> str:
+        return '``' + token['raw'] + '``'
+
+    def linebreak(self, token: Dict[str, Any], state: BlockState) -> str:
+        # placeholder; ``paragraph`` splits on it to build a line block
+        return '<linebreak>'
+
+    def softbreak(self, token: Dict[str, Any], state: BlockState) -> str:
+        return ' '
+
+    def inline_html(self, token: Dict[str, Any], state: BlockState) -> str:
+        # rst does not support inline html
+        return ''
+
+    def paragraph(self, token: Dict[str, Any], state: BlockState) -> str:
+        """Render a paragraph.
+
+        A paragraph holding nothing but one image becomes a ``figure``
+        directive. A paragraph containing hard line breaks becomes a line
+        block with ``| `` in front of every line.
+        """
+        children = token['children']
+        only_image = len(children) == 1 and children[0]['type'] == 'image'
+        if only_image:
+            image = children[0]
+            attrs = image['attrs']
+            title = attrs.get('title')
+            alt = self.render_children(image, state)
+            text = '.. figure:: ' + attrs['url']
+            if title:
+                text += '\n :alt: ' + title
+            text += '\n\n' + indent(alt, ' ')
+            return text + '\n\n'
+
+        text = self.render_tokens(children, state)
+        pieces = text.split('<linebreak>')
+        if len(pieces) > 1:
+            text = '\n'.join('| ' + piece for piece in pieces)
+        return text + '\n\n'
+
+    def heading(self, token: Dict[str, Any], state: BlockState) -> str:
+        attrs = token['attrs']
+        title = self.render_children(token, state)
+        underline = self.HEADING_MARKERS[attrs['level']] * len(title)
+        return title + '\n' + underline + '\n\n'
+
+    def thematic_break(self, token: Dict[str, Any], state: BlockState) -> str:
+        return '--------------\n\n'
+
+    def block_text(self, token: Dict[str, Any], state: BlockState) -> str:
+        return self.render_children(token, state) + '\n'
+
+    def block_code(self, token: Dict[str, Any], state: BlockState) -> str:
+        """Render a code block as a ``code`` directive when ``info`` names a
+        language, otherwise as a plain ``::`` literal block."""
+        info = token.get('attrs', {}).get('info')
+        code = indent(token['raw'], ' ')
+        if not info:
+            return '::\n\n' + code + '\n\n'
+        lang = info.split()[0]
+        return '.. code:: ' + lang + '\n\n' + code + '\n'
+
+    def block_quote(self, token: Dict[str, Any], state: BlockState) -> str:
+        """Render a block quote as an indented block.
+
+        An empty comment (``..``) is put in front unless there is no previous
+        token or the previous token is one of the types listed below.
+        """
+        text = indent(self.render_children(token, state), ' ')
+        previous = token['prev']
+        ignore_blocks = (
+            'paragraph',
+            'thematic_break',
+            'linebreak',
+            'heading',
+        )
+        needs_separator = previous and previous['type'] not in ignore_blocks
+        return '..\n\n' + text if needs_separator else text
+
+    def block_html(self, token: Dict[str, Any], state: BlockState) -> str:
+        return '.. raw:: html\n\n' + indent(token['raw'], ' ') + '\n\n'
+
+    def block_error(self, token: Dict[str, Any], state: BlockState) -> str:
+        return ''
+
+    def list(self, token: Dict[str, Any], state: BlockState) -> str:
+        return render_list(self, token, state)
diff --git a/src/mistune/toc.py b/src/mistune/toc.py
new file mode 100644
index 0000000..c908b0c
--- /dev/null
+++ b/src/mistune/toc.py
@@ -0,0 +1,111 @@
+from .util import striptags
+
+
+def add_toc_hook(md, min_level=1, max_level=3, heading_id=None):
+    """Add a hook that saves the toc items into ``state.env``. This is
+    usually helpful for doc generators::
+
+        import mistune
+        from mistune.toc import add_toc_hook, render_toc_ul
+
+        md = mistune.create_markdown(...)
+        add_toc_hook(md, min_level, max_level, heading_id)
+
+        html, state = md.parse(text)
+        toc_items = state.env['toc_items']
+        toc_html = render_toc_ul(toc_items)
+
+    Only top-level heading tokens whose level lies within
+    ``min_level``..``max_level`` are collected. Each collected heading gets
+    an ``id`` attribute from ``heading_id``.
+
+    :param md: Markdown instance
+    :param min_level: min heading level
+    :param max_level: max heading level
+    :param heading_id: a function ``(token, index)`` to generate the heading
+        id; defaults to ``toc_1``, ``toc_2``, ...
+    """
+    if heading_id is None:
+        def heading_id(token, index):
+            return 'toc_' + str(index + 1)
+
+    def toc_hook(md, state):
+        selected = [
+            tok for tok in state.tokens
+            if tok['type'] == 'heading'
+            and min_level <= tok['attrs']['level'] <= max_level
+        ]
+
+        toc_items = []
+        for position, tok in enumerate(selected):
+            tok['attrs']['id'] = heading_id(tok, position)
+            toc_items.append(normalize_toc_item(md, tok))
+
+        # save items into state
+        state.env['toc_items'] = toc_items
+
+    md.before_render_hooks.append(toc_hook)
+
+
+def normalize_toc_item(md, token):
+    """Build the ``(level, id, text)`` tuple for a heading token.
+
+    The heading's ``text`` is run through the inline parser and the renderer
+    of ``md``; tags are then stripped from the result.
+    """
+    inline_tokens = md.inline(token['text'], {})
+    rendered = md.renderer(inline_tokens, {})
+    plain = striptags(rendered)
+    attrs = token['attrs']
+    return attrs['level'], attrs['id'], plain
+
+
+def render_toc_ul(toc):
+    """Render a <ul> table of content HTML. The param "toc" should
+    be formatted into this structure::
+
+        [
+            (level, id, text),
+        ]
+
+    For example::
+
+        [
+            (1, 'toc-intro', 'Introduction'),
+            (2, 'toc-install', 'Install'),
+            (2, 'toc-upgrade', 'Upgrade'),
+            (1, 'toc-license', 'License'),
+        ]
+
+    An empty ``toc`` renders as an empty string.
+    """
+    if not toc:
+        return ''
+
+    parts = ['<ul>\n']
+    # levels of the <ul> elements that are currently open, outermost first
+    open_levels = []
+    for level, anchor, title in toc:
+        link = '<a href="#{}">{}</a>'.format(anchor, title)
+
+        if not open_levels:
+            parts.append('<li>' + link)
+            open_levels.append(level)
+            continue
+
+        current = open_levels[-1]
+        if level == current:
+            parts.append('</li>\n<li>' + link)
+            continue
+        if level > current:
+            parts.append('\n<ul>\n<li>' + link)
+            open_levels.append(level)
+            continue
+
+        # shallower than the current list: unwind the open lists
+        open_levels.pop()
+        while open_levels:
+            outer = open_levels.pop()
+            if level == outer:
+                parts.append('</li>\n</ul>\n</li>\n<li>' + link)
+                open_levels.append(level)
+                break
+            if level > outer:
+                parts.append('</li>\n<li>' + link)
+                open_levels.append(outer)
+                open_levels.append(level)
+                break
+            parts.append('</li>\n</ul>\n')
+        else:
+            # shallower than everything that was open
+            open_levels.append(level)
+            parts.append('</li>\n<li>' + link)
+
+    # close every list that is still open, the outermost one included
+    parts.append('</li>\n</ul>\n' * max(len(open_levels), 1))
+    return ''.join(parts)
diff --git a/src/mistune/util.py b/src/mistune/util.py
new file mode 100644
index 0000000..e2337b4
--- /dev/null
+++ b/src/mistune/util.py
@@ -0,0 +1,81 @@
+import re
+from urllib.parse import quote
+from html import _replace_charref
+
+
+# A tab at the start of a line, preceded by at most three spaces (group 1).
+_expand_tab_re = re.compile(r'^( {0,3})\t', flags=re.M)
+
+
+def expand_leading_tab(text, width=4):
+    """Expand a leading tab so that it ends at column ``width``.
+
+    The spaces in front of the tab are kept and padded with spaces up to
+    ``width`` characters in total.
+    """
+    def pad(match):
+        return match.group(1).ljust(width)
+    return _expand_tab_re.sub(pad, text)
+
+
+def expand_tab(text, space=' '):
+    """Replace a leading tab with ``space``, keeping the spaces before it.
+
+    ``space`` becomes part of a regex replacement template, so backslash
+    sequences in it are interpreted.
+    """
+    template = r'\1' + space
+    return _expand_tab_re.sub(template, text)
+
+
+def escape(s, quote=True):
+    """Escape the characters ``&``, ``<`` and ``>``. If quote=True, ``"``
+    is converted to ``&quot;`` as well."""
+    # ``&`` goes first so the entities produced below are not escaped again
+    replacements = [('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')]
+    if quote:
+        replacements.append(('"', '&quot;'))
+    for char, entity in replacements:
+        s = s.replace(char, entity)
+    return s
+
+
+def escape_url(link):
+    """Escape URL for safety.
+
+    Character references in ``link`` are decoded first, the result is
+    percent-encoded (leaving the characters listed below alone) and finally
+    HTML-escaped.
+    """
+    safe = ''.join((
+        ':/?#@',  # gen-delims - '[]' (rfc3986)
+        '!$&()*+,;=',  # sub-delims - "'" (rfc3986)
+        '%',  # leave already-encoded octets alone
+    ))
+    decoded = unescape(link)
+    return escape(quote(decoded, safe=safe))
+
+
+def safe_entity(s):
+    """Escape characters for safety: decode character references, then
+    HTML-escape the result."""
+    decoded = unescape(s)
+    return escape(decoded)
+
+
+def unikey(s):
+    """Generate a unique key for links and footnotes.
+
+    Whitespace runs are collapsed to single spaces, and the result is
+    lower-cased and then upper-cased.
+    """
+    collapsed = ' '.join(s.split())
+    return collapsed.strip().lower().upper()
+
+
+# Character references: decimal, hexadecimal or named, and always terminated
+# by a semicolon.
+_charref_re = re.compile(
+    r'&(#[0-9]{1,7};'
+    r'|#[xX][0-9a-fA-F]+;'
+    r'|[^\t\n\f <&#;]{1,32};)'
+)
+
+
+def unescape(s):
+    """
+    Copied from `html.unescape`, but with a different `_charref` pattern:
+    CommonMark does not accept entity references without a trailing
+    semicolon.
+    """
+    # cheap exit for text that cannot contain a reference at all
+    return _charref_re.sub(_replace_charref, s) if '&' in s else s
+
+
+# An HTML comment, or anything from ``<`` up to the next ``>``.
+_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
+
+
+def striptags(s):
+    """Return ``s`` with HTML comments and tags removed."""
+    without_tags = _striptags_re.sub('', s)
+    return without_tags
+
+
+# A newline followed by nothing but whitespace up to the end of the text.
+_strip_end_re = re.compile(r'\n\s+$')
+
+
+def strip_end(src):
+    """Collapse trailing blank lines and whitespace into a single newline."""
+    collapsed = _strip_end_re.sub('\n', src)
+    return collapsed