summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArturs Artamonovs <dos21h@gmail.com>2023-01-29 10:30:54 +0000
committerArturs Artamonovs <dos21h@gmail.com>2023-01-29 10:30:54 +0000
commit66fa71a8f11b6ce5e8471b533f67cc3a1fdb85a8 (patch)
tree7aed7f385826a3bd88c76a373e28c6cfae4f396e
parent129c1201ea5c4418f0f89ad932633c7cea2439b7 (diff)
downloadmd-site-66fa71a8f11b6ce5e8471b533f67cc3a1fdb85a8.tar.gz
md-site-66fa71a8f11b6ce5e8471b533f67cc3a1fdb85a8.zip
Update to new mistune, removed old mistune, rewrite to python3
-rwxr-xr-xsrc/md.py227
-rw-r--r--src/mistune.py1143
-rw-r--r--src/mistune/__init__.py75
-rw-r--r--src/mistune/__main__.py124
-rw-r--r--src/mistune/block_parser.py486
-rw-r--r--src/mistune/core.py208
-rw-r--r--src/mistune/directives/__init__.py31
-rw-r--r--src/mistune/directives/_base.py121
-rw-r--r--src/mistune/directives/_fenced.py142
-rw-r--r--src/mistune/directives/_rst.py73
-rw-r--r--src/mistune/directives/admonition.py61
-rw-r--r--src/mistune/directives/image.py152
-rw-r--r--src/mistune/directives/include.py65
-rw-r--r--src/mistune/directives/toc.py105
-rw-r--r--src/mistune/helpers.py137
-rw-r--r--src/mistune/inline_parser.py390
-rw-r--r--src/mistune/list_parser.py250
-rw-r--r--src/mistune/markdown.py104
-rw-r--r--src/mistune/plugins/__init__.py38
-rw-r--r--src/mistune/plugins/abbr.py103
-rw-r--r--src/mistune/plugins/def_list.py135
-rw-r--r--src/mistune/plugins/footnotes.py153
-rw-r--r--src/mistune/plugins/formatting.py173
-rw-r--r--src/mistune/plugins/math.py57
-rw-r--r--src/mistune/plugins/ruby.py100
-rw-r--r--src/mistune/plugins/speedup.py44
-rw-r--r--src/mistune/plugins/spoiler.py80
-rw-r--r--src/mistune/plugins/table.py179
-rw-r--r--src/mistune/plugins/task_lists.py67
-rw-r--r--src/mistune/plugins/url.py23
-rw-r--r--src/mistune/renderers/__init__.py0
-rw-r--r--src/mistune/renderers/_list.py60
-rw-r--r--src/mistune/renderers/html.py151
-rw-r--r--src/mistune/renderers/markdown.py146
-rw-r--r--src/mistune/renderers/rst.py147
-rw-r--r--src/mistune/toc.py111
-rw-r--r--src/mistune/util.py81
37 files changed, 4488 insertions, 1254 deletions
diff --git a/src/md.py b/src/md.py
index 31d5e9e..60fb574 100755
--- a/src/md.py
+++ b/src/md.py
@@ -1,112 +1,114 @@
-#!/usr/bin/python2
+#!/usr/bin/python3
import os
import os.path
import sys
+
+#from jinja2 import Environment, PackageLoader, Template, FileSystemLoader
from jinja2 import Environment, PackageLoader, Template, FileSystemLoader
import mistune
#global name for templates
-article = {}
-
-class TocMixin():
- """TOC mixin for Renderer, mix this with Renderer::
-
- class TocRenderer(TocMixin, Renderer):
- pass
-
- toc = TocRenderer()
- md = mistune.Markdown(renderer=toc)
-
- # required in this order
- toc.reset_toc() # initial the status
- md.parse(text) # parse for headers
- toc.render_toc(level=3) # render TOC HTML
- """
-
- def reset_toc(self):
- self.toc_tree = []
- self.toc_count = 0
- self.title = None
- self.img_count = -1
-
- def header(self, text, level, raw=None):
- rv = ""
- if level != 1:
- rv = '<h%d id="toc-%d">%s</h%d>\n' % (
- level, self.toc_count, text, level
- )
- else:
- self.title = text
- self.toc_tree.append((self.toc_count, text, level, raw))
- self.toc_count += 1
- return rv
-
- #Lets do previev image
- def image(self, src, title, alt_text):
- prev_img = src.split(".")
- prev_img = prev_img[0]+"_prev."+prev_img[1]
- self.img_count += 1
- return "<a href="+src+"><img src=\""+prev_img+"\" alt=\""+alt_text+"\"></a>"
-
- def render_toc(self, level=3):
- """Render TOC to HTML.
-
- :param level: render toc to the given level
- """
- return ''.join(self._iter_toc(level))
-
- def _iter_toc(self, level):
- first_level = None
- last_level = None
- cnt = 0
-
- if (self.title != None):
- yield '<h1>%s</h1>\n' % (self.title)
-
- yield '<ul id="table-of-content">\n'
-
- for toc in self.toc_tree:
- index, text, l, raw = toc
- index = cnt
-
-
- if l > level:
- # ignore this level
- continue
-
- if first_level is None:
- # based on first level
- first_level = l
- last_level = l
- #yield '<li><a href="#toc-%d">%s</a>' % (cnt, text)
- yield ''
- elif last_level == l:
- yield '</li>\n<li><a href="#toc-%d">%s</a>' % (cnt, text)
- elif last_level == l - 1:
- last_level = l
- yield '<ul>\n<li><a href="#toc-%d">%s</a>' % (cnt, text)
- elif last_level > l:
- # close indention
- yield '</li>'
- while last_level > l:
- yield '</ul>\n</li>\n'
- last_level -= 1
- yield '<li><a href="#toc-%d">%s</a>' % (cnt, text)
- cnt = cnt + 1
-
- # close tags
- yield '</li>\n'
- while last_level > first_level:
- yield '</ul>\n<!--</li>-->\n'
- last_level -= 1
-
- yield '</ul>\n'
- self.toc_count = 0
-
-class TocRenderer(TocMixin, mistune.Renderer):
- pass
+article = {}#
+
+#class TocMixin():
+# """TOC mixin for Renderer, mix this with Renderer::#
+
+# class TocRenderer(TocMixin, Renderer):
+# pass#
+
+# toc = TocRenderer()
+# md = mistune.Markdown(renderer=toc)#
+
+# # required in this order
+# toc.reset_toc() # initial the status
+# md.parse(text) # parse for headers
+# toc.render_toc(level=3) # render TOC HTML
+# """#
+
+# def reset_toc(self):
+# self.toc_tree = []
+# self.toc_count = 0
+# self.title = None
+# self.img_count = -1#
+
+# def header(self, text, level, raw=None):
+# rv = ""
+# if level != 1:
+# rv = '<h%d id="toc-%d">%s</h%d>\n' % (
+# level, self.toc_count, text, level
+# )
+# else:
+# self.title = text
+# self.toc_tree.append((self.toc_count, text, level, raw))
+# self.toc_count += 1
+# return rv#
+
+# #Lets do previev image
+# def image(self, src, title, alt_text):
+# prev_img = src.split(".")
+# prev_img = prev_img[0]+"_prev."+prev_img[1]
+# self.img_count += 1
+# return "<a href="+src+"><img src=\""+prev_img+"\" alt=\""+alt_text+"\"></a>"#
+
+# def render_toc(self, level=3):
+# """Render TOC to HTML.#
+
+# :param level: render toc to the given level
+# """
+# return ''.join(self._iter_toc(level))#
+
+# def _iter_toc(self, level):
+# first_level = None
+# last_level = None
+# cnt = 0#
+
+# if (self.title != None):
+# yield '<h1>%s</h1>\n' % (self.title)#
+
+# yield '<ul id="table-of-content">\n'#
+
+# for toc in self.toc_tree:
+# index, text, l, raw = toc
+# index = cnt
+# #
+
+# if l > level:
+# # ignore this level
+# continue#
+
+# if first_level is None:
+# # based on first level
+# first_level = l
+# last_level = l
+# #yield '<li><a href="#toc-%d">%s</a>' % (cnt, text)
+# yield ''
+# elif last_level == l:
+# yield '</li>\n<li><a href="#toc-%d">%s</a>' % (cnt, text)
+# elif last_level == l - 1:
+# last_level = l
+# yield '<ul>\n<li><a href="#toc-%d">%s</a>' % (cnt, text)
+# elif last_level > l:
+# # close indention
+# yield '</li>'
+# while last_level > l:
+# yield '</ul>\n</li>\n'
+# last_level -= 1
+# yield '<li><a href="#toc-%d">%s</a>' % (cnt, text)
+# cnt = cnt + 1#
+
+# # close tags
+# yield '</li>\n'
+# while last_level > first_level:
+# yield '</ul>\n<!--</li>-->\n'
+# last_level -= 1#
+
+# yield '</ul>\n'
+# self.toc_count = 0#
+
+#class TocRenderer(TocMixin, mistune.Renderer):
+# pass
################################################################################
#get first tags and use them to configure some bits
@@ -129,27 +131,30 @@ def get_tags(data):
################################################################################
#check if there is input file
if len(sys.argv) < 2:
- print "Need more arguments"
+ print("ERROR: Exit . Need more arguments")
sys.exit(0)
md_fn = sys.argv[1]
#template loader
loader = FileSystemLoader( "/home/fam/downloads/source/repos/md-site/src/templ" )
-templ_env = Environment( loader = loader )
+templ_env = Environment( loader = loader )#
-t = templ_env.get_template("main.thtml")
+t = templ_env.get_template("main.thtml")#
-#get md file
+##get md file
f = open( md_fn, "r" )
data = f.read()
data = get_tags(data)
-toc = TocRenderer()
-md = mistune.Markdown(rule=True,renderer=toc)
+#toc = TocRenderer()
+## Create AST rendered
+
+## Create AST -> Markdown renderer
+
+## Mistune generate MTL
+md_rend = mistune.html(data)
-toc.reset_toc()
-md.parse(data)
-md_rend_toc = toc.render_toc(level=3)
-md_rend = md_rend_toc + md.render( data )
+#print t.render( article=article, block = md_rend )
+print(t.render( article=article, block = md_rend ))
-print t.render( article=article, block = md_rend ) \ No newline at end of file
+f.close() \ No newline at end of file
diff --git a/src/mistune.py b/src/mistune.py
deleted file mode 100644
index 4c711b2..0000000
--- a/src/mistune.py
+++ /dev/null
@@ -1,1143 +0,0 @@
-# coding: utf-8
-"""
- mistune
- ~~~~~~~
-
- The fastest markdown parser in pure Python with renderer feature.
-
- :copyright: (c) 2014 - 2015 by Hsiaoming Yang.
-"""
-
-import re
-import inspect
-
-__version__ = '0.7.1'
-__author__ = 'Hsiaoming Yang <me@lepture.com>'
-__all__ = [
- 'BlockGrammar', 'BlockLexer',
- 'InlineGrammar', 'InlineLexer',
- 'Renderer', 'Markdown',
- 'markdown', 'escape',
-]
-
-
-_key_pattern = re.compile(r'\s+')
-_escape_pattern = re.compile(r'&(?!#?\w+;)')
-_newline_pattern = re.compile(r'\r\n|\r')
-_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
-_block_code_leadning_pattern = re.compile(r'^ {4}', re.M)
-_inline_tags = [
- 'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
- 'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
- 'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
- 'img', 'font',
-]
-_pre_tags = ['pre', 'script', 'style']
-_valid_end = r'(?!:/|[^\w\s@]*@)\b'
-_valid_attr = r'''"[^"]*"|'[^']*'|[^'">]'''
-_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
-
-
-def _pure_pattern(regex):
- pattern = regex.pattern
- if pattern.startswith('^'):
- pattern = pattern[1:]
- return pattern
-
-
-def _keyify(key):
- return _key_pattern.sub(' ', key.lower())
-
-
-def escape(text, quote=False, smart_amp=True):
- """Replace special characters "&", "<" and ">" to HTML-safe sequences.
-
- The original cgi.escape will always escape "&", but you can control
- this one for a smart escape amp.
-
- :param quote: if set to True, " and ' will be escaped.
- :param smart_amp: if set to False, & will always be escaped.
- """
- if smart_amp:
- text = _escape_pattern.sub('&amp;', text)
- else:
- text = text.replace('&', '&amp;')
- text = text.replace('<', '&lt;')
- text = text.replace('>', '&gt;')
- if quote:
- text = text.replace('"', '&quot;')
- text = text.replace("'", '&#39;')
- return text
-
-
-def preprocessing(text, tab=4):
- text = _newline_pattern.sub('\n', text)
- text = text.replace('\t', ' ' * tab)
- text = text.replace('\u00a0', ' ')
- text = text.replace('\u2424', '\n')
- pattern = re.compile(r'^ +$', re.M)
- return pattern.sub('', text)
-
-
-class BlockGrammar(object):
- """Grammars for block level tokens."""
-
- def_links = re.compile(
- r'^ *\[([^^\]]+)\]: *' # [key]:
- r'<?([^\s>]+)>?' # <link> or link
- r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
- )
- def_footnotes = re.compile(
- r'^\[\^([^\]]+)\]: *('
- r'[^\n]*(?:\n+|$)' # [^key]:
- r'(?: {1,}[^\n]*(?:\n+|$))*'
- r')'
- )
-
- newline = re.compile(r'^\n+')
- block_code = re.compile(r'^( {4}[^\n]+\n*)+')
- fences = re.compile(
- r'^ *(`{3,}|~{3,}) *(\S+)? *\n' # ```lang
- r'([\s\S]+?)\s*'
- r'\1 *(?:\n+|$)' # ```
- )
- hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
- heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
- lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
- block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
- list_block = re.compile(
- r'^( *)([*+-]|\d+\.) [\s\S]+?'
- r'(?:'
- r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))' # hrule
- r'|\n+(?=%s)' # def links
- r'|\n+(?=%s)' # def footnotes
- r'|\n{2,}'
- r'(?! )'
- r'(?!\1(?:[*+-]|\d+\.) )\n*'
- r'|'
- r'\s*$)' % (
- _pure_pattern(def_links),
- _pure_pattern(def_footnotes),
- )
- )
- list_item = re.compile(
- r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
- r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
- flags=re.M
- )
- list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
- paragraph = re.compile(
- r'^((?:[^\n]+\n?(?!'
- r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
- r'))+)\n*' % (
- _pure_pattern(fences).replace(r'\1', r'\2'),
- _pure_pattern(list_block).replace(r'\1', r'\3'),
- _pure_pattern(hrule),
- _pure_pattern(heading),
- _pure_pattern(lheading),
- _pure_pattern(block_quote),
- _pure_pattern(def_links),
- _pure_pattern(def_footnotes),
- '<' + _block_tag,
- )
- )
- block_html = re.compile(
- r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
- r'<!--[\s\S]*?-->',
- r'<(%s)((?:%s)*?)>([\s\S]+?)<\/\1>' % (_block_tag, _valid_attr),
- r'<%s(?:%s)*?>' % (_block_tag, _valid_attr),
- )
- )
- table = re.compile(
- r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
- )
- nptable = re.compile(
- r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
- )
- text = re.compile(r'^[^\n]+')
-
-
-class BlockLexer(object):
- """Block level lexer for block grammars."""
- grammar_class = BlockGrammar
-
- default_rules = [
- 'newline', 'hrule', 'block_code', 'fences', 'heading',
- 'nptable', 'lheading', 'block_quote',
- 'list_block', 'block_html', 'def_links',
- 'def_footnotes', 'table', 'paragraph', 'text'
- ]
-
- list_rules = (
- 'newline', 'block_code', 'fences', 'lheading', 'hrule',
- 'block_quote', 'list_block', 'block_html', 'text',
- )
-
- footnote_rules = (
- 'newline', 'block_code', 'fences', 'heading',
- 'nptable', 'lheading', 'hrule', 'block_quote',
- 'list_block', 'block_html', 'table', 'paragraph', 'text'
- )
-
- def __init__(self, rules=None, **kwargs):
- self.tokens = []
- self.def_links = {}
- self.def_footnotes = {}
-
- if not rules:
- rules = self.grammar_class()
-
- self.rules = rules
-
- def __call__(self, text, rules=None):
- return self.parse(text, rules)
-
- def parse(self, text, rules=None):
- text = text.rstrip('\n')
-
- if not rules:
- rules = self.default_rules
-
- def manipulate(text):
- for key in rules:
- rule = getattr(self.rules, key)
- m = rule.match(text)
- if not m:
- continue
- getattr(self, 'parse_%s' % key)(m)
- return m
- return False # pragma: no cover
-
- while text:
- m = manipulate(text)
- if m is not False:
- text = text[len(m.group(0)):]
- continue
- if text: # pragma: no cover
- raise RuntimeError('Infinite loop at: %s' % text)
- return self.tokens
-
- def parse_newline(self, m):
- length = len(m.group(0))
- if length > 1:
- self.tokens.append({'type': 'newline'})
-
- def parse_block_code(self, m):
- # clean leading whitespace
- code = _block_code_leadning_pattern.sub('', m.group(0))
- self.tokens.append({
- 'type': 'code',
- 'lang': None,
- 'text': code,
- })
-
- def parse_fences(self, m):
- self.tokens.append({
- 'type': 'code',
- 'lang': m.group(2),
- 'text': m.group(3),
- })
-
- def parse_heading(self, m):
- self.tokens.append({
- 'type': 'heading',
- 'level': len(m.group(1)),
- 'text': m.group(2),
- })
-
- def parse_lheading(self, m):
- """Parse setext heading."""
- self.tokens.append({
- 'type': 'heading',
- 'level': 1 if m.group(2) == '=' else 2,
- 'text': m.group(1),
- })
-
- def parse_hrule(self, m):
- self.tokens.append({'type': 'hrule'})
-
- def parse_list_block(self, m):
- bull = m.group(2)
- self.tokens.append({
- 'type': 'list_start',
- 'ordered': '.' in bull,
- })
- cap = m.group(0)
- self._process_list_item(cap, bull)
- self.tokens.append({'type': 'list_end'})
-
- def _process_list_item(self, cap, bull):
- cap = self.rules.list_item.findall(cap)
-
- _next = False
- length = len(cap)
-
- for i in range(length):
- item = cap[i][0]
-
- # remove the bullet
- space = len(item)
- item = self.rules.list_bullet.sub('', item)
-
- # outdent
- if '\n ' in item:
- space = space - len(item)
- pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
- item = pattern.sub('', item)
-
- # determin whether item is loose or not
- loose = _next
- if not loose and re.search(r'\n\n(?!\s*$)', item):
- loose = True
-
- rest = len(item)
- if i != length - 1 and rest:
- _next = item[rest-1] == '\n'
- if not loose:
- loose = _next
-
- if loose:
- t = 'loose_item_start'
- else:
- t = 'list_item_start'
-
- self.tokens.append({'type': t})
- # recurse
- self.parse(item, self.list_rules)
- self.tokens.append({'type': 'list_item_end'})
-
- def parse_block_quote(self, m):
- self.tokens.append({'type': 'block_quote_start'})
- # clean leading >
- cap = _block_quote_leading_pattern.sub('', m.group(0))
- self.parse(cap)
- self.tokens.append({'type': 'block_quote_end'})
-
- def parse_def_links(self, m):
- key = _keyify(m.group(1))
- self.def_links[key] = {
- 'link': m.group(2),
- 'title': m.group(3),
- }
-
- def parse_def_footnotes(self, m):
- key = _keyify(m.group(1))
- if key in self.def_footnotes:
- # footnote is already defined
- return
-
- self.def_footnotes[key] = 0
-
- self.tokens.append({
- 'type': 'footnote_start',
- 'key': key,
- })
-
- text = m.group(2)
-
- if '\n' in text:
- lines = text.split('\n')
- whitespace = None
- for line in lines[1:]:
- space = len(line) - len(line.lstrip())
- if space and (not whitespace or space < whitespace):
- whitespace = space
- newlines = [lines[0]]
- for line in lines[1:]:
- newlines.append(line[whitespace:])
- text = '\n'.join(newlines)
-
- self.parse(text, self.footnote_rules)
-
- self.tokens.append({
- 'type': 'footnote_end',
- 'key': key,
- })
-
- def parse_table(self, m):
- item = self._process_table(m)
-
- cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
- cells = cells.split('\n')
- for i, v in enumerate(cells):
- v = re.sub(r'^ *\| *| *\| *$', '', v)
- cells[i] = re.split(r' *\| *', v)
-
- item['cells'] = cells
- self.tokens.append(item)
-
- def parse_nptable(self, m):
- item = self._process_table(m)
-
- cells = re.sub(r'\n$', '', m.group(3))
- cells = cells.split('\n')
- for i, v in enumerate(cells):
- cells[i] = re.split(r' *\| *', v)
-
- item['cells'] = cells
- self.tokens.append(item)
-
- def _process_table(self, m):
- header = re.sub(r'^ *| *\| *$', '', m.group(1))
- header = re.split(r' *\| *', header)
- align = re.sub(r' *|\| *$', '', m.group(2))
- align = re.split(r' *\| *', align)
-
- for i, v in enumerate(align):
- if re.search(r'^ *-+: *$', v):
- align[i] = 'right'
- elif re.search(r'^ *:-+: *$', v):
- align[i] = 'center'
- elif re.search(r'^ *:-+ *$', v):
- align[i] = 'left'
- else:
- align[i] = None
-
- item = {
- 'type': 'table',
- 'header': header,
- 'align': align,
- }
- return item
-
- def parse_block_html(self, m):
- tag = m.group(1)
- if not tag:
- text = m.group(0)
- self.tokens.append({
- 'type': 'close_html',
- 'text': text
- })
- else:
- attr = m.group(2)
- text = m.group(3)
- self.tokens.append({
- 'type': 'open_html',
- 'tag': tag,
- 'extra': attr,
- 'text': text
- })
-
- def parse_paragraph(self, m):
- text = m.group(1).rstrip('\n')
- self.tokens.append({'type': 'paragraph', 'text': text})
-
- def parse_text(self, m):
- text = m.group(0)
- self.tokens.append({'type': 'text', 'text': text})
-
-
-class InlineGrammar(object):
- """Grammars for inline level tokens."""
-
- escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])') # \* \+ \! ....
- inline_html = re.compile(
- r'^(?:%s|%s|%s)' % (
- r'<!--[\s\S]*?-->',
- r'<(\w+%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_valid_end, _valid_attr),
- r'<\w+%s(?:%s)*?>' % (_valid_end, _valid_attr),
- )
- )
- autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
- link = re.compile(
- r'^!?\[('
- r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
- r')\]\('
- r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
- r'\)'
- )
- reflink = re.compile(
- r'^!?\[('
- r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
- r')\]\s*\[([^^\]]*)\]'
- )
- nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
- url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
- double_emphasis = re.compile(
- r'^_{2}([\s\S]+?)_{2}(?!_)' # __word__
- r'|'
- r'^\*{2}([\s\S]+?)\*{2}(?!\*)' # **word**
- )
- emphasis = re.compile(
- r'^\b_((?:__|[\s\S])+?)_\b' # _word_
- r'|'
- r'^\*((?:\*\*|[\s\S])+?)\*(?!\*)' # *word*
- )
- code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)') # `code`
- linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
- strikethrough = re.compile(r'^~~(?=\S)([\s\S]+?\S)~~') # ~~word~~
- footnote = re.compile(r'^\[\^([^\]]+)\]')
- text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')
-
- def hard_wrap(self):
- """Grammar for hard wrap linebreak. You don't need to add two
- spaces at the end of a line.
- """
- self.linebreak = re.compile(r'^ *\n(?!\s*$)')
- self.text = re.compile(
- r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
- )
-
-
-class InlineLexer(object):
- """Inline level lexer for inline grammars."""
- grammar_class = InlineGrammar
-
- default_rules = [
- 'escape', 'inline_html', 'autolink', 'url',
- 'footnote', 'link', 'reflink', 'nolink',
- 'double_emphasis', 'emphasis', 'code',
- 'linebreak', 'strikethrough', 'text',
- ]
- inline_html_rules = [
- 'escape', 'autolink', 'url', 'link', 'reflink',
- 'nolink', 'double_emphasis', 'emphasis', 'code',
- 'linebreak', 'strikethrough', 'text',
- ]
-
- def __init__(self, renderer, rules=None, **kwargs):
- self.renderer = renderer
- self.links = {}
- self.footnotes = {}
- self.footnote_index = 0
-
- if not rules:
- rules = self.grammar_class()
-
- self.rules = rules
-
- self._in_link = False
- self._in_footnote = False
-
- kwargs.update(self.renderer.options)
- self._parse_inline_html = kwargs.get('parse_inline_html')
-
- def __call__(self, text, rules=None):
- return self.output(text, rules)
-
- def setup(self, links, footnotes):
- self.footnote_index = 0
- self.links = links or {}
- self.footnotes = footnotes or {}
-
- def output(self, text, rules=None):
- text = text.rstrip('\n')
- if not rules:
- rules = list(self.default_rules)
-
- if self._in_footnote and 'footnote' in rules:
- rules.remove('footnote')
-
- output = self.renderer.placeholder()
-
- def manipulate(text):
- for key in rules:
- pattern = getattr(self.rules, key)
- m = pattern.match(text)
- if not m:
- continue
- self.line_match = m
- out = getattr(self, 'output_%s' % key)(m)
- if out is not None:
- return m, out
- return False # pragma: no cover
-
- self.line_started = False
- while text:
- ret = manipulate(text)
- self.line_started = True
- if ret is not False:
- m, out = ret
- output += out
- text = text[len(m.group(0)):]
- continue
- if text: # pragma: no cover
- raise RuntimeError('Infinite loop at: %s' % text)
-
- return output
-
- def output_escape(self, m):
- return m.group(1)
-
- def output_autolink(self, m):
- link = m.group(1)
- if m.group(2) == '@':
- is_email = True
- else:
- is_email = False
- return self.renderer.autolink(link, is_email)
-
- def output_url(self, m):
- link = m.group(1)
- if self._in_link:
- return self.renderer.text(link)
- return self.renderer.autolink(link, False)
-
- def output_inline_html(self, m):
- tag = m.group(1)
- if self._parse_inline_html and tag in _inline_tags:
- text = m.group(3)
- if tag == 'a':
- self._in_link = True
- text = self.output(text, rules=self.inline_html_rules)
- self._in_link = False
- else:
- text = self.output(text, rules=self.inline_html_rules)
- extra = m.group(2) or ''
- html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
- else:
- html = m.group(0)
- return self.renderer.inline_html(html)
-
- def output_footnote(self, m):
- key = _keyify(m.group(1))
- if key not in self.footnotes:
- return None
- if self.footnotes[key]:
- return None
- self.footnote_index += 1
- self.footnotes[key] = self.footnote_index
- return self.renderer.footnote_ref(key, self.footnote_index)
-
- def output_link(self, m):
- return self._process_link(m, m.group(3), m.group(4))
-
- def output_reflink(self, m):
- key = _keyify(m.group(2) or m.group(1))
- if key not in self.links:
- return None
- ret = self.links[key]
- return self._process_link(m, ret['link'], ret['title'])
-
- def output_nolink(self, m):
- key = _keyify(m.group(1))
- if key not in self.links:
- return None
- ret = self.links[key]
- return self._process_link(m, ret['link'], ret['title'])
-
- def _process_link(self, m, link, title=None):
- line = m.group(0)
- text = m.group(1)
- if line[0] == '!':
- return self.renderer.image(link, title, text)
-
- self._in_link = True
- text = self.output(text)
- self._in_link = False
- return self.renderer.link(link, title, text)
-
- def output_double_emphasis(self, m):
- text = m.group(2) or m.group(1)
- text = self.output(text)
- return self.renderer.double_emphasis(text)
-
- def output_emphasis(self, m):
- text = m.group(2) or m.group(1)
- text = self.output(text)
- return self.renderer.emphasis(text)
-
- def output_code(self, m):
- text = m.group(2)
- return self.renderer.codespan(text)
-
- def output_linebreak(self, m):
- return self.renderer.linebreak()
-
- def output_strikethrough(self, m):
- text = self.output(m.group(1))
- return self.renderer.strikethrough(text)
-
- def output_text(self, m):
- text = m.group(0)
- return self.renderer.text(text)
-
-
-class Renderer(object):
- """The default HTML renderer for rendering Markdown.
- """
-
- def __init__(self, **kwargs):
- self.options = kwargs
-
- def placeholder(self):
- """Returns the default, empty output value for the renderer.
-
- All renderer methods use the '+=' operator to append to this value.
- Default is a string so rendering HTML can build up a result string with
- the rendered Markdown.
-
- Can be overridden by Renderer subclasses to be types like an empty
- list, allowing the renderer to create a tree-like structure to
- represent the document (which can then be reprocessed later into a
- separate format like docx or pdf).
- """
- return ''
-
- def block_code(self, code, lang=None):
- """Rendering block level code. ``pre > code``.
-
- :param code: text content of the code block.
- :param lang: language of the given code.
- """
- code = code.rstrip('\n')
- if not lang:
- code = escape(code, smart_amp=False)
- return '<pre><code>%s\n</code></pre>\n' % code
- code = escape(code, quote=True, smart_amp=False)
- #return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)
- return '<pre><code class="%s">%s\n</code></pre>\n' % (lang, code)
-
-
- def block_quote(self, text):
- """Rendering <blockquote> with the given text.
-
- :param text: text content of the blockquote.
- """
- return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')
-
- def block_html(self, html):
- """Rendering block level pure html content.
-
- :param html: text content of the html snippet.
- """
- if self.options.get('skip_style') and \
- html.lower().startswith('<style'):
- return ''
- if self.options.get('escape'):
- return escape(html)
- return html
-
- def header(self, text, level, raw=None):
- """Rendering header/heading tags like ``<h1>`` ``<h2>``.
-
- :param text: rendered text content for the header.
- :param level: a number for the header level, for example: 1.
- :param raw: raw text content of the header.
- """
- return '<h%d>%s</h%d>\n' % (level, text, level)
-
- def hrule(self):
- """Rendering method for ``<hr>`` tag."""
- if self.options.get('use_xhtml'):
- return '<hr />\n'
- return '<hr>\n'
-
- def list(self, body, ordered=True):
- """Rendering list tags like ``<ul>`` and ``<ol>``.
-
- :param body: body contents of the list.
- :param ordered: whether this list is ordered or not.
- """
- tag = 'ul'
- if ordered:
- tag = 'ol'
- return '<%s>\n%s</%s>\n' % (tag, body, tag)
-
- def list_item(self, text):
- """Rendering list item snippet. Like ``<li>``."""
- return '<li>%s</li>\n' % text
-
- def paragraph(self, text):
- """Rendering paragraph tags. Like ``<p>``."""
- return '<p>%s</p>\n' % text.strip(' ')
-
- def table(self, header, body):
- """Rendering table element. Wrap header and body in it.
-
- :param header: header part of the table.
- :param body: body part of the table.
- """
- return (
- '<table>\n<thead>%s</thead>\n'
- '<tbody>\n%s</tbody>\n</table>\n'
- ) % (header, body)
-
- def table_row(self, content):
- """Rendering a table row. Like ``<tr>``.
-
- :param content: content of current table row.
- """
- return '<tr>\n%s</tr>\n' % content
-
- def table_cell(self, content, **flags):
- """Rendering a table cell. Like ``<th>`` ``<td>``.
-
- :param content: content of current table cell.
- :param header: whether this is header or not.
- :param align: align of current table cell.
- """
- if flags['header']:
- tag = 'th'
- else:
- tag = 'td'
- align = flags['align']
- if not align:
- return '<%s>%s</%s>\n' % (tag, content, tag)
- return '<%s style="text-align:%s">%s</%s>\n' % (
- tag, align, content, tag
- )
-
- def double_emphasis(self, text):
- """Rendering **strong** text.
-
- :param text: text content for emphasis.
- """
- return '<strong>%s</strong>' % text
-
- def emphasis(self, text):
- """Rendering *emphasis* text.
-
- :param text: text content for emphasis.
- """
- return '<em>%s</em>' % text
-
- def codespan(self, text):
- """Rendering inline `code` text.
-
- :param text: text content for inline code.
- """
- text = escape(text.rstrip(), smart_amp=False)
- return '<code>%s</code>' % text
-
- def linebreak(self):
- """Rendering line break like ``<br>``."""
- if self.options.get('use_xhtml'):
- return '<br />\n'
- return '<br>\n'
-
- def strikethrough(self, text):
- """Rendering ~~strikethrough~~ text.
-
- :param text: text content for strikethrough.
- """
- return '<del>%s</del>' % text
-
- def text(self, text):
- """Rendering unformatted text.
-
- :param text: text content.
- """
- return escape(text)
-
- def autolink(self, link, is_email=False):
- """Rendering a given link or email address.
-
- :param link: link content or email address.
- :param is_email: whether this is an email or not.
- """
- text = link = escape(link)
- if is_email:
- link = 'mailto:%s' % link
- return '<a href="%s">%s</a>' % (link, text)
-
- def link(self, link, title, text):
- """Rendering a given link with content and title.
-
- :param link: href link for ``<a>`` tag.
- :param title: title content for `title` attribute.
- :param text: text content for description.
- """
- if link.startswith('javascript:'):
- link = ''
- if not title:
- return '<a href="%s">%s</a>' % (link, text)
- title = escape(title, quote=True)
- return '<a href="%s" title="%s">%s</a>' % (link, title, text)
-
- def image(self, src, title, text):
- """Rendering a image with title and text.
-
- :param src: source link of the image.
- :param title: title text of the image.
- :param text: alt text of the image.
- """
- if src.startswith('javascript:'):
- src = ''
- text = escape(text, quote=True)
- if title:
- title = escape(title, quote=True)
- html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
- else:
- html = '<img src="%s" alt="%s"' % (src, text)
- if self.options.get('use_xhtml'):
- return '%s />' % html
- return '%s>' % html
-
- def inline_html(self, html):
- """Rendering span level pure html content.
-
- :param html: text content of the html snippet.
- """
- if self.options.get('escape'):
- return escape(html)
- return html
-
- def newline(self):
- """Rendering newline element."""
- return ''
-
- def footnote_ref(self, key, index):
- """Rendering the ref anchor of a footnote.
-
- :param key: identity key for the footnote.
- :param index: the index count of current footnote.
- """
- html = (
- '<sup class="footnote-ref" id="fnref-%s">'
- '<a href="#fn-%s" rel="footnote">%d</a></sup>'
- ) % (escape(key), escape(key), index)
- return html
-
- def footnote_item(self, key, text):
- """Rendering a footnote item.
-
- :param key: identity key for the footnote.
- :param text: text content of the footnote.
- """
- back = (
- '<a href="#fnref-%s" rev="footnote">&#8617;</a>'
- ) % escape(key)
- text = text.rstrip()
- if text.endswith('</p>'):
- text = re.sub(r'<\/p>$', r'%s</p>' % back, text)
- else:
- text = '%s<p>%s</p>' % (text, back)
- html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
- return html
-
- def footnotes(self, text):
- """Wrapper for all footnotes.
-
- :param text: contents of all footnotes.
- """
- html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
- return html % (self.hrule(), text)
-
-
-class Markdown(object):
- """The Markdown parser.
-
- :param renderer: An instance of ``Renderer``.
- :param inline: An inline lexer class or instance.
- :param block: A block lexer class or instance.
- """
- def __init__(self, renderer=None, inline=None, block=None, **kwargs):
- if not renderer:
- renderer = Renderer(**kwargs)
-
- self.renderer = renderer
-
- if inline and inspect.isclass(inline):
- inline = inline(renderer, **kwargs)
- if block and inspect.isclass(block):
- block = block(**kwargs)
-
- if inline:
- self.inline = inline
- else:
- rules = InlineGrammar()
- if kwargs.get('hard_wrap'):
- rules.hard_wrap()
- self.inline = InlineLexer(renderer, rules=rules)
-
- self.block = block or BlockLexer(BlockGrammar())
- self.options = kwargs
- self.footnotes = []
- self.tokens = []
-
- # detect if it should parse text in block html
- self._parse_block_html = kwargs.get('parse_block_html')
-
- def __call__(self, text):
- return self.parse(text)
-
- def render(self, text):
- """Render the Markdown text.
-
- :param text: markdown formatted text content.
- """
- return self.parse(text)
-
- def parse(self, text):
- out = self.output(preprocessing(text))
-
- keys = self.block.def_footnotes
-
- # reset block
- self.block.def_links = {}
- self.block.def_footnotes = {}
-
- # reset inline
- self.inline.links = {}
- self.inline.footnotes = {}
-
- if not self.footnotes:
- return out
-
- footnotes = filter(lambda o: keys.get(o['key']), self.footnotes)
- self.footnotes = sorted(
- footnotes, key=lambda o: keys.get(o['key']), reverse=True
- )
-
- body = self.renderer.placeholder()
- while self.footnotes:
- note = self.footnotes.pop()
- body += self.renderer.footnote_item(
- note['key'], note['text']
- )
-
- out += self.renderer.footnotes(body)
- return out
-
- def pop(self):
- if not self.tokens:
- return None
- self.token = self.tokens.pop()
- return self.token
-
- def peek(self):
- if self.tokens:
- return self.tokens[-1]
- return None # pragma: no cover
-
- def output(self, text, rules=None):
- self.tokens = self.block(text, rules)
- self.tokens.reverse()
-
- self.inline.setup(self.block.def_links, self.block.def_footnotes)
-
- out = self.renderer.placeholder()
- while self.pop():
- out += self.tok()
- return out
-
- def tok(self):
- t = self.token['type']
-
- # sepcial cases
- if t.endswith('_start'):
- t = t[:-6]
-
- return getattr(self, 'output_%s' % t)()
-
- def tok_text(self):
- text = self.token['text']
- while self.peek()['type'] == 'text':
- text += '\n' + self.pop()['text']
- return self.inline(text)
-
- def output_newline(self):
- return self.renderer.newline()
-
- def output_hrule(self):
- return self.renderer.hrule()
-
- def output_heading(self):
- return self.renderer.header(
- self.inline(self.token['text']),
- self.token['level'],
- self.token['text'],
- )
-
- def output_code(self):
- return self.renderer.block_code(
- self.token['text'], self.token['lang']
- )
-
- def output_table(self):
- aligns = self.token['align']
- aligns_length = len(aligns)
- cell = self.renderer.placeholder()
-
- # header part
- header = self.renderer.placeholder()
- for i, value in enumerate(self.token['header']):
- align = aligns[i] if i < aligns_length else None
- flags = {'header': True, 'align': align}
- cell += self.renderer.table_cell(self.inline(value), **flags)
-
- header += self.renderer.table_row(cell)
-
- # body part
- body = self.renderer.placeholder()
- for i, row in enumerate(self.token['cells']):
- cell = self.renderer.placeholder()
- for j, value in enumerate(row):
- align = aligns[j] if j < aligns_length else None
- flags = {'header': False, 'align': align}
- cell += self.renderer.table_cell(self.inline(value), **flags)
- body += self.renderer.table_row(cell)
-
- return self.renderer.table(header, body)
-
- def output_block_quote(self):
- body = self.renderer.placeholder()
- while self.pop()['type'] != 'block_quote_end':
- body += self.tok()
- return self.renderer.block_quote(body)
-
- def output_list(self):
- ordered = self.token['ordered']
- body = self.renderer.placeholder()
- while self.pop()['type'] != 'list_end':
- body += self.tok()
- return self.renderer.list(body, ordered)
-
- def output_list_item(self):
- body = self.renderer.placeholder()
- while self.pop()['type'] != 'list_item_end':
- if self.token['type'] == 'text':
- body += self.tok_text()
- else:
- body += self.tok()
-
- return self.renderer.list_item(body)
-
- def output_loose_item(self):
- body = self.renderer.placeholder()
- while self.pop()['type'] != 'list_item_end':
- body += self.tok()
- return self.renderer.list_item(body)
-
- def output_footnote(self):
- self.inline._in_footnote = True
- body = self.renderer.placeholder()
- key = self.token['key']
- while self.pop()['type'] != 'footnote_end':
- body += self.tok()
- self.footnotes.append({'key': key, 'text': body})
- self.inline._in_footnote = False
- return self.renderer.placeholder()
-
- def output_close_html(self):
- text = self.token['text']
- return self.renderer.block_html(text)
-
- def output_open_html(self):
- text = self.token['text']
- tag = self.token['tag']
- if self._parse_block_html and tag not in _pre_tags:
- text = self.inline(text, rules=self.inline.inline_html_rules)
- extra = self.token.get('extra') or ''
- html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
- return self.renderer.block_html(html)
-
- def output_paragraph(self):
- return self.renderer.paragraph(self.inline(self.token['text']))
-
- def output_text(self):
- return self.renderer.paragraph(self.tok_text())
-
-
-def markdown(text, escape=True, **kwargs):
- """Render markdown formatted text to html.
-
- :param text: markdown formatted text content.
- :param escape: if set to False, all html tags will not be escaped.
- :param use_xhtml: output with xhtml tags.
- :param hard_wrap: if set to True, it will has GFM line breaks feature.
- :param parse_block_html: parse text only in block level html.
- :param parse_inline_html: parse text only in inline level html.
- """
- return Markdown(escape=escape, **kwargs)(text)
diff --git a/src/mistune/__init__.py b/src/mistune/__init__.py
new file mode 100644
index 0000000..4de95c5
--- /dev/null
+++ b/src/mistune/__init__.py
@@ -0,0 +1,75 @@
+"""
+ mistune
+ ~~~~~~~
+
+ A fast yet powerful Python Markdown parser with renderers and
+ plugins, compatible with sane CommonMark rules.
+
+ Documentation: https://mistune.lepture.com/
+"""
+
+from .markdown import Markdown
+from .core import BlockState, InlineState, BaseRenderer
+from .block_parser import BlockParser
+from .inline_parser import InlineParser
+from .renderers.html import HTMLRenderer
+from .util import escape, escape_url, safe_entity, unikey
+from .plugins import import_plugin
+
+
+def create_markdown(escape=True, hard_wrap=False, renderer='html', plugins=None):
+ """Create a Markdown instance based on the given condition.
+
+ :param escape: Boolean. If using html renderer, escape html.
+ :param hard_wrap: Boolean. Break every new line into ``<br>``.
+ :param renderer: renderer instance, default is HTMLRenderer.
+ :param plugins: List of plugins.
+
+ This method is used when you want to re-use a Markdown instance::
+
+ markdown = create_markdown(
+ escape=False,
+ hard_wrap=True,
+ )
+ # re-use markdown function
+ markdown('.... your text ...')
+ """
+ if renderer == 'html':
+ renderer = HTMLRenderer(escape=escape)
+
+ inline = InlineParser(hard_wrap=hard_wrap)
+ if plugins is not None:
+ plugins = [import_plugin(n) for n in plugins]
+ return Markdown(renderer=renderer, inline=inline, plugins=plugins)
+
+
+html = create_markdown(
+ escape=False,
+ plugins=['strikethrough', 'footnotes', 'table', 'speedup']
+)
+
+
+__cached_parsers = {}
+
+
+def markdown(text, escape=True, renderer='html', plugins=None):
+ key = (escape, renderer, plugins)
+ if key in __cached_parsers:
+ return __cached_parsers[key](text)
+
+ md = create_markdown(escape=escape, renderer=renderer, plugins=plugins)
+ # improve the speed for markdown parser creation
+ __cached_parsers[key] = md
+ return md(text)
+
+
+__all__ = [
+ 'Markdown', 'HTMLRenderer',
+ 'BlockParser', 'BlockState', 'BaseRenderer',
+ 'InlineParser', 'InlineState',
+ 'escape', 'escape_url', 'safe_entity', 'unikey',
+ 'html', 'create_markdown', 'markdown',
+]
+
+__version__ = '3.0.0rc4'
+__homepage__ = 'https://mistune.lepture.com/'
diff --git a/src/mistune/__main__.py b/src/mistune/__main__.py
new file mode 100644
index 0000000..053a379
--- /dev/null
+++ b/src/mistune/__main__.py
@@ -0,0 +1,124 @@
+import sys
+import argparse
+from .renderers.rst import RSTRenderer
+from .renderers.markdown import MarkdownRenderer
+from . import (
+ create_markdown,
+ __version__ as version
+)
+
+
+def _md(args):
+ if args.plugin:
+ plugins = args.plugin
+ else:
+ # default plugins
+ plugins = ['strikethrough', 'footnotes', 'table', 'speedup']
+
+ if args.renderer == 'rst':
+ renderer = RSTRenderer()
+ elif args.renderer == 'markdown':
+ renderer = MarkdownRenderer()
+ else:
+ renderer = args.renderer
+ return create_markdown(
+ escape=args.escape,
+ hard_wrap=args.hardwrap,
+ renderer=renderer,
+ plugins=plugins,
+ )
+
+
+def _output(text, args):
+ if args.output:
+ with open(args.output, 'w') as f:
+ f.write(text)
+ else:
+ print(text)
+
+
+CMD_HELP = '''Mistune, a sane and fast python markdown parser.
+
+Here are some use cases of the command line tool:
+
+ $ python -m mistune -m "Hi **Markdown**"
+ <p>Hi <strong>Markdown</strong></p>
+
+ $ python -m mistune -f README.md
+ <p>...
+
+ $ cat README.md | python -m mistune
+ <p>...
+'''
+
+
+def cli():
+ parser = argparse.ArgumentParser(
+ prog='python -m mistune',
+ description=CMD_HELP,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ )
+ parser.add_argument(
+ '-m', '--message',
+ help='the markdown message to convert',
+ )
+ parser.add_argument(
+ '-f', '--file',
+ help='the markdown file to convert',
+ )
+ parser.add_argument(
+ '-p', '--plugin',
+ metavar='NAME',
+ action='extend',
+ nargs='+',
+ help='specifiy a plugin to use',
+ )
+ parser.add_argument(
+ '--escape',
+ action='store_true',
+ help='turn on escape option',
+ )
+ parser.add_argument(
+ '--hardwrap',
+ action='store_true',
+ help='turn on hardwrap option',
+ )
+ parser.add_argument(
+ '-o', '--output',
+ help='write the rendered result into file',
+ )
+ parser.add_argument(
+ '-r', '--renderer',
+ default='html',
+ help='specify the output renderer',
+ )
+ parser.add_argument('--version', action='version', version='mistune ' + version)
+ args = parser.parse_args()
+
+ message = args.message
+ if not message and not args.file:
+ message = read_stdin()
+
+ if message:
+ md = _md(args)
+ text = md(message)
+ _output(text, args)
+ elif args.file:
+ md = _md(args)
+ text = md.read(args.file)[0]
+ _output(text, args)
+ else:
+ print('You MUST specify a message or file')
+ return sys.exit(1)
+
+
+def read_stdin():
+ is_stdin_pipe = not sys.stdin.isatty()
+ if is_stdin_pipe:
+ return sys.stdin.read()
+ else:
+ return None
+
+
+if __name__ == '__main__':
+ cli()
diff --git a/src/mistune/block_parser.py b/src/mistune/block_parser.py
new file mode 100644
index 0000000..1ed79ec
--- /dev/null
+++ b/src/mistune/block_parser.py
@@ -0,0 +1,486 @@
+import re
+from typing import Optional, List, Tuple
+from .util import (
+ unikey,
+ escape_url,
+ expand_tab,
+ expand_leading_tab,
+)
+from .core import Parser, BlockState
+from .helpers import (
+ LINK_LABEL,
+ HTML_TAGNAME,
+ HTML_ATTRIBUTES,
+ BLOCK_TAGS,
+ PRE_TAGS,
+ unescape_char,
+ parse_link_href,
+ parse_link_title,
+)
+from .list_parser import parse_list, LIST_PATTERN
+
+_INDENT_CODE_TRIM = re.compile(r'^ {1,4}', flags=re.M)
+_AXT_HEADING_TRIM = re.compile(r'(\s+|^)#+\s*$')
+_BLOCK_QUOTE_TRIM = re.compile(r'^ ?', flags=re.M)
+_BLOCK_QUOTE_LEADING = re.compile(r'^ *>', flags=re.M)
+
+_LINE_BLANK_END = re.compile(r'\n[ \t]*\n$')
+_BLANK_TO_LINE = re.compile(r'[ \t]*\n')
+
+_BLOCK_TAGS_PATTERN = '|'.join(BLOCK_TAGS) + '|' + '|'.join(PRE_TAGS)
+_OPEN_TAG_END = re.compile(HTML_ATTRIBUTES + r'[ \t]*>[ \t]*(?:\n|$)')
+_CLOSE_TAG_END = re.compile(r'[ \t]*>[ \t]*(?:\n|$)')
+_STRICT_BLOCK_QUOTE = re.compile(r'( {0,3}>[^\n]*(?:\n|$))+')
+
+
+class BlockParser(Parser):
+ BLANK_LINE = re.compile(r'(^[ \t\v\f]*\n)+', re.M)
+
+ RAW_HTML = (
+ r'^ {0,3}('
+ r'</?' + HTML_TAGNAME + r'|'
+ r'<!--|' # comment
+ r'<\?|' # script
+ r'<![A-Z]|'
+ r'<!\[CDATA\[)'
+ )
+
+ BLOCK_HTML = (
+ r'^ {0,3}(?:'
+ r'(?:</?' + _BLOCK_TAGS_PATTERN + r'(?:[ \t]+|\n|$))'
+ r'|<!--' # comment
+ r'|<\?' # script
+ r'|<![A-Z]'
+ r'|<!\[CDATA\[)'
+ )
+
+ SPECIFICATION = {
+ 'blank_line': r'(^[ \t\v\f]*\n)+',
+ 'axt_heading': r'^ {0,3}(?P<axt_1>#{1,6})(?!#+)(?P<axt_2>[ \t]*|[ \t]+.*?)$',
+ 'setex_heading': r'^ {0,3}(?P<setext_1>=|-){1,}[ \t]*$',
+ 'fenced_code': (
+ r'^(?P<fenced_1> {0,3})(?P<fenced_2>`{3,}|~{3,})'
+ r'[ \t]*(?P<fenced_3>.*?)$'
+ ),
+ 'indent_code': (
+ r'^(?: {4}| *\t)[^\n]+(?:\n+|$)'
+ r'((?:(?: {4}| *\t)[^\n]+(?:\n+|$))|\s)*'
+ ),
+ 'thematic_break': r'^ {0,3}((?:-[ \t]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})$',
+ 'ref_link': r'^ {0,3}\[(?P<reflink_1>' + LINK_LABEL + r')\]:',
+ 'block_quote': r'^ {0,3}>(?P<quote_1>.*?)$',
+ 'list': LIST_PATTERN,
+ 'block_html': BLOCK_HTML,
+ 'raw_html': RAW_HTML,
+ }
+
+ DEFAULT_RULES = (
+ 'fenced_code',
+ 'indent_code',
+ 'axt_heading',
+ 'setex_heading',
+ 'thematic_break',
+ 'block_quote',
+ 'list',
+ 'ref_link',
+ 'raw_html',
+ 'blank_line',
+ )
+
+ def __init__(
+ self,
+ block_quote_rules: Optional[List[str]]=None,
+ list_rules: Optional[List[str]]=None,
+ max_nested_level: int=6
+ ):
+ super(BlockParser, self).__init__()
+
+ if block_quote_rules is None:
+ block_quote_rules = list(self.DEFAULT_RULES)
+
+ if list_rules is None:
+ list_rules = list(self.DEFAULT_RULES)
+
+ self.block_quote_rules = block_quote_rules
+ self.list_rules = list_rules
+ self.max_nested_level = max_nested_level
+ # register default parse methods
+ self._methods = {
+ name: getattr(self, 'parse_' + name) for name in self.SPECIFICATION
+ }
+
+ def parse_blank_line(self, m: re.Match, state: BlockState) -> int:
+ """Parse token for blank lines."""
+ state.append_token({'type': 'blank_line'})
+ return m.end()
+
+ def parse_thematic_break(self, m: re.Match, state: BlockState) -> int:
+ """Parse token for thematic break, e.g. ``<hr>`` tag in HTML."""
+ state.append_token({'type': 'thematic_break'})
+ # $ does not count '\n'
+ return m.end() + 1
+
+ def parse_indent_code(self, m: re.Match, state: BlockState) -> int:
+ """Parse token for code block which is indented by 4 spaces."""
+ # it is a part of the paragraph
+ end_pos = state.append_paragraph()
+ if end_pos:
+ return end_pos
+
+ code = m.group(0)
+ code = expand_leading_tab(code)
+ code = _INDENT_CODE_TRIM.sub('', code)
+ code = code.strip('\n')
+ state.append_token({'type': 'block_code', 'raw': code, 'style': 'indent'})
+ return m.end()
+
+ def parse_fenced_code(self, m: re.Match, state: BlockState) -> Optional[int]:
+ """Parse token for fenced code block. A fenced code block is started with
+ 3 or more backtick(`) or tilde(~).
+
+ An example of a fenced code block:
+
+ .. code-block:: markdown
+
+ ```python
+ def markdown(text):
+ return mistune.html(text)
+ ```
+ """
+ spaces = m.group('fenced_1')
+ marker = m.group('fenced_2')
+ info = m.group('fenced_3')
+
+ c = marker[0]
+ if info and c == '`':
+ # CommonMark Example 145
+ # Info strings for backtick code blocks cannot contain backticks
+ if info.find(c) != -1:
+ return
+
+ _end = re.compile(
+ r'^ {0,3}' + c + '{' + str(len(marker)) + r',}[ \t]*(?:\n|$)', re.M)
+ cursor_start = m.end() + 1
+
+ m2 = _end.search(state.src, cursor_start)
+ if m2:
+ code = state.src[cursor_start:m2.start()]
+ end_pos = m2.end()
+ else:
+ code = state.src[cursor_start:]
+ end_pos = state.cursor_max
+
+ if spaces and code:
+ _trim_pattern = re.compile('^ {0,' + str(len(spaces)) + '}', re.M)
+ code = _trim_pattern.sub('', code)
+
+ token = {'type': 'block_code', 'raw': code, 'style': 'fenced', 'marker': marker}
+ if info:
+ info = unescape_char(info)
+ token['attrs'] = {'info': info.strip()}
+
+ state.append_token(token)
+ return end_pos
+
+ def parse_axt_heading(self, m: re.Match, state: BlockState) -> int:
+ """Parse token for AXT heading. An AXT heading is started with 1 to 6
+ symbol of ``#``."""
+ level = len(m.group('axt_1'))
+ text = m.group('axt_2').strip()
+ # remove last #
+ if text:
+ text = _AXT_HEADING_TRIM.sub('', text)
+
+ token = {'type': 'heading', 'text': text, 'attrs': {'level': level}, 'style': 'axt'}
+ state.append_token(token)
+ return m.end() + 1
+
+ def parse_setex_heading(self, m: re.Match, state: BlockState) -> Optional[int]:
+ """Parse token for setex style heading. A setex heading syntax looks like:
+
+ .. code-block:: markdown
+
+ H1 title
+ ========
+ """
+ last_token = state.last_token()
+ if last_token and last_token['type'] == 'paragraph':
+ level = 1 if m.group('setext_1') == '=' else 2
+ last_token['type'] = 'heading'
+ last_token['style'] = 'setext'
+ last_token['attrs'] = {'level': level}
+ return m.end() + 1
+
+ sc = self.compile_sc(['thematic_break', 'list'])
+ m = sc.match(state.src, state.cursor)
+ if m:
+ return self.parse_method(m, state)
+
+ def parse_ref_link(self, m: re.Match, state: BlockState) -> Optional[int]:
+ """Parse link references and save the link information into ``state.env``.
+
+ Here is an example of a link reference:
+
+ .. code-block:: markdown
+
+ a [link][example]
+
+ [example]: https://example.com "Optional title"
+
+ This method will save the link reference into ``state.env`` as::
+
+ state.env['ref_links']['example'] = {
+ 'url': 'https://example.com',
+ 'title': "Optional title",
+ }
+ """
+ end_pos = state.append_paragraph()
+ if end_pos:
+ return end_pos
+
+ label = m.group('reflink_1')
+ key = unikey(label)
+ if not key:
+ return
+
+ href, href_pos = parse_link_href(state.src, m.end(), block=True)
+ if href is None:
+ return
+
+ _blank = self.BLANK_LINE.search(state.src, href_pos)
+ if _blank:
+ max_pos = _blank.start()
+ else:
+ max_pos = state.cursor_max
+
+ title, title_pos = parse_link_title(state.src, href_pos, max_pos)
+ if title_pos:
+ m = _BLANK_TO_LINE.match(state.src, title_pos)
+ if m:
+ title_pos = m.end()
+ else:
+ title_pos = None
+ title = None
+
+ if title_pos is None:
+ m = _BLANK_TO_LINE.match(state.src, href_pos)
+ if m:
+ href_pos = m.end()
+ else:
+ href_pos = None
+ href = None
+
+ end_pos = title_pos or href_pos
+ if not end_pos:
+ return
+
+ if key not in state.env['ref_links']:
+ href = unescape_char(href)
+ data = {'url': escape_url(href), 'label': label}
+ if title:
+ data['title'] = title
+ state.env['ref_links'][key] = data
+ return end_pos
+
+ def extract_block_quote(self, m: re.Match, state: BlockState) -> Tuple[str, int]:
+ """Extract text and cursor end position of a block quote."""
+
+ # cleanup at first to detect if it is code block
+ text = m.group('quote_1') + '\n'
+ text = expand_leading_tab(text, 3)
+ text = _BLOCK_QUOTE_TRIM.sub('', text)
+
+ sc = self.compile_sc(['blank_line', 'indent_code', 'fenced_code'])
+ require_marker = bool(sc.match(text))
+
+ state.cursor = m.end() + 1
+
+ end_pos = None
+ if require_marker:
+ m = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
+ if m:
+ quote = m.group(0)
+ quote = _BLOCK_QUOTE_LEADING.sub('', quote)
+ quote = expand_leading_tab(quote, 3)
+ quote = _BLOCK_QUOTE_TRIM.sub('', quote)
+ text += quote
+ state.cursor = m.end()
+ else:
+ prev_blank_line = False
+ break_sc = self.compile_sc([
+ 'blank_line', 'thematic_break', 'fenced_code',
+ 'list', 'block_html',
+ ])
+ while state.cursor < state.cursor_max:
+ m = _STRICT_BLOCK_QUOTE.match(state.src, state.cursor)
+ if m:
+ quote = m.group(0)
+ quote = _BLOCK_QUOTE_LEADING.sub('', quote)
+ quote = expand_leading_tab(quote, 3)
+ quote = _BLOCK_QUOTE_TRIM.sub('', quote)
+ text += quote
+ state.cursor = m.end()
+ if not quote.strip():
+ prev_blank_line = True
+ else:
+ prev_blank_line = bool(_LINE_BLANK_END.search(quote))
+ continue
+
+ if prev_blank_line:
+ # CommonMark Example 249
+ # because of laziness, a blank line is needed between
+ # a block quote and a following paragraph
+ break
+
+ m = break_sc.match(state.src, state.cursor)
+ if m:
+ end_pos = self.parse_method(m, state)
+ if end_pos:
+ break
+
+ # lazy continuation line
+ pos = state.find_line_end()
+ line = state.get_text(pos)
+ line = expand_leading_tab(line, 3)
+ text += line
+ state.cursor = pos
+
+ # according to CommonMark Example 6, the second tab should be
+ # treated as 4 spaces
+ return expand_tab(text), end_pos
+
+ def parse_block_quote(self, m: re.Match, state: BlockState) -> int:
+ """Parse token for block quote. Here is an example of the syntax:
+
+ .. code-block:: markdown
+
+ > a block quote starts
+ > with right arrows
+ """
+ text, end_pos = self.extract_block_quote(m, state)
+ # scan children state
+ child = state.child_state(text)
+ if state.depth() >= self.max_nested_level - 1:
+ rules = list(self.block_quote_rules)
+ rules.remove('block_quote')
+ else:
+ rules = self.block_quote_rules
+
+ self.parse(child, rules)
+ token = {'type': 'block_quote', 'children': child.tokens}
+ if end_pos:
+ state.prepend_token(token)
+ return end_pos
+ state.append_token(token)
+ return state.cursor
+
+ def parse_list(self, m: re.Match, state: BlockState) -> int:
+ """Parse tokens for ordered and unordered list."""
+ return parse_list(self, m, state)
+
+ def parse_block_html(self, m: re.Match, state: BlockState) -> Optional[int]:
+ return self.parse_raw_html(m, state)
+
+ def parse_raw_html(self, m: re.Match, state: BlockState) -> Optional[int]:
+ marker = m.group(0).strip()
+
+ # rule 2
+ if marker == '<!--':
+ return _parse_html_to_end(state, '-->', m.end())
+
+ # rule 3
+ if marker == '<?':
+ return _parse_html_to_end(state, '?>', m.end())
+
+ # rule 5
+ if marker == '<![CDATA[':
+ return _parse_html_to_end(state, ']]>', m.end())
+
+ # rule 4
+ if marker.startswith('<!'):
+ return _parse_html_to_end(state, '>', m.end())
+
+ close_tag = None
+ open_tag = None
+ if marker.startswith('</'):
+ close_tag = marker[2:].lower()
+ # rule 6
+ if close_tag in BLOCK_TAGS:
+ return _parse_html_to_newline(state, self.BLANK_LINE)
+ else:
+ open_tag = marker[1:].lower()
+ # rule 1
+ if open_tag in PRE_TAGS:
+ end_tag = '</' + open_tag + '>'
+ return _parse_html_to_end(state, end_tag, m.end())
+ # rule 6
+ if open_tag in BLOCK_TAGS:
+ return _parse_html_to_newline(state, self.BLANK_LINE)
+
+ # Blocks of type 7 may not interrupt a paragraph.
+ end_pos = state.append_paragraph()
+ if end_pos:
+ return end_pos
+
+ # rule 7
+ start_pos = m.end()
+ end_pos = state.find_line_end()
+ if (open_tag and _OPEN_TAG_END.match(state.src, start_pos, end_pos)) or \
+ (close_tag and _CLOSE_TAG_END.match(state.src, start_pos, end_pos)):
+ return _parse_html_to_newline(state, self.BLANK_LINE)
+
+ def parse(self, state: BlockState, rules: Optional[List[str]]=None) -> None:
+ sc = self.compile_sc(rules)
+
+ while state.cursor < state.cursor_max:
+ m = sc.search(state.src, state.cursor)
+ if not m:
+ break
+
+ end_pos = m.start()
+ if end_pos > state.cursor:
+ text = state.get_text(end_pos)
+ state.add_paragraph(text)
+ state.cursor = end_pos
+
+ end_pos = self.parse_method(m, state)
+ if end_pos:
+ state.cursor = end_pos
+ else:
+ end_pos = state.find_line_end()
+ text = state.get_text(end_pos)
+ state.add_paragraph(text)
+ state.cursor = end_pos
+
+ if state.cursor < state.cursor_max:
+ text = state.src[state.cursor:]
+ state.add_paragraph(text)
+ state.cursor = state.cursor_max
+
+
+def _parse_html_to_end(state, end_marker, start_pos):
+ marker_pos = state.src.find(end_marker, start_pos)
+ if marker_pos == -1:
+ text = state.src[state.cursor:]
+ end_pos = state.cursor_max
+ else:
+ text = state.get_text(marker_pos)
+ state.cursor = marker_pos
+ end_pos = state.find_line_end()
+ text += state.get_text(end_pos)
+
+ state.append_token({'type': 'block_html', 'raw': text})
+ return end_pos
+
+
+def _parse_html_to_newline(state, newline):
+ m = newline.search(state.src, state.cursor)
+ if m:
+ end_pos = m.start()
+ text = state.get_text(end_pos)
+ else:
+ text = state.src[state.cursor:]
+ end_pos = state.cursor_max
+
+ state.append_token({'type': 'block_html', 'raw': text})
+ return end_pos
diff --git a/src/mistune/core.py b/src/mistune/core.py
new file mode 100644
index 0000000..71db4dd
--- /dev/null
+++ b/src/mistune/core.py
@@ -0,0 +1,208 @@
+import re
+
+_LINE_END = re.compile(r'\n|$')
+
+
+class BlockState:
+ """The state to save block parser's cursor and tokens."""
+ def __init__(self, parent=None):
+ self.src = ''
+ self.tokens = []
+
+ # current cursor position
+ self.cursor = 0
+ self.cursor_max = 0
+
+ # for list and block quote chain
+ self.list_tight = True
+ self.parent = parent
+
+ # for saving def references
+ if parent:
+ self.env = parent.env
+ else:
+ self.env = {'ref_links': {}}
+
+ def child_state(self, src):
+ child = self.__class__(self)
+ child.process(src)
+ return child
+
+ def process(self, src):
+ self.src = src
+ self.cursor_max = len(src)
+
+ def find_line_end(self):
+ m = _LINE_END.search(self.src, self.cursor)
+ return m.end()
+
+ def get_text(self, end_pos):
+ return self.src[self.cursor:end_pos]
+
+ def last_token(self):
+ if self.tokens:
+ return self.tokens[-1]
+
+ def prepend_token(self, token):
+ """Insert token before the last token."""
+ self.tokens.insert(len(self.tokens) - 1, token)
+
+ def append_token(self, token):
+ """Add token to the end of token list."""
+ self.tokens.append(token)
+
+ def add_paragraph(self, text):
+ last_token = self.last_token()
+ if last_token and last_token['type'] == 'paragraph':
+ last_token['text'] += text
+ else:
+ self.tokens.append({'type': 'paragraph', 'text': text})
+
+ def append_paragraph(self):
+ last_token = self.last_token()
+ if last_token and last_token['type'] == 'paragraph':
+ pos = self.find_line_end()
+ last_token['text'] += self.get_text(pos)
+ return pos
+
+ def depth(self):
+ d = 0
+ parent = self.parent
+ while parent:
+ d += 1
+ parent = parent.parent
+ return d
+
+
+class InlineState:
+ """The state to save inline parser's tokens."""
+ def __init__(self, env):
+ self.env = env
+ self.src = ''
+ self.tokens = []
+ self.in_image = False
+ self.in_link = False
+ self.in_emphasis = False
+ self.in_strong = False
+
+ def prepend_token(self, token):
+ """Insert token before the last token."""
+ self.tokens.insert(len(self.tokens) - 1, token)
+
+ def append_token(self, token):
+ """Add token to the end of token list."""
+ self.tokens.append(token)
+
+ def copy(self):
+ """Create a copy of current state."""
+ state = self.__class__(self.env)
+ state.in_image = self.in_image
+ state.in_link = self.in_link
+ state.in_emphasis = self.in_emphasis
+ state.in_strong = self.in_strong
+ return state
+
+
+class Parser:
+ sc_flag = re.M
+ state_cls = BlockState
+
+ SPECIFICATION = {}
+ DEFAULT_RULES = []
+
+ def __init__(self):
+ self.specification = self.SPECIFICATION.copy()
+ self.rules = list(self.DEFAULT_RULES)
+ self._methods = {}
+
+ self.__sc = {}
+
+ def compile_sc(self, rules=None):
+ if rules is None:
+ key = '$'
+ rules = self.rules
+ else:
+ key = '|'.join(rules)
+
+ sc = self.__sc.get(key)
+ if sc:
+ return sc
+
+ regex = '|'.join(r'(?P<%s>%s)' % (k, self.specification[k]) for k in rules)
+ sc = re.compile(regex, self.sc_flag)
+ self.__sc[key] = sc
+ return sc
+
+ def register(self, name, pattern, func, before=None):
+ """Register a new rule to parse the token. This method is usually used to
+ create a new plugin.
+
+ :param name: name of the new grammar
+ :param pattern: regex pattern in string
+ :param func: the parsing function
+ :param before: insert this rule before a built-in rule
+ """
+ self._methods[name] = lambda m, state: func(self, m, state)
+ if pattern:
+ self.specification[name] = pattern
+ if name not in self.rules:
+ self.insert_rule(self.rules, name, before=before)
+
+ def register_rule(self, name, pattern, func):
+ raise DeprecationWarning('This plugin is not compatible with mistune v3.')
+
+ @staticmethod
+ def insert_rule(rules, name, before=None):
+ if before:
+ try:
+ index = rules.index(before)
+ rules.insert(index, name)
+ except ValueError:
+ rules.append(name)
+ else:
+ rules.append(name)
+
+ def parse_method(self, m, state):
+ func = self._methods[m.lastgroup]
+ return func(m, state)
+
+
+class BaseRenderer(object):
+ NAME = 'base'
+
+ def __init__(self):
+ self.__methods = {}
+
+ def register(self, name, method):
+ """Register a render method for the named token. For example::
+
+ def render_wiki(renderer, key, title):
+ return f'<a href="/wiki/{key}">{title}</a>'
+
+ renderer.register('wiki', render_wiki)
+ """
+ # bind self into renderer method
+ self.__methods[name] = lambda *arg, **kwargs: method(self, *arg, **kwargs)
+
+ def _get_method(self, name):
+ try:
+ return object.__getattribute__(self, name)
+ except AttributeError:
+ method = self.__methods.get(name)
+ if not method:
+ raise AttributeError('No renderer "{!r}"'.format(name))
+ return method
+
+ def render_token(self, token, state):
+ func = self._get_method(token['type'])
+ return func(token, state)
+
+ def iter_tokens(self, tokens, state):
+ for tok in tokens:
+ yield self.render_token(tok, state)
+
+ def render_tokens(self, tokens, state):
+ return ''.join(self.iter_tokens(tokens, state))
+
+ def __call__(self, tokens, state):
+ return self.render_tokens(tokens, state)
diff --git a/src/mistune/directives/__init__.py b/src/mistune/directives/__init__.py
new file mode 100644
index 0000000..660c4c8
--- /dev/null
+++ b/src/mistune/directives/__init__.py
@@ -0,0 +1,31 @@
+from ._base import DirectiveParser, BaseDirective, DirectivePlugin
+from ._rst import RSTDirective
+from ._fenced import FencedDirective
+from .admonition import Admonition
+from .toc import TableOfContents
+from .include import Include
+from .image import Image, Figure
+
+
+class RstDirective(RSTDirective): # pragma: no cover
+ def __init__(self, plugins):
+ super(RstDirective, self).__init__(plugins)
+ import warnings
+ warnings.warn(
+ "'RstDirective' is deprecated, please use 'RSTDirective' instead.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+
+
+__all__ = [
+ 'DirectiveParser',
+ 'BaseDirective',
+ 'DirectivePlugin',
+ 'RSTDirective',
+ 'FencedDirective',
+ 'Admonition',
+ 'TableOfContents',
+ 'Include',
+ 'Image', 'Figure',
+]
diff --git a/src/mistune/directives/_base.py b/src/mistune/directives/_base.py
new file mode 100644
index 0000000..ad326c6
--- /dev/null
+++ b/src/mistune/directives/_base.py
@@ -0,0 +1,121 @@
+import re
+
+
+class DirectiveParser:
+ name = 'directive'
+
+ @staticmethod
+ def parse_type(m: re.Match):
+ raise NotImplementedError()
+
+ @staticmethod
+ def parse_title(m: re.Match):
+ raise NotImplementedError()
+
+ @staticmethod
+ def parse_content(m: re.Match):
+ raise NotImplementedError()
+
+ @classmethod
+ def parse_tokens(cls, block, text, state):
+ if state.depth() >= block.max_nested_level - 1 and cls.name in block.rules:
+ rules = list(block.rules)
+ rules.remove(cls.name)
+ else:
+ rules = block.rules
+ child = state.child_state(text)
+ block.parse(child, rules)
+ return child.tokens
+
+ @staticmethod
+ def parse_options(m: re.Match):
+ text = m.group('options')
+ if not text.strip():
+ return []
+
+ options = []
+ for line in re.split(r'\n+', text):
+ line = line.strip()[1:]
+ if not line:
+ continue
+ i = line.find(':')
+ k = line[:i]
+ v = line[i + 1:].strip()
+ options.append((k, v))
+ return options
+
+
+class BaseDirective:
+ parser = DirectiveParser
+ directive_pattern = None
+
+ def __init__(self, plugins):
+ self._methods = {}
+ self.__plugins = plugins
+
+ def register(self, name, fn):
+ self._methods[name] = fn
+
+ def parse_method(self, block, m, state):
+ _type = self.parser.parse_type(m)
+ method = self._methods.get(_type)
+ if method:
+ try:
+ token = method(block, m, state)
+ except ValueError as e:
+ token = {'type': 'block_error', 'raw': str(e)}
+ else:
+ text = m.group(0)
+ token = {
+ 'type': 'block_error',
+ 'raw': text,
+ }
+
+ if isinstance(token, list):
+ for tok in token:
+ state.append_token(tok)
+ else:
+ state.append_token(token)
+ return token
+
+ def parse_directive(self, block, m, state):
+ raise NotImplementedError()
+
+ def register_block_parser(self, md, before=None):
+ md.block.register(
+ self.parser.name,
+ self.directive_pattern,
+ self.parse_directive,
+ before=before,
+ )
+
+ def __call__(self, md):
+ for plugin in self.__plugins:
+ plugin.parser = self.parser
+ plugin(self, md)
+
+
+class DirectivePlugin:
+ def __init__(self):
+ self.parser = None
+
+ def parse_options(self, m: re.Match):
+ return self.parser.parse_options(m)
+
+ def parse_type(self, m: re.Match):
+ return self.parser.parse_type(m)
+
+ def parse_title(self, m: re.Match):
+ return self.parser.parse_title(m)
+
+ def parse_content(self, m: re.Match):
+ return self.parser.parse_content(m)
+
+ def parse_tokens(self, block, text, state):
+ return self.parser.parse_tokens(block, text, state)
+
+ def parse(self, block, m, state):
+ raise NotImplementedError()
+
+ def __call__(self, md):
+ raise NotImplementedError()
diff --git a/src/mistune/directives/_fenced.py b/src/mistune/directives/_fenced.py
new file mode 100644
index 0000000..818f130
--- /dev/null
+++ b/src/mistune/directives/_fenced.py
@@ -0,0 +1,142 @@
+import re
+from ._base import DirectiveParser, BaseDirective
+
+__all__ = ['FencedDirective']
+
+
# Matches a fenced-code info string that starts with "{directive-type}".
_type_re = re.compile(r'^ *\{[a-zA-Z0-9_-]+\}')
# Splits the text that follows the opening fence into named groups:
#   type    - the name inside the curly brackets
#   title   - the remainder of the first line
#   options - zero or more ":key: value" lines
#   text    - every remaining line (the directive body)
_directive_re = re.compile(
    r'\{(?P<type>[a-zA-Z0-9_-]+)\} *(?P<title>[^\n]*)(?:\n|$)'
    r'(?P<options>(?:\:[a-zA-Z0-9_-]+\: *[^\n]*\n+)*)'
    r'\n*(?P<text>(?:[^\n]*\n+)*)'
)
+
+
class FencedParser(DirectiveParser):
    """Field accessors for a fenced-style directive match.

    Each accessor reads one named group of the fenced directive regex.
    """
    name = 'fenced_directive'

    @staticmethod
    def parse_type(m: re.Match):
        """Return the directive type (the ``type`` group)."""
        return m['type']

    @staticmethod
    def parse_title(m: re.Match):
        """Return the directive title (the ``title`` group)."""
        return m['title']

    @staticmethod
    def parse_content(m: re.Match):
        """Return the directive body (the ``text`` group)."""
        return m['text']
+
+
class FencedDirective(BaseDirective):
    """A **fenced** style of directive looks like a fenced code block, it is
    inspired by markdown-it-docutils. The syntax looks like:

    .. code-block:: text

        ```{directive-type} title
        :option-key: option value
        :option-key: option value

        content text here
        ```

    To use ``FencedDirective``, developers can add it into plugin list in
    the :class:`Markdown` instance:

    .. code-block:: python

        import mistune
        from mistune.directives import FencedDirective, Admonition

        md = mistune.create_markdown(plugins=[
            # ...
            FencedDirective([Admonition()]),
        ])

    FencedDirective is using >= 3 backticks or tildes for the fenced
    syntax. Developers can change it to other characters, e.g. colon:

    .. code-block:: python

        directive = FencedDirective([Admonition()], ':')

    And then the directive syntax would look like:

    .. code-block:: text

        ::::{note} Nesting directives
        You can nest directives by ensuring the start and end fence matching
        the length. For instance, in this example, the admonition is started
        with 4 colons, then it should end with 4 colons.

        You can nest another admonition with other length of colons except 4.

        :::{tip} Longer outermost fence
        It would be better that you put longer markers for the outer fence,
        and shorter markers for the inner fence. In this example, we put 4
        colons outside, and 3 colons inside.
        :::
        ::::

    :param plugins: list of directive plugins
    :param markers: characters to determine the fence, default is backtick
                    and tilde
    """
    parser = FencedParser

    def __init__(self, plugins, markers='`~'):
        super().__init__(plugins)
        self.markers = markers
        _marker_pattern = '|'.join(re.escape(c) for c in markers)
        self.directive_pattern = (
            r'^(?P<fenced_directive_mark>(?:' + _marker_pattern + r'){3,})'
            r'\{[a-zA-Z0-9_-]+\}'
        )

    def _process_directive(self, block, marker, start, state):
        """Parse the directive opened by ``marker`` at offset ``start``.

        Looks for a closing fence made of the same character and at least
        as long as the opening one; without it the directive extends to the
        end of the source.

        :return: position after the directive, or ``None`` when the fenced
                 text is not a well-formed directive
        """
        mlen = len(marker)
        cursor_start = start + mlen

        # The fence character must be escaped: custom markers such as '*',
        # '+' or '.' are regex metacharacters and would otherwise produce
        # an invalid or over-permissive closing pattern.
        _end_pattern = (
            r'^ {0,3}' + re.escape(marker[0]) + '{' + str(mlen) + r',}'
            r'[ \t]*(?:\n|$)'
        )
        _end_re = re.compile(_end_pattern, re.M)

        _end_m = _end_re.search(state.src, cursor_start)
        if _end_m:
            text = state.src[cursor_start:_end_m.start()]
            end_pos = _end_m.end()
        else:
            text = state.src[cursor_start:]
            end_pos = state.cursor_max

        m = _directive_re.match(text)
        if not m:
            return None

        self.parse_method(block, m, state)
        return end_pos

    def parse_directive(self, block, m, state):
        """Block-rule entry point used when custom markers are configured."""
        marker = m.group('fenced_directive_mark')
        return self._process_directive(block, marker, m.start(), state)

    def parse_fenced_code(self, block, m, state):
        """Replacement for the ``fenced_code`` rule with default markers.

        Falls back to the regular fenced code parser when the info string
        is not a ``{type}`` directive or the nesting limit is reached.
        """
        info = m.group('fenced_3')
        if not info or not _type_re.match(info):
            return block.parse_fenced_code(m, state)

        if state.depth() >= block.max_nested_level:
            return block.parse_fenced_code(m, state)

        marker = m.group('fenced_2')
        return self._process_directive(block, marker, m.start(), state)

    def __call__(self, md):
        super().__call__(md)
        if self.markers == '`~':
            # same fence characters as code blocks: take over that rule
            md.block.register('fenced_code', None, self.parse_fenced_code)
        else:
            self.register_block_parser(md, 'fenced_code')
diff --git a/src/mistune/directives/_rst.py b/src/mistune/directives/_rst.py
new file mode 100644
index 0000000..6e054cf
--- /dev/null
+++ b/src/mistune/directives/_rst.py
@@ -0,0 +1,73 @@
+import re
+from ._base import DirectiveParser, BaseDirective
+
+__all__ = ['RSTDirective']
+
+
# Matches a whole RST-style directive starting at ".. type:: title".
# Group 1 captures the spaces after the leading dots and is reused via \1
# to require the same indentation on option and content lines.
# NOTE(review): this view shows a single space before each \1, while
# RSTParser.parse_content strips len(group 1) + 2 columns from content
# lines - confirm that repeated spaces were not collapsed in this copy.
_directive_re = re.compile(
    r'\.\.( +)(?P<type>[a-zA-Z0-9_-]+)\:\: *(?P<title>[^\n]*)(?:\n|$)'
    r'(?P<options>(?: \1 {0,3}\:[a-zA-Z0-9_-]+\: *[^\n]*\n+)*)'
    r'\n*(?P<text>(?: \1 {0,3}[^\n]*\n+)*)'
)
+
+
class RSTParser(DirectiveParser):
    """Field accessors for an RST-style directive match."""
    name = 'rst_directive'

    @staticmethod
    def parse_type(m: re.Match):
        """Return the directive type (the ``type`` group)."""
        return m.group('type')

    @staticmethod
    def parse_title(m: re.Match):
        """Return the directive title (the ``title`` group)."""
        return m.group('title')

    @staticmethod
    def parse_content(m: re.Match):
        """Return the directive body with its indentation removed.

        ``len(group 1) + 2`` leading characters are dropped from every
        content line: the two dots plus the spaces that followed them on
        the directive line. The result always ends with a newline.
        """
        text = m.group('text')
        leading = len(m.group(1)) + 2
        return '\n'.join(line[leading:] for line in text.splitlines()) + '\n'
+
+
class RSTDirective(BaseDirective):
    """Directive syntax modelled on reStructuredText.

    A directive starts with two dots, names its type followed by ``::``
    and may carry a title, options and indented content:

    .. code-block:: text

        .. directive-type:: directive value
           :option-key: option value
           :option-key: option value

           content text here

    Enable it by adding an instance to the plugin list of the
    :class:`Markdown` instance:

    .. code-block:: python

        import mistune
        from mistune.directives import RSTDirective, Admonition

        md = mistune.create_markdown(plugins=[
            # ...
            RSTDirective([Admonition()]),
        ])
    """
    parser = RSTParser
    directive_pattern = r'^\.\. +[a-zA-Z0-9_-]+\:\:'

    def parse_directive(self, block, m, state):
        """Parse the directive located at the current cursor of ``state``.

        The start pattern only detects a directive; the complete regex is
        applied again here to capture title, options and content.
        """
        matched = _directive_re.match(state.src, state.cursor)
        if matched is None:
            return None

        self.parse_method(block, matched, state)
        return matched.end()

    def __call__(self, md):
        super().__call__(md)
        self.register_block_parser(md)
diff --git a/src/mistune/directives/admonition.py b/src/mistune/directives/admonition.py
new file mode 100644
index 0000000..b380611
--- /dev/null
+++ b/src/mistune/directives/admonition.py
@@ -0,0 +1,61 @@
+from ._base import DirectivePlugin
+
+
class Admonition(DirectivePlugin):
    """Directive plugin producing admonition blocks (note, warning, ...)."""

    SUPPORTED_NAMES = {
        "attention", "caution", "danger", "error", "hint",
        "important", "note", "tip", "warning",
    }

    def parse(self, block, m, state):
        """Build the ``admonition`` token for a matched directive.

        The directive type becomes the admonition name, the optional
        ``class`` option is forwarded as an attribute, and a missing title
        defaults to the capitalized name.
        """
        name = self.parse_type(m)
        attrs = {'name': name}
        options = dict(self.parse_options(m))
        if 'class' in options:
            attrs['class'] = options['class']

        title = self.parse_title(m)
        if not title:
            title = name.capitalize()

        content = self.parse_content(m)
        children = [
            {
                'type': 'admonition_title',
                'text': title,
            },
            {
                'type': 'admonition_content',
                'children': self.parse_tokens(block, content, state),
            }
        ]
        return {
            'type': 'admonition',
            'children': children,
            'attrs': attrs,
        }

    def __call__(self, directive, md):
        for name in self.SUPPORTED_NAMES:
            directive.register(name, self.parse)

        # The renderer may be absent; guard the lookup the same way the
        # include and toc plugins do instead of failing on ``None.NAME``.
        if md.renderer and md.renderer.NAME == 'html':
            md.renderer.register('admonition', render_admonition)
            md.renderer.register('admonition_title', render_admonition_title)
            md.renderer.register('admonition_content', render_admonition_content)
+
+
def render_admonition(self, text, name, **attrs):
    """Render an admonition as a ``<section>`` element.

    :param text: already rendered children (title and content)
    :param name: admonition name, used as a CSS class
    :param attrs: remaining token attributes; ``class`` adds extra classes
    """
    from html import escape as _escape_attr

    out = '<section class="admonition ' + name
    _cls = attrs.get('class')
    if _cls:
        # ``class`` comes straight from the document's directive options,
        # so it must not be able to break out of the attribute value
        out += ' ' + _escape_attr(_cls, quote=True)
    return out + '">\n' + text + '</section>\n'
+
+
def render_admonition_title(self, text):
    """Wrap the rendered title in a ``<p class="admonition-title">``."""
    return f'<p class="admonition-title">{text}</p>\n'
+
+
def render_admonition_content(self, text):
    """Return the rendered admonition body unchanged (no wrapper)."""
    rendered = text
    return rendered
diff --git a/src/mistune/directives/image.py b/src/mistune/directives/image.py
new file mode 100644
index 0000000..5d9d40a
--- /dev/null
+++ b/src/mistune/directives/image.py
@@ -0,0 +1,152 @@
+import re
+from ._base import DirectivePlugin
+from ..util import escape as escape_text, escape_url
+
+__all__ = ['Image', 'Figure']
+
+_num_re = re.compile(r'^\d+(?:\.\d*)?')
+_allowed_aligns = ["top", "middle", "bottom", "left", "center", "right"]
+
+
+def _parse_attrs(options):
+ attrs = {}
+ if 'alt' in options:
+ attrs['alt'] = options['alt']
+
+ # validate align
+ align = options.get('align')
+ if align and align in _allowed_aligns:
+ attrs['align'] = align
+
+ height = options.get('height')
+ width = options.get('width')
+ if height and _num_re.match(height):
+ attrs['height'] = height
+ if width and _num_re.match(width):
+ attrs['width'] = width
+ if 'target' in options:
+ attrs['target'] = escape_url(options['target'])
+ return attrs
+
+
class Image(DirectivePlugin):
    """Directive plugin producing a block-level image."""

    NAME = 'image'

    def parse(self, block, m, state):
        """Build a ``block_image`` token; the directive title is the source."""
        options = dict(self.parse_options(m))
        attrs = _parse_attrs(options)
        attrs['src'] = self.parse_title(m)
        return {'type': 'block_image', 'attrs': attrs}

    def __call__(self, directive, md):
        directive.register(self.NAME, self.parse)
        # The renderer may be absent; guard the lookup the same way the
        # include and toc plugins do instead of failing on ``None.NAME``.
        if md.renderer and md.renderer.NAME == 'html':
            md.renderer.register('block_image', render_block_image)
+
+
def render_block_image(self, src: str, alt=None, width=None, height=None, **attrs):
    """Render a ``block_image`` token as HTML.

    The image is wrapped in a link when a ``target`` attribute is present,
    otherwise in a ``<div>``. Purely numeric sizes become ``width`` /
    ``height`` attributes; any other accepted value goes into ``style``.

    :param src: image source taken from the directive title
    :param alt: alternative text
    :param width: width option, e.g. ``"100"`` or ``"50%"``
    :param height: height option
    :param attrs: remaining attributes (``align``, ``target``)
    """
    # ``src`` is raw document text: filter it through the renderer's URL
    # policy and escape it, exactly as is done for ``target`` below, so it
    # cannot break out of the attribute.
    img = '<img src="' + escape_text(self.safe_url(src)) + '"'
    style = ''
    if alt:
        img += ' alt="' + escape_text(alt) + '"'
    if width:
        if width.isdigit():
            img += ' width="' + width + '"'
        else:
            style += 'width:' + width + ';'
    if height:
        if height.isdigit():
            img += ' height="' + height + '"'
        else:
            style += 'height:' + height + ';'
    if style:
        img += ' style="' + escape_text(style) + '"'

    img += ' />'

    _cls = 'block-image'
    align = attrs.get('align')
    if align:
        _cls += ' align-' + align

    target = attrs.get('target')
    if target:
        href = escape_text(self.safe_url(target))
        outer = '<a class="' + _cls + '" href="' + href + '">'
        return outer + img + '</a>\n'
    return '<div class="' + _cls + '">' + img + '</div>\n'
+
+
class Figure(DirectivePlugin):
    """Directive plugin producing a figure: image, caption and legend."""

    NAME = 'figure'

    def parse_directive_content(self, block, m, state):
        """Turn the directive body into caption and legend tokens.

        The first parsed token is the caption (a paragraph is retyped to
        ``figcaption``); all following tokens are grouped under ``legend``.

        :return: list of child tokens, or ``None`` when there is no content
        """
        content = self.parse_content(m)
        if not content:
            return None

        tokens = self.parse_tokens(block, content, state)
        if not tokens:
            # content that yields no tokens has no caption to promote
            return None

        caption = tokens[0]
        if caption['type'] == 'paragraph':
            caption['type'] = 'figcaption'
            children = [caption]
            if len(tokens) > 1:
                children.append({
                    'type': 'legend',
                    'children': tokens[1:]
                })
            return children
        return None

    def parse(self, block, m, state):
        """Build the ``figure`` token for a matched directive."""
        options = dict(self.parse_options(m))
        image_attrs = _parse_attrs(options)
        image_attrs['src'] = self.parse_title(m)

        # alignment belongs to the figure wrapper, not to the inner image
        align = image_attrs.pop('align', None)
        fig_attrs = {}
        if align:
            fig_attrs['align'] = align
        for k in ['figwidth', 'figclass']:
            if k in options:
                fig_attrs[k] = options[k]

        children = [{'type': 'block_image', 'attrs': image_attrs}]
        content = self.parse_directive_content(block, m, state)
        if content:
            children.extend(content)
        return {
            'type': 'figure',
            'attrs': fig_attrs,
            'children': children,
        }

    def __call__(self, directive, md):
        directive.register(self.NAME, self.parse)

        # The renderer may be absent; guard the lookup the same way the
        # include and toc plugins do instead of failing on ``None.NAME``.
        if md.renderer and md.renderer.NAME == 'html':
            md.renderer.register('figure', render_figure)
            md.renderer.register('block_image', render_block_image)
            md.renderer.register('figcaption', render_figcaption)
            md.renderer.register('legend', render_legend)
+
+
def render_figure(self, text, align=None, figwidth=None, figclass=None):
    """Render a ``figure`` token as a ``<figure>`` element.

    :param text: already rendered children (image, caption, legend)
    :param align: alignment keyword, added as an ``align-*`` class
    :param figwidth: CSS width placed in the ``style`` attribute
    :param figclass: extra CSS class names
    """
    from html import escape as _escape_attr

    _cls = 'figure'
    if align:
        _cls += ' align-' + align
    if figclass:
        _cls += ' ' + figclass

    # ``figclass`` and ``figwidth`` are raw directive options; escape them
    # so they cannot terminate the attribute value they are written into
    out = '<figure class="' + _escape_attr(_cls, quote=True) + '"'
    if figwidth:
        out += ' style="width:' + _escape_attr(figwidth, quote=True) + '"'
    return out + '>\n' + text + '</figure>\n'
+
+
def render_figcaption(self, text):
    """Wrap the rendered caption in a ``<figcaption>`` element."""
    return f'<figcaption>{text}</figcaption>\n'
+
+
def render_legend(self, text):
    """Wrap the rendered legend in a ``<div class="legend">`` element."""
    return f'<div class="legend">\n{text}</div>\n'
diff --git a/src/mistune/directives/include.py b/src/mistune/directives/include.py
new file mode 100644
index 0000000..d2180ba
--- /dev/null
+++ b/src/mistune/directives/include.py
@@ -0,0 +1,65 @@
+import os
+from ._base import DirectivePlugin
+
+
class Include(DirectivePlugin):
    """Directive plugin that embeds the content of another file."""

    def parse(self, block, m, state):
        """Build the token(s) for an ``include`` directive.

        The path in the directive title is resolved relative to the file
        stored under ``__file__`` in ``state.env``. Markdown files are
        parsed and their tokens returned, HTML files become a
        ``block_html`` token, and anything else becomes an ``include``
        token carrying the decoded text.

        :return: one token, or a list of tokens for markdown files;
                 problems are reported as a ``block_error`` token
        """
        source_file = state.env.get('__file__')
        if not source_file:
            return {'type': 'block_error', 'raw': 'Missing source file'}

        attrs = dict(self.parse_options(m) or ())
        encoding = attrs.get('encoding') or 'utf-8'

        relpath = self.parse_title(m)
        dest = os.path.join(os.path.dirname(source_file), relpath)
        dest = os.path.normpath(dest)

        # Normalize both sides: ``dest`` is normalized above, so comparing
        # it with a raw path such as "./doc.md" would never detect a file
        # that includes itself.
        if dest == os.path.normpath(source_file):
            return {
                'type': 'block_error',
                'raw': 'Could not include self: ' + relpath,
            }

        if not os.path.isfile(dest):
            return {
                'type': 'block_error',
                'raw': 'Could not find file: ' + relpath,
            }

        try:
            with open(dest, 'rb') as f:
                content = f.read().decode(encoding)
        except LookupError:
            # unknown codec name in the ``encoding`` option; decoding
            # errors are ValueError subclasses and are left to propagate
            return {
                'type': 'block_error',
                'raw': 'Unknown encoding: ' + encoding,
            }

        ext = os.path.splitext(relpath)[1]
        if ext in {'.md', '.markdown', '.mkd'}:
            new_state = block.state_cls()
            new_state.env['__file__'] = dest
            new_state.process(content)
            block.parse(new_state)
            return new_state.tokens

        if ext in {'.html', '.xhtml', '.htm'}:
            return {'type': 'block_html', 'raw': content}

        attrs['filepath'] = dest
        return {
            'type': 'include',
            'raw': content,
            'attrs': attrs,
        }

    def __call__(self, directive, md):
        directive.register('include', self.parse)
        if md.renderer and md.renderer.NAME == 'html':
            md.renderer.register('include', render_html_include)
+
+
def render_html_include(renderer, text, **attrs):
    """Render the text of an included non-markdown file inside ``<pre>``.

    :param text: decoded content of the included file
    :param attrs: token attributes (directive options, ``filepath``)
    """
    from html import escape as _escape_html

    # ``text`` is the raw file content; escape it so markup characters in
    # the file are displayed instead of being interpreted by the browser.
    # NOTE(review): assumes the renderer passes ``raw`` through unescaped -
    # confirm against the HTML renderer.
    return '<pre class="directive-include">\n' + _escape_html(text) + '</pre>\n'
diff --git a/src/mistune/directives/toc.py b/src/mistune/directives/toc.py
new file mode 100644
index 0000000..4084f43
--- /dev/null
+++ b/src/mistune/directives/toc.py
@@ -0,0 +1,105 @@
+"""
+ TOC directive
+ ~~~~~~~~~~~~~
+
+ The TOC directive syntax looks like::
+
+ .. toc:: Title
+ :min-level: 1
+ :max-level: 3
+
+ "Title", "min-level", and "max-level" option can be empty. "min-level"
+ and "max-level" are integers >= 1 and <= 6, which define the allowed
+ heading levels writers want to include in the table of contents.
+"""
+
+from ._base import DirectivePlugin
+from ..toc import normalize_toc_item, render_toc_ul
+
+
class TableOfContents(DirectivePlugin):
    """Directive plugin rendering a table of contents.

    :param min_level: lowest heading level a ``toc`` directive may request
    :param max_level: highest heading level a ``toc`` directive may request
    """

    def __init__(self, min_level=1, max_level=3):
        # initialise the base class so that ``parser`` always exists
        super().__init__()
        self.min_level = min_level
        self.max_level = max_level

    def generate_heading_id(self, token, index):
        """Return the id given to the ``index``-th heading (1-based)."""
        return 'toc_' + str(index + 1)

    def parse(self, block, m, state):
        """Build the ``toc`` token for a matched directive.

        :raises ValueError: when ``min-level`` / ``max-level`` are not
            integers, fall outside the configured range, or are inverted
        """
        title = self.parse_title(m)
        options = self.parse_options(m)
        if options:
            d_options = dict(options)
            collapse = 'collapse' in d_options
            min_level = _normalize_level(d_options, 'min-level', self.min_level)
            max_level = _normalize_level(d_options, 'max-level', self.max_level)
            if min_level < self.min_level:
                raise ValueError(f'"min-level" option MUST be >= {self.min_level}')
            if max_level > self.max_level:
                raise ValueError(f'"max-level" option MUST be <= {self.max_level}')
            if min_level > max_level:
                raise ValueError('"min-level" option MUST be less than "max-level" option')
        else:
            collapse = False
            min_level = self.min_level
            max_level = self.max_level

        attrs = {
            'min_level': min_level,
            'max_level': max_level,
            'collapse': collapse,
        }
        return {'type': 'toc', 'text': title or '', 'attrs': attrs}

    def toc_hook(self, md, state):
        """Before-render hook: assign heading ids and fill each toc token.

        Only top-level tokens of ``state`` are inspected. Nothing is
        changed when the document holds no ``toc`` token.
        """
        sections = []
        headings = []

        for tok in state.tokens:
            if tok['type'] == 'toc':
                sections.append(tok)
            elif tok['type'] == 'heading':
                headings.append(tok)

        if not sections:
            return

        toc_items = []
        # adding ID for each heading
        for i, tok in enumerate(headings):
            tok['attrs']['id'] = self.generate_heading_id(tok, i)
            toc_items.append(normalize_toc_item(md, tok))

        for sec in sections:
            _min = sec['attrs']['min_level']
            _max = sec['attrs']['max_level']
            # the first element of every item is compared with the levels
            toc = [item for item in toc_items if _min <= item[0] <= _max]
            sec['attrs']['toc'] = toc

    def __call__(self, directive, md):
        if md.renderer and md.renderer.NAME == 'html':
            # only works with HTML renderer
            directive.register('toc', self.parse)
            md.before_render_hooks.append(self.toc_hook)
            md.renderer.register('toc', render_html_toc)
+
+
def render_html_toc(renderer, title, collapse=False, **attrs):
    """Render a ``toc`` token as a ``<details>`` element.

    :param title: summary text; defaults to "Table of Contents" when empty
    :param collapse: when true the ``open`` attribute is omitted
    :param attrs: remaining token attributes; must contain ``toc``
    """
    if not title:
        title = 'Table of Contents'
    content = render_toc_ul(attrs['toc'])

    html = '<details class="toc"'
    if not collapse:
        html += ' open'
    html += '>\n<summary>' + title + '</summary>\n'
    return html + content + '</details>\n'
+
+
+def _normalize_level(options, name, default):
+ level = options.get(name)
+ if not level:
+ return default
+ try:
+ return int(level)
+ except (ValueError, TypeError):
+ raise ValueError(f'"{name}" option MUST be integer')
diff --git a/src/mistune/helpers.py b/src/mistune/helpers.py
new file mode 100644
index 0000000..04c1df1
--- /dev/null
+++ b/src/mistune/helpers.py
@@ -0,0 +1,137 @@
+import re
+import string
+from .util import escape_url
+
# Prefix that rejects a preceding unescaped backslash: a negative
# lookbehind for "\" followed by any number of "\\" pairs.
PREVENT_BACKSLASH = r'(?<!\\)(?:\\\\)*'
# Character class built from every character in string.punctuation.
PUNCTUATION = r'[' + re.escape(string.punctuation) + r']'

# Up to 500 characters that are either not a backslash/bracket or are a
# backslash followed by any character.
LINK_LABEL = r'(?:[^\\\[\]]|\\.){0,500}'

# Optional whitespace (at most one newline) followed by "<".
LINK_BRACKET_START = re.compile(r'[ \t]*\n?[ \t]*<')
# "<...>" destination; group 1 is the text between the angle brackets.
LINK_BRACKET_RE = re.compile(r'<([^<>\n\\\x00]*)>')
# Bare destination in block context: a run of non-whitespace characters.
LINK_HREF_BLOCK_RE = re.compile(r'[ \t]*\n?[ \t]*([^\s]+)(?:\s|$)')
# Bare destination in inline context: ends at whitespace or at an
# unescaped ")".
LINK_HREF_INLINE_RE = re.compile(
    r'[ \t]*\n?[ \t]*([^ \t\n]*?)(?:[ \t\n]|'
    r'(?:' + PREVENT_BACKSLASH + r'\)))'
)

# Link title in double or single quotes, preceded by whitespace.
LINK_TITLE_RE = re.compile(
    r'[ \t\n]+('
    r'"(?:\\' + PUNCTUATION + r'|[^"\x00])*"|'  # "title"
    r"'(?:\\" + PUNCTUATION + r"|[^'\x00])*'"  # 'title'
    r')'
)
# Optional whitespace followed by ")".
PAREN_END_RE = re.compile(r'\s*\)')

HTML_TAGNAME = r'[A-Za-z][A-Za-z0-9-]*'
# Zero or more attributes, each with an optional unquoted, single-quoted
# or double-quoted value.
HTML_ATTRIBUTES = (
    r'(?:\s+[A-Za-z_:][A-Za-z0-9_.:-]*'
    r'(?:\s*=\s*(?:[^ !"\'=<>`]+|\'[^\']*?\'|"[^\"]*?"))?)*'
)

BLOCK_TAGS = (
    'address', 'article', 'aside', 'base', 'basefont', 'blockquote',
    'body', 'caption', 'center', 'col', 'colgroup', 'dd', 'details',
    'dialog', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
    'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'iframe',
    'legend', 'li', 'link', 'main', 'menu', 'menuitem', 'meta', 'nav',
    'noframes', 'ol', 'optgroup', 'option', 'p', 'param', 'section',
    'source', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead',
    'title', 'tr', 'track', 'ul'
)
PRE_TAGS = ('pre', 'script', 'style', 'textarea')

# Link label including its closing "]".
_INLINE_LINK_LABEL_RE = re.compile(LINK_LABEL + r'\]')
# The next "[" or "]" that is not escaped by a backslash.
_INLINE_SQUARE_BRACKET_RE = re.compile(PREVENT_BACKSLASH + r'[\[\]]')
# A backslash followed by a punctuation character (captured in group 1).
_ESCAPE_CHAR_RE = re.compile(r'\\(' + PUNCTUATION + r')')
+
+
def unescape_char(text):
    """Drop the backslash in front of every escaped punctuation character."""
    unescaped = _ESCAPE_CHAR_RE.sub(r'\1', text)
    return unescaped
+
+
def parse_link_text(src, pos):
    """Find the link text that starts at ``pos``, right after a ``[``.

    Square brackets are tracked with a nesting counter; the text ends at
    the ``]`` that brings the counter back to zero.

    :return: ``(text, position after the closing bracket)`` or
             ``(None, None)`` when the brackets are never balanced
    """
    depth = 1
    cursor = pos
    limit = len(src)

    while cursor < limit:
        hit = _INLINE_SQUARE_BRACKET_RE.search(src, cursor)
        if not hit:
            return None, None

        cursor = hit.end()
        if hit.group(0) == ']':
            depth -= 1
            if depth == 0:
                # exclude the closing bracket from the returned text
                return src[pos:cursor - 1], cursor
        else:
            depth += 1

    return None, None
+
+
def parse_link_label(src, start_pos):
    """Match a link label that begins at ``start_pos``.

    :return: ``(label without the closing bracket, end position)`` or
             ``(None, None)`` when no label matches
    """
    found = _INLINE_LINK_LABEL_RE.match(src, start_pos)
    if not found:
        return None, None
    # the pattern consumes the closing "]"; strip it from the label
    return found.group(0)[:-1], found.end()
+
+
def parse_link_href(src, start_pos, block=False):
    """Extract a link destination starting at ``start_pos``.

    A destination wrapped in angle brackets is tried first; otherwise the
    block or inline pattern for bare destinations is applied.

    :param block: use the block-level pattern instead of the inline one
    :return: ``(href, end position)`` or ``(None, None)``
    """
    bracket = LINK_BRACKET_START.match(src, start_pos)
    if bracket:
        # step back onto the "<" that the start pattern consumed
        enclosed = LINK_BRACKET_RE.match(src, bracket.end() - 1)
        if enclosed:
            return enclosed.group(1), enclosed.end()
        return None, None

    pattern = LINK_HREF_BLOCK_RE if block else LINK_HREF_INLINE_RE
    found = pattern.match(src, start_pos)
    if not found:
        return None, None

    href = found.group(1)
    end_pos = found.end()

    # in block mode the match may stop exactly on the last href character;
    # in every other case the trailing delimiter is handed back
    if block and src[end_pos - 1] == href[-1]:
        return href, end_pos
    return href, end_pos - 1
+
+
def parse_link_title(src, start_pos, max_pos):
    """Match a quoted link title between ``start_pos`` and ``max_pos``.

    :return: ``(unescaped title without quotes, end position)`` or
             ``(None, None)`` when no title is present
    """
    found = LINK_TITLE_RE.match(src, start_pos, max_pos)
    if not found:
        return None, None
    quoted = found.group(1)
    return unescape_char(quoted[1:-1]), found.end()
+
+
def parse_link(src, pos):
    """Parse ``destination "title")`` - the part of an inline link after ``(``.

    :return: ``(attrs, end position)`` where ``attrs`` holds ``url`` and,
             when a title was given, ``title``; ``(None, None)`` when the
             text is not a complete link tail
    """
    href, href_end = parse_link_href(src, pos)
    if href is None:
        return None, None

    title, title_end = parse_link_title(src, href_end, len(src))
    closing = PAREN_END_RE.match(src, title_end or href_end)
    if not closing:
        return None, None

    attrs = {'url': escape_url(unescape_char(href))}
    if title:
        attrs['title'] = title
    return attrs, closing.end()
diff --git a/src/mistune/inline_parser.py b/src/mistune/inline_parser.py
new file mode 100644
index 0000000..0375a74
--- /dev/null
+++ b/src/mistune/inline_parser.py
@@ -0,0 +1,390 @@
+import re
+from typing import Optional, List, Dict, Any
+from .core import Parser, InlineState
+from .util import (
+ escape,
+ escape_url,
+ unikey,
+)
+from .helpers import (
+ PREVENT_BACKSLASH,
+ PUNCTUATION,
+ HTML_TAGNAME,
+ HTML_ATTRIBUTES,
+ unescape_char,
+ parse_link,
+ parse_link_label,
+ parse_link_text,
+)
+
# Optional whitespace followed by ")".
# NOTE(review): not referenced anywhere in the visible part of this
# module - confirm whether it is still needed.
PAREN_END_RE = re.compile(r'\s*\)')

# <local-part@domain> autolink.
AUTO_EMAIL = (
    r'''<[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9]'''
    r'(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?'
    r'(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>'
)

# Alternation of every raw HTML construct recognised inline.
INLINE_HTML = (
    r'<' + HTML_TAGNAME + HTML_ATTRIBUTES + r'\s*/?>|'  # open tag
    r'</' + HTML_TAGNAME + r'\s*>|'  # close tag
    r'<!--(?!>|->)(?:(?!--)[\s\S])+?(?<!-)-->|'  # comment
    r'<\?[\s\S]+?\?>|'  # script like <?php?>
    r'<![A-Z][\s\S]+?>|'  # doctype
    r'<!\[CDATA[\s\S]+?\]\]>'  # cdata
)

# Closing-delimiter pattern for each opening emphasis marker. The
# underscore variants additionally require a word boundary after the run.
EMPHASIS_END_RE = {
    '*': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\\*|[^\s*])\*(?!\*)'),
    '_': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\_|[^\s_])_(?!_)\b'),

    '**': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\\*|[^\s*])\*\*(?!\*)'),
    '__': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\_|[^\s_])__(?!_)\b'),

    '***': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\\*|[^\s*])\*\*\*(?!\*)'),
    '___': re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\_|[^\s_])___(?!_)\b'),
}
+
+
class InlineParser(Parser):
    """Tokenizer for inline-level markdown.

    Each rule in ``SPECIFICATION`` only describes how a token *starts*;
    the matching ``parse_<rule>`` method consumes the rest and returns the
    position where scanning resumes (a falsy value means "no token here").

    :param hard_wrap: when true every newline becomes a line break
    """
    sc_flag = 0
    state_cls = InlineState

    #: linebreak leaves two spaces at the end of line
    STD_LINEBREAK = r'(?:\\| {2,})\n\s*'

    #: every new line becomes <br>
    HARD_LINEBREAK = r' *\n\s*'

    # we only need to find the start pattern of an inline token
    SPECIFICATION = {
        # e.g. \`, \$
        'escape': r'(?:\\' + PUNCTUATION + ')+',

        # `code, ```code
        'codespan': r'`{1,}',

        # *w, **w, _w, __w
        'emphasis': r'\*{1,3}(?=[^\s*])|\b_{1,3}(?=[^\s_])',

        # [link], ![img]
        'link': r'!?\[',

        # <https://example.com>. regex copied from commonmark.js
        'auto_link': r'<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>',
        'auto_email': AUTO_EMAIL,

        'inline_html': INLINE_HTML,

        'linebreak': STD_LINEBREAK,
        'softbreak': HARD_LINEBREAK,

        'prec_auto_link': r'<[A-Za-z][A-Za-z\d.+-]{1,31}:',
        'prec_inline_html': r'</?' + HTML_TAGNAME + r'|<!|<\?',
    }
    DEFAULT_RULES = (
        'escape',
        'codespan',
        'emphasis',
        'link',
        'auto_link',
        'auto_email',
        'inline_html',
        'linebreak',
    )

    def __init__(self, hard_wrap=False):
        super().__init__()

        self.hard_wrap = hard_wrap
        # lazy add linebreak
        if hard_wrap:
            self.specification['linebreak'] = self.HARD_LINEBREAK
        else:
            self.rules.append('softbreak')

        self._methods = {
            rule: getattr(self, 'parse_' + rule) for rule in self.rules
        }

    def parse_escape(self, m: re.Match, state: InlineState) -> int:
        """Emit backslash-escaped punctuation as plain text."""
        state.append_token({
            'type': 'text',
            'raw': unescape_char(m.group(0)),
        })
        return m.end()

    def parse_link(self, m: re.Match, state: InlineState) -> Optional[int]:
        """Parse an inline link, reference link or image opened by ``m``.

        Returns the position after the construct, or ``None`` when the
        opening bracket does not start a valid link.
        """
        marker = m.group(0)
        pos = m.end()
        is_image = marker[0] == '!'

        # links may not nest in links, nor images in images
        nested = state.in_image if is_image else state.in_link
        if nested:
            state.append_token({'type': 'text', 'raw': marker})
            return pos

        label, end_pos = parse_link_label(state.src, pos)
        if label is not None:
            text = label
        else:
            text, end_pos = parse_link_text(state.src, pos)
            if text is None:
                return None

        if end_pos >= len(state.src) and label is None:
            return None

        rules = ['codespan', 'prec_auto_link', 'prec_inline_html']
        prec_pos = self.precedence_scan(m, state, end_pos, rules)
        if prec_pos:
            return prec_pos

        if end_pos < len(state.src):
            following = state.src[end_pos]
            if following == '(':
                # standard link [text](<url> "title")
                attrs, pos2 = parse_link(state.src, end_pos + 1)
                if pos2:
                    token = self.__parse_link_token(is_image, text, attrs, state)
                    state.append_token(token)
                    return pos2

            elif following == '[':
                # standard ref link [text][label]
                label2, pos2 = parse_link_label(state.src, end_pos + 1)
                if pos2:
                    end_pos = pos2
                    if label2:
                        label = label2

        if label is None:
            return None

        ref_links = state.env.get('ref_links')
        if not ref_links:
            return None

        key = unikey(label)
        env = ref_links.get(key)
        if not env:
            return None

        attrs = {'url': env['url'], 'title': env.get('title')}
        token = self.__parse_link_token(is_image, text, attrs, state)
        token['ref'] = key
        token['label'] = label
        state.append_token(token)
        return end_pos

    def __parse_link_token(self, is_image, text, attrs, state):
        # render the link text in a child state flagged as "inside a
        # link/image" so the same construct cannot nest in itself
        child = state.copy()
        child.src = text
        if is_image:
            child.in_image = True
            kind = 'image'
        else:
            child.in_link = True
            kind = 'link'
        return {
            'type': kind,
            'children': self.render(child),
            'attrs': attrs,
        }

    def parse_auto_link(self, m: re.Match, state: InlineState) -> int:
        """Turn ``<scheme:...>`` into a link (plain text inside a link)."""
        matched = m.group(0)
        pos = m.end()
        if state.in_link:
            self.process_text(matched, state)
            return pos

        url = matched[1:-1]
        self._add_auto_link(url, url, state)
        return pos

    def parse_auto_email(self, m: re.Match, state: InlineState) -> int:
        """Turn ``<user@host>`` into a ``mailto:`` link."""
        matched = m.group(0)
        pos = m.end()
        if state.in_link:
            self.process_text(matched, state)
            return pos

        address = matched[1:-1]
        self._add_auto_link('mailto:' + address, address, state)
        return pos

    def _add_auto_link(self, url, text, state):
        state.append_token({
            'type': 'link',
            'children': [{'type': 'text', 'raw': text}],
            'attrs': {'url': escape_url(url)},
        })

    def parse_emphasis(self, m: re.Match, state: InlineState) -> int:
        """Parse ``*em*``, ``**strong**`` and ``***both***`` runs."""
        marker = m.group(0)
        pos = m.end()
        mlen = len(marker)

        # the same emphasis level never nests in itself
        already_open = (
            (mlen == 1 and state.in_emphasis)
            or (mlen == 2 and state.in_strong)
        )
        if already_open:
            state.append_token({'type': 'text', 'raw': marker})
            return pos

        closing = EMPHASIS_END_RE[marker].search(state.src, pos)
        if not closing:
            state.append_token({'type': 'text', 'raw': marker})
            return pos

        end_pos = closing.end()
        text = state.src[pos:end_pos - mlen]

        prec_pos = self.precedence_scan(m, state, end_pos)
        if prec_pos:
            return prec_pos

        new_state = state.copy()
        new_state.src = text
        if mlen == 1:
            new_state.in_emphasis = True
            token = {'type': 'emphasis', 'children': self.render(new_state)}
        elif mlen == 2:
            new_state.in_strong = True
            token = {'type': 'strong', 'children': self.render(new_state)}
        else:
            new_state.in_emphasis = True
            new_state.in_strong = True
            inner = [{
                'type': 'strong',
                'children': self.render(new_state)
            }]
            token = {
                'type': 'emphasis',
                'children': inner,
            }
        state.append_token(token)
        return end_pos

    def parse_codespan(self, m: re.Match, state: InlineState) -> int:
        """Parse a code span closed by a backtick run of the same length."""
        marker = m.group(0)
        pos = m.end()

        # require same marker with same length at end
        pattern = re.compile(r'(.*?(?:[^`]))' + marker + r'(?!`)', re.S)
        closing = pattern.match(state.src, pos)
        if not closing:
            state.append_token({'type': 'text', 'raw': marker})
            return pos

        # Line endings are treated like spaces
        code = closing.group(1).replace('\n', ' ')
        # one space of padding is stripped, unless the span is only spaces
        if code.strip() and code.startswith(' ') and code.endswith(' '):
            code = code[1:-1]
        state.append_token({'type': 'codespan', 'raw': escape(code)})
        return closing.end()

    def parse_linebreak(self, m: re.Match, state: InlineState) -> int:
        """Emit a ``linebreak`` token."""
        state.append_token({'type': 'linebreak'})
        return m.end()

    def parse_softbreak(self, m: re.Match, state: InlineState) -> int:
        """Emit a ``softbreak`` token."""
        state.append_token({'type': 'softbreak'})
        return m.end()

    def parse_inline_html(self, m: re.Match, state: InlineState) -> int:
        """Emit raw inline HTML and track whether an ``<a>`` is open."""
        html = m.group(0)
        state.append_token({'type': 'inline_html', 'raw': html})
        if html.startswith(('<a ', '<a>', '<A ', '<A>')):
            state.in_link = True
        elif html.startswith(('</a ', '</a>', '</A ', '</A>')):
            state.in_link = False
        return m.end()

    def process_text(self, text: str, state: InlineState):
        """Append ``text`` to ``state`` as a plain text token."""
        state.append_token({'type': 'text', 'raw': text})

    def parse(self, state: InlineState) -> List[Dict[str, Any]]:
        """Tokenize ``state.src`` and return the token list of ``state``."""
        source = state.src
        total = len(source)
        scanner = self.compile_sc()
        pos = 0

        while pos < total:
            found = scanner.search(source, pos)
            if not found:
                break

            token_start = found.start()
            if token_start > pos:
                # plain text between the previous token and this one
                self.process_text(source[pos:token_start], state)

            new_pos = self.parse_method(found, state)
            if new_pos:
                pos = new_pos
            else:
                # move cursor 1 character forward
                pos = token_start + 1
                self.process_text(source[token_start:pos], state)

        if pos == 0:
            # special case, just pure text
            self.process_text(state.src, state)
        elif pos < len(state.src):
            self.process_text(state.src[pos:], state)
        return state.tokens

    def precedence_scan(self, m: re.Match, state: InlineState, end_pos: int, rules=None):
        """Look for a higher-precedence construct that crosses ``end_pos``.

        The span between the end of ``m`` and ``end_pos`` is searched with
        ``rules``. When a construct found there extends to ``end_pos`` or
        beyond, the text from the start of ``m`` up to that construct is
        emitted as plain text, followed by the construct's tokens.

        :return: the new position, or ``None`` when nothing takes precedence
        """
        if rules is None:
            rules = ['codespan', 'link', 'prec_auto_link', 'prec_inline_html']

        candidate = self.compile_sc(rules).search(state.src, m.end(), end_pos)
        if not candidate:
            return None

        rule_name = candidate.lastgroup.replace('prec_', '')
        full_match = self.compile_sc([rule_name]).match(state.src, candidate.start())
        if not full_match:
            return None

        # trial parse in a scratch state so nothing leaks on failure
        scratch = state.copy()
        scratch.src = state.src
        inner_end = self._methods[rule_name](full_match, scratch)
        if not inner_end or inner_end < end_pos:
            return None

        leading_text = state.src[m.start():full_match.start()]
        state.append_token({'type': 'text', 'raw': leading_text})
        for token in scratch.tokens:
            state.append_token(token)
        return inner_end

    def render(self, state: InlineState):
        """Parse ``state`` and return its tokens."""
        self.parse(state)
        return state.tokens

    def __call__(self, s, env):
        state = self.state_cls(env)
        state.src = s
        return self.render(state)
diff --git a/src/mistune/list_parser.py b/src/mistune/list_parser.py
new file mode 100644
index 0000000..b5ff866
--- /dev/null
+++ b/src/mistune/list_parser.py
@@ -0,0 +1,250 @@
+import re
+from .core import BlockState
+from .util import (
+ strip_end,
+ expand_tab,
+ expand_leading_tab,
+)
# because list is complex, split list parser in a new file

# Matches the first line of a list item:
#   list_1 - up to three leading spaces
#   list_2 - the marker: a bullet char, or 1-9 digits followed by '.' or ')'
#   list_3 - rest of the line: only spaces/tabs, or a space/tab then content
LIST_PATTERN = (
    r'^(?P<list_1> {0,3})'
    r'(?P<list_2>[\*\+-]|\d{1,9}[.)])'
    r'(?P<list_3>[ \t]*|[ \t].+)$'
)

# Captures the run of spaces that precedes the first non-whitespace character.
_LINE_HAS_TEXT = re.compile(r'( *)\S')
+
+
def parse_list(block, m: re.Match, state: BlockState) -> int:
    """Parse an ordered or unordered list whose first line is ``m``.

    Items are consumed one by one through ``_parse_list_item`` and
    collected in a single ``list`` token.

    :returns: the position at which parsing should continue
    """
    text = m.group('list_3')
    if not text.strip():
        # Example 285
        # an empty list item cannot interrupt a paragraph
        interrupted_at = state.append_paragraph()
        if interrupted_at:
            return interrupted_at

    marker = m.group('list_2')
    # bullets are one character; ordered markers are digits plus a delimiter
    is_ordered = len(marker) > 1
    depth = state.depth()
    attrs = {
        'depth': depth,
        'ordered': is_ordered,
    }
    token = {
        'type': 'list',
        'children': [],
        'tight': True,
        'bullet': marker[-1],
        'attrs': attrs,
    }
    if is_ordered:
        start = int(marker[:-1])
        if start != 1:
            # Example 304
            # we allow only lists starting with 1 to interrupt paragraphs
            interrupted_at = state.append_paragraph()
            if interrupted_at:
                return interrupted_at
            attrs['start'] = start

    state.cursor = m.end() + 1
    groups = (m.group('list_1'), marker, text)

    rules = block.list_rules
    if depth >= block.max_nested_level - 1:
        # nesting limit reached: children may not open another list
        rules = list(rules)
        rules.remove('list')

    bullet = _get_list_bullet(marker[-1])
    while groups:
        groups = _parse_list_item(block, bullet, groups, token, state, rules)

    end_pos = token.pop('_end_pos', None)
    _transform_tight_list(token)
    if not end_pos:
        state.append_token(token)
        return state.cursor

    # another block interrupted the list; put the list in front of the
    # tokens that block already appended
    state.tokens.insert(token.pop('_tok_index'), token)
    return end_pos
+
+
def _transform_tight_list(token):
    """Turn paragraphs of a tight list into ``block_text``, recursing into
    nested lists. Loose lists are left untouched."""
    if not token['tight']:
        return

    for item in token['children']:
        for child in item['children']:
            kind = child['type']
            if kind == 'paragraph':
                child['type'] = 'block_text'
            elif kind == 'list':
                _transform_tight_list(child)
+
+
def _parse_list_item(block, bullet, groups, token, state, rules):
    """Parse one list item and append it to ``token['children']``.

    :param block: block parser; provides ``specification``, ``BLANK_LINE``,
                  ``parse_method`` and ``parse``
    :param bullet: regex fragment matching this list's marker
    :param groups: ``(leading spaces, marker, first-line text)`` of the item
    :param token: the enclosing ``list`` token; mutated in place
    :param state: block state whose cursor is advanced
    :param rules: rule names used to parse the item's content
    :returns: the groups of the following sibling item, or ``None`` when
              the list ends here
    """
    spaces, marker, text = groups

    leading_width = len(spaces) + len(marker)
    text, continue_width = _compile_continue_width(text, leading_width)
    item_pattern = _compile_list_item_pattern(bullet, leading_width)
    # block constructs that are tested against a line which is not indented
    # enough to continue the item
    pairs = [
        ('thematic_break', block.specification['thematic_break']),
        ('fenced_code', block.specification['fenced_code']),
        ('axt_heading', block.specification['axt_heading']),
        ('block_quote', block.specification['block_quote']),
        ('block_html', block.specification['block_html']),
        ('list', block.specification['list']),
    ]
    if leading_width < 3:
        # replace the first '3' in each pattern with the item's own width
        _repl_w = str(leading_width)
        pairs = [(n, p.replace('3', _repl_w, 1)) for n, p in pairs]

    pairs.insert(1, ('list_item', item_pattern))
    regex = '|'.join(r'(?P<%s>(?<=\n)%s)' % pair for pair in pairs)
    sc = re.compile(regex, re.M)

    src = ''
    next_group = None
    prev_blank_line = False
    pos = state.cursor

    continue_space = ' ' * continue_width
    while pos < state.cursor_max:
        pos = state.find_line_end()
        line = state.get_text(pos)
        if block.BLANK_LINE.match(line):
            src += '\n'
            prev_blank_line = True
            state.cursor = pos
            continue

        line = expand_leading_tab(line)
        if line.startswith(continue_space):
            if prev_blank_line and not text and not src.strip():
                # Example 280
                # A list item can begin with at most one blank line
                break

            src += line
            prev_blank_line = False
            state.cursor = pos
            continue

        m = sc.match(state.src, state.cursor)
        if m:
            tok_type = m.lastgroup
            if tok_type == 'list_item':
                # a sibling item; a blank line before it makes the list loose
                if prev_blank_line:
                    token['tight'] = False
                next_group = (
                    m.group('listitem_1'),
                    m.group('listitem_2'),
                    m.group('listitem_3')
                )
                state.cursor = m.end() + 1
                break
            # another block construct: remember where its tokens start and
            # where it ended; both values are stored on the list token
            tok_index = len(state.tokens)
            end_pos = block.parse_method(m, state)
            if end_pos:
                token['_tok_index'] = tok_index
                token['_end_pos'] = end_pos
                break

        if prev_blank_line and not line.startswith(continue_space):
            # not a continue line, and previous line is blank
            break

        # otherwise the line is appended to the item as-is
        src += line
        state.cursor = pos

    text += _clean_list_item_text(src, continue_width)
    child = state.child_state(strip_end(text))

    block.parse(child, rules)

    if token['tight'] and _is_loose_list(child.tokens):
        token['tight'] = False

    token['children'].append({
        'type': 'list_item',
        'children': child.tokens,
    })
    if next_group:
        return next_group
+
+
def _get_list_bullet(c):
    """Return the regex fragment that matches markers of the same kind as
    ``c``; anything that is not ``.``, ``)``, ``*`` or ``+`` maps to ``-``."""
    fragments = {
        '.': r'\d{0,9}\.',
        ')': r'\d{0,9}\)',
        '*': r'\*',
        '+': r'\+',
    }
    return fragments.get(c, '-')
+
+
def _compile_list_item_pattern(bullet, leading_width):
    """Build the pattern source that recognises a sibling list item.

    ``leading_width`` is capped at 3 and used as the maximum number of
    leading spaces; ``bullet`` is inserted verbatim as the marker group.
    """
    max_indent = str(min(leading_width, 3))
    pieces = (
        r'^(?P<listitem_1> {0,', max_indent, '})',
        r'(?P<listitem_2>', bullet, ')',
        r'(?P<listitem_3>[ \t]*|[ \t][^\n]+)$',
    )
    return ''.join(pieces)
+
+
def _compile_continue_width(text, leading_width):
    """Work out the first-line content of a list item and the indentation
    that continuation lines need.

    :param text: what follows the list marker on the item's first line
    :param leading_width: width of the leading spaces plus the marker
    :returns: ``(text, continue_width)`` where ``text`` is the first-line
              content (with a trailing newline, or ``''`` when the line has
              no content) and ``continue_width`` is the number of columns a
              following line must be indented by to stay in the item
    """
    text = expand_leading_tab(text, 3)
    text = expand_tab(text)

    m2 = _LINE_HAS_TEXT.match(text)
    if m2:
        # Five or more spaces after the marker mean the content is indented
        # code: only one space belongs to the marker, the rest is kept as
        # content. The width is spelled as a multiplication so the literal
        # cannot be silently shortened (the previous literal was one space,
        # which made every space-led item take this branch).
        if text.startswith(' ' * 5):
            space_width = 1
        else:
            space_width = len(m2.group(1))

        text = text[space_width:] + '\n'
    else:
        # marker followed by nothing but whitespace
        space_width = 1
        text = ''

    continue_width = leading_width + space_width
    return text, continue_width
+
+
def _clean_list_item_text(src, continue_width):
    """Strip the item's continuation indent from every line of ``src``.

    Lines that start with ``continue_width`` spaces lose that prefix and
    then have their tabs expanded with ``expand_tab``; all other lines are
    kept unchanged.
    """
    indent = ' ' * continue_width
    cleaned = []
    for line in src.split('\n'):
        if line.startswith(indent):
            # drop exactly the leading indent, then expand remaining tabs
            line = expand_tab(line[len(indent):])
        cleaned.append(line)
    return '\n'.join(cleaned)
+
+
def _is_loose_list(tokens):
    """Tell whether a list item's tokens make the enclosing list loose.

    :param tokens: tokens parsed from one list item
    :returns: ``True`` when the tokens contain a ``blank_line`` or more than
              one ``paragraph``, otherwise ``False`` (previously the function
              fell off the end and returned ``None``)
    """
    paragraph_count = 0
    for tok in tokens:
        if tok['type'] == 'blank_line':
            return True
        if tok['type'] == 'paragraph':
            paragraph_count += 1
            if paragraph_count > 1:
                return True
    return False
diff --git a/src/mistune/markdown.py b/src/mistune/markdown.py
new file mode 100644
index 0000000..c814a59
--- /dev/null
+++ b/src/mistune/markdown.py
@@ -0,0 +1,104 @@
+from typing import Optional
+from .core import BlockState
+from .block_parser import BlockParser
+from .inline_parser import InlineParser
+
+
class Markdown:
    """Markdown instance to convert markdown text into HTML or other formats.
    Here is an example with the HTMLRenderer::

        from mistune import HTMLRenderer

        md = Markdown(renderer=HTMLRenderer(escape=False))
        md('hello **world**')

    :param renderer: a renderer to convert parsed tokens
    :param block: block level syntax parser
    :param inline: inline level syntax parser
    :param plugins: mistune plugins to use
    """
    def __init__(self, renderer=None, block=None, inline=None, plugins=None):
        self.renderer = renderer
        self.block = BlockParser() if block is None else block
        self.inline = InlineParser() if inline is None else inline
        # hook lists are run by ``parse`` in this order
        self.before_parse_hooks = []
        self.before_render_hooks = []
        self.after_render_hooks = []

        for plugin in plugins or ():
            plugin(self)

    def use(self, plugin):
        """Apply ``plugin`` by calling it with this instance."""
        plugin(self)

    def render_state(self, state: BlockState):
        """Render the tokens of ``state``; without a renderer the processed
        tokens are returned as a list."""
        data = self._iter_render(state.tokens, state)
        if not self.renderer:
            return list(data)
        return self.renderer(data, state)

    def _iter_render(self, tokens, state):
        """Yield each token after expanding it: ``children`` are processed
        recursively, ``text`` is replaced by inline-parsed ``children``."""
        for token in tokens:
            if 'children' in token:
                token['children'] = list(self._iter_render(token['children'], state))
            elif 'text' in token:
                raw_text = token.pop('text')
                # process inline text
                token['children'] = self.inline(raw_text.strip(), state.env)
            yield token

    def parse(self, s: str, state: Optional[BlockState]=None):
        """Parse and convert the given markdown string. If renderer is None,
        the returned **result** will be parsed markdown tokens.

        :param s: markdown string
        :param state: instance of BlockState
        :returns: result, state
        """
        if state is None:
            state = self.block.state_cls()

        # normalize line separator
        s = s.replace('\r\n', '\n').replace('\r', '\n')
        state.process(s)

        for before_parse in self.before_parse_hooks:
            before_parse(self, state)

        self.block.parse(state)

        for before_render in self.before_render_hooks:
            before_render(self, state)

        result = self.render_state(state)

        # each after-render hook receives and replaces the result
        for after_render in self.after_render_hooks:
            result = after_render(self, result, state)
        return result, state

    def read(self, filepath, encoding='utf-8', state=None):
        """Parse the file at ``filepath``; the path is recorded in
        ``state.env['__file__']`` before the file is read."""
        if state is None:
            state = self.block.state_cls()

        state.env['__file__'] = filepath
        with open(filepath, 'rb') as f:
            raw = f.read()

        return self.parse(raw.decode(encoding), state)

    def __call__(self, s: str):
        """Convert ``s`` and return only the result; ``None`` is treated as
        a single newline."""
        source = '\n' if s is None else s
        return self.parse(source)[0]
diff --git a/src/mistune/plugins/__init__.py b/src/mistune/plugins/__init__.py
new file mode 100644
index 0000000..a79d727
--- /dev/null
+++ b/src/mistune/plugins/__init__.py
@@ -0,0 +1,38 @@
+from importlib import import_module
+
# Short plugin names mapped to the dotted path of the plugin function.
_plugins = {
    'speedup': 'mistune.plugins.speedup.speedup',
    'strikethrough': 'mistune.plugins.formatting.strikethrough',
    'mark': 'mistune.plugins.formatting.mark',
    'insert': 'mistune.plugins.formatting.insert',
    'superscript': 'mistune.plugins.formatting.superscript',
    'subscript': 'mistune.plugins.formatting.subscript',
    'footnotes': 'mistune.plugins.footnotes.footnotes',
    'table': 'mistune.plugins.table.table',
    'url': 'mistune.plugins.url.url',
    'abbr': 'mistune.plugins.abbr.abbr',
    'def_list': 'mistune.plugins.def_list.def_list',
    'math': 'mistune.plugins.math.math',
    'ruby': 'mistune.plugins.ruby.ruby',
    'task_lists': 'mistune.plugins.task_lists.task_lists',
    'spoiler': 'mistune.plugins.spoiler.spoiler',
}
# Plugins already resolved by ``import_plugin``, keyed by the requested name.
_cached_modules = {}


def import_plugin(name):
    """Resolve a plugin reference to the plugin callable.

    :param name: a callable (returned unchanged), a short name listed in
                 ``_plugins``, or a dotted ``package.module.function`` path
    :returns: the plugin callable
    :raises ValueError: if ``name`` is neither a known short name nor a
                        dotted path
    """
    # Callables are checked first so that an unhashable callable never
    # reaches the cache lookup, which would raise TypeError.
    if callable(name):
        return name

    if name in _cached_modules:
        return _cached_modules[name]

    target = _plugins.get(name, name)
    module_path, dot, func_name = target.rpartition('.')
    if not dot or not module_path or not func_name:
        raise ValueError(
            'unknown plugin %r: expected a built-in plugin name '
            'or a dotted "module.function" path' % (name,)
        )

    module = import_module(module_path)
    plugin = getattr(module, func_name)
    _cached_modules[name] = plugin
    return plugin
diff --git a/src/mistune/plugins/abbr.py b/src/mistune/plugins/abbr.py
new file mode 100644
index 0000000..1b45790
--- /dev/null
+++ b/src/mistune/plugins/abbr.py
@@ -0,0 +1,103 @@
+import re
+import types
+from ..util import escape
+from ..helpers import PREVENT_BACKSLASH
+
__all__ = ['abbr']

# https://michelf.ca/projects/php-markdown/extra/#abbr
# Block pattern for an abbreviation definition such as ``*[HTML]: text``:
#   abbr_key  - the text between ``*[`` and ``]:``
#   abbr_text - either the rest of the line, or a single following line that
#               is indented by a tab or at least three spaces
REF_ABBR = (
    r'^ {0,3}\*\[(?P<abbr_key>[^\]]+)'+ PREVENT_BACKSLASH + r'\]:'
    r'(?P<abbr_text>(?:[ \t]*\n(?: {3,}|\t)[^\n]+)|(?:[^\n]*))$'
)
+
+
def parse_ref_abbr(block, m, state):
    """Store an abbreviation definition in ``state.env['ref_abbrs']``.

    :returns: the position just past the character following the match
    """
    definitions = state.env.get('ref_abbrs') or {}
    definitions[m.group('abbr_key')] = m.group('abbr_text').strip()
    state.env['ref_abbrs'] = definitions
    # abbr definition can split paragraph
    state.append_token({'type': 'blank_line'})
    return m.end() + 1
+
+
def process_text(inline, text, state):
    """Emit ``text`` on ``state``, wrapping known abbreviations in ``abbr``
    tokens.

    Without any definition in ``state.env['ref_abbrs']`` the text becomes a
    single ``text`` token. Otherwise a directly preceding ``text`` token is
    merged with ``text`` first, so abbreviations spanning both are found.

    :param inline: inline parser this function is bound to (unused)
    :param text: plain text to emit
    :param state: inline state providing ``env``, ``tokens`` and
                  ``append_token``
    """
    ref = state.env.get('ref_abbrs')
    if not ref:
        return state.append_token({'type': 'text', 'raw': text})

    if state.tokens:
        last = state.tokens[-1]
        if last['type'] == 'text':
            state.tokens.pop()
            text = last['raw'] + text

    abbrs_re = state.env.get('abbrs_re')
    if not abbrs_re:
        # Longest keys first: regex alternation takes the first alternative
        # that matches, so in definition order ``HTML`` would shadow
        # ``HTML5``.
        keys = sorted(ref, key=len, reverse=True)
        abbrs_re = re.compile(r'|'.join(re.escape(k) for k in keys))
        state.env['abbrs_re'] = abbrs_re

    pos = 0
    while pos < len(text):
        m = abbrs_re.search(text, pos)
        if not m:
            break

        end_pos = m.start()
        if end_pos > pos:
            # plain text in front of the abbreviation
            state.append_token({'type': 'text', 'raw': text[pos:end_pos]})

        label = m.group(0)
        state.append_token({
            'type': 'abbr',
            'children': [{'type': 'text', 'raw': label}],
            'attrs': {'title': ref[label]}
        })
        pos = m.end()

    if pos == 0:
        # special case, just pure text
        state.append_token({'type': 'text', 'raw': text})
    elif pos < len(text):
        state.append_token({'type': 'text', 'raw': text[pos:]})
+
+
def render_abbr(renderer, text, title):
    """Render an ``<abbr>`` element; the escaped ``title`` attribute is
    added only when ``title`` is truthy."""
    title_attr = ' title="' + escape(title) + '"' if title else ''
    return '<abbr' + title_attr + '>' + text + '</abbr>'
+
+
def abbr(md):
    """A mistune plugin to support abbreviations, spec defined at
    https://michelf.ca/projects/php-markdown/extra/#abbr

    Here is an example:

    .. code-block:: text

        The HTML specification
        is maintained by the W3C.

        *[HTML]: Hyper Text Markup Language
        *[W3C]: World Wide Web Consortium

    It will be converted into HTML:

    .. code-block:: html

        The <abbr title="Hyper Text Markup Language">HTML</abbr> specification
        is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.

    :param md: Markdown instance
    """
    md.block.register('ref_abbr', REF_ABBR, parse_ref_abbr, before='paragraph')
    # replace process_text
    md.inline.process_text = types.MethodType(process_text, md.inline)

    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('abbr', render_abbr)
diff --git a/src/mistune/plugins/def_list.py b/src/mistune/plugins/def_list.py
new file mode 100644
index 0000000..3675641
--- /dev/null
+++ b/src/mistune/plugins/def_list.py
@@ -0,0 +1,135 @@
+import re
+from ..util import strip_end
+
__all__ = ['def_list']

# https://michelf.ca/projects/php-markdown/extra/#def-list

# One definition group: one or more non-empty term lines (def_list_head),
# an optional blank line, then one or more definitions that each start
# with ':' followed by spaces or tabs.
DEF_PATTERN = (
    r'^(?P<def_list_head>(?:[^\n]+\n)+?)'
    r'\n?(?:'
    r'\:[ \t]+.*\n'
    r'(?:[^\n]+\n)*'  # lazy continue line
    r'(?:(?:[ \t]*\n)*[ \t]+[^\n]+\n)*'
    r'(?:[ \t]*\n)*'
    r')+'
)
DEF_RE = re.compile(DEF_PATTERN, re.M)
# start of a definition: ':' at the beginning of a line
DD_START_RE = re.compile(r'^:[ \t]+', re.M)
# up to four leading spaces on every line
TRIM_RE = re.compile(r'^ {0,4}', re.M)
# text that ends with a whitespace-only line
HAS_BLANK_LINE_RE = re.compile(r'\n[ \t]*\n$')
+
+
def parse_def_list(block, m, state):
    """Parse consecutive definition groups, starting with ``m``, into a
    single ``def_list`` token.

    :returns: the end position of the last group that matched
    """
    children = list(_parse_def_item(block, m))
    pos = m.end()

    # keep consuming groups that follow directly after the previous one
    while True:
        following = DEF_RE.match(state.src, pos)
        if not following:
            break
        children.extend(_parse_def_item(block, following))
        pos = following.end()

    state.append_token({
        'type': 'def_list',
        'children': children,
    })
    return pos
+
+
def _parse_def_item(block, m):
    """Yield the tokens of one definition group: a ``def_list_head`` per
    term line, then a ``def_list_item`` per ``:`` definition."""
    head = m.group('def_list_head')
    for term in head.splitlines():
        yield {
            'type': 'def_list_head',
            'text': term,
        }

    src = m.group(0)
    head_end = len(head)

    start = DD_START_RE.search(src, head_end).start()
    # a single blank line between the terms and the first definition
    prev_blank_line = src[head_end:start] == '\n'
    while True:
        nxt = DD_START_RE.search(src, start + 1)
        if not nxt:
            break

        stop = nxt.start()
        # blank out the leading ':' so the text can be parsed as a block
        text = src[start:stop].replace(':', ' ', 1)
        children = _process_text(block, text, prev_blank_line)
        prev_blank_line = bool(HAS_BLANK_LINE_RE.search(text))
        yield {
            'type': 'def_list_item',
            'children': children,
        }
        start = stop

    # the last definition runs to the end of the group
    text = src[start:].replace(':', ' ', 1)
    yield {
        'type': 'def_list_item',
        'children': _process_text(block, text, prev_blank_line),
    }
+
+
def _process_text(block, text, loose):
    """Parse the text of one definition with the block parser's list rules.

    When ``loose`` is false and the result is a single paragraph, that
    paragraph is turned into ``block_text``.
    """
    child = block.state_cls()
    child.process(strip_end(TRIM_RE.sub('', text)))
    # use default list rules
    block.parse(child, block.list_rules)

    tokens = child.tokens
    if loose:
        return tokens
    if len(tokens) == 1 and tokens[0]['type'] == 'paragraph':
        tokens[0]['type'] = 'block_text'
    return tokens
+
+
def render_def_list(renderer, text):
    """Wrap the rendered definition-list children in ``<dl>``."""
    return f'<dl>\n{text}</dl>\n'
+
+
def render_def_list_head(renderer, text):
    """Render a term as ``<dt>``."""
    return f'<dt>{text}</dt>\n'
+
+
def render_def_list_item(renderer, text):
    """Render a definition as ``<dd>``."""
    return f'<dd>{text}</dd>\n'
+
+
def def_list(md):
    """A mistune plugin to support def list, spec defined at
    https://michelf.ca/projects/php-markdown/extra/#def-list

    Here is an example:

    .. code-block:: text

        Apple
        :   Pomaceous fruit of plants of the genus Malus in
            the family Rosaceae.

        Orange
        :   The fruit of an evergreen tree of the genus Citrus.

    It will be converted into HTML:

    .. code-block:: html

        <dl>
        <dt>Apple</dt>
        <dd>Pomaceous fruit of plants of the genus Malus in
        the family Rosaceae.</dd>

        <dt>Orange</dt>
        <dd>The fruit of an evergreen tree of the genus Citrus.</dd>
        </dl>

    :param md: Markdown instance
    """
    md.block.register('def_list', DEF_PATTERN, parse_def_list, before='paragraph')

    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('def_list', render_def_list)
        renderer.register('def_list_head', render_def_list_head)
        renderer.register('def_list_item', render_def_list_item)
diff --git a/src/mistune/plugins/footnotes.py b/src/mistune/plugins/footnotes.py
new file mode 100644
index 0000000..2e10704
--- /dev/null
+++ b/src/mistune/plugins/footnotes.py
@@ -0,0 +1,153 @@
+import re
+from ..core import BlockState
+from ..util import unikey
+from ..helpers import LINK_LABEL
+
__all__ = ['footnotes']

# two or more newlines separate the paragraphs of a footnote
_PARAGRAPH_SPLIT = re.compile(r'\n{2,}')
# https://michelf.ca/projects/php-markdown/extra/#footnotes
# Block pattern for a footnote definition ``[^key]: text``:
#   footnote_lead - up to three leading spaces
#   footnote_key  - the label after ``[^``
#   footnote_text - the first line plus following lines indented by one to
#                   three spaces more than the lead
REF_FOOTNOTE = (
    r'^(?P<footnote_lead> {0,3})'
    r'\[\^(?P<footnote_key>' + LINK_LABEL + r')]:[ \t]'
    r'(?P<footnote_text>[^\n]*(?:\n+|$)'
    r'(?:(?P=footnote_lead) {1,3}(?! )[^\n]*\n+)*'
    r')'
)

# Inline pattern for a footnote reference ``[^key]``.
INLINE_FOOTNOTE = r'\[\^(?P<footnote_key>' + LINK_LABEL + r')\]'
+
+
def parse_inline_footnote(inline, m: re.Match, state):
    """Emit a ``footnote_ref`` token for a defined footnote, or the matched
    text as a plain ``text`` token when the key has no definition.

    Keys are numbered in order of first reference via
    ``state.env['footnotes']``.
    """
    key = unikey(m.group('footnote_key'))
    definitions = state.env.get('ref_footnotes')
    if not (definitions and key in definitions):
        state.append_token({'type': 'text', 'raw': m.group(0)})
        return m.end()

    notes = state.env.get('footnotes') or []
    if key not in notes:
        notes.append(key)
        state.env['footnotes'] = notes

    state.append_token({
        'type': 'footnote_ref',
        'raw': key,
        'attrs': {'index': notes.index(key) + 1}
    })
    return m.end()
+
+
def parse_ref_footnote(block, m: re.Match, state: BlockState):
    """Record a footnote definition in ``state.env['ref_footnotes']``; the
    first definition of a key wins."""
    definitions = state.env.get('ref_footnotes') or {}

    key = unikey(m.group('footnote_key'))
    if key not in definitions:
        definitions[key] = m.group('footnote_text')
        state.env['ref_footnotes'] = definitions
    return m.end()
+
+
def parse_footnote_item(block, key: str, index: int, state: BlockState):
    """Build the ``footnote_item`` token for the footnote stored under
    ``key`` in ``state.env['ref_footnotes']``.

    When the definition has a non-empty line after its first one, that
    line's indentation is stripped from all lines and the text is split
    into paragraphs; otherwise the text becomes a single paragraph.
    """
    text = state.env.get('ref_footnotes')[key]

    # first non-empty line after the opening line, if there is one
    second_line = next((ln for ln in text.splitlines()[1:] if ln), None)

    if second_line:
        spaces = len(second_line) - len(second_line.lstrip())
        pattern = re.compile(r'^ {' + str(spaces) + r',}', flags=re.M)
        text = pattern.sub('', text).strip()
        children = [
            {'type': 'paragraph', 'text': part}
            for part in _PARAGRAPH_SPLIT.split(text)
        ]
    else:
        children = [{'type': 'paragraph', 'text': text.strip()}]

    return {
        'type': 'footnote_item',
        'children': children,
        'attrs': {'key': key, 'index': index}
    }
+
+
def md_footnotes_hook(md, result: str, state: BlockState):
    """After-render hook: append the rendered footnotes section to
    ``result`` when ``state.env['footnotes']`` lists referenced footnotes."""
    notes = state.env.get('footnotes')
    if not notes:
        return result

    children = []
    for position, key in enumerate(notes):
        children.append(parse_footnote_item(md.block, key, position + 1, state))

    # render the section from a fresh state that holds only this token
    footnote_state = BlockState()
    footnote_state.tokens = [{'type': 'footnotes', 'children': children}]
    return result + md.render_state(footnote_state)
+
+
def render_footnote_ref(renderer, key: str, index: int):
    """Render the superscript link that points at footnote ``index``."""
    i = str(index)
    return (
        f'<sup class="footnote-ref" id="fnref-{i}">'
        f'<a href="#fn-{i}">{i}</a></sup>'
    )
+
+
def render_footnotes(renderer, text: str):
    """Wrap the rendered footnote items in the footnotes section markup."""
    return f'<section class="footnotes">\n<ol>\n{text}</ol>\n</section>\n'
+
+
def render_footnote_item(renderer, text: str, key: str, index: int):
    """Render one footnote as ``<li>`` with a link back to its reference.

    :param renderer: the active renderer (unused)
    :param text: rendered content of the footnote
    :param key: footnote key (unused)
    :param index: 1-based footnote number used in the ids
    :returns: the ``<li>`` markup, terminated by a newline
    """
    i = str(index)
    back = '<a href="#fnref-' + i + '" class="footnote">&#8617;</a>'
    body = text.rstrip()
    if body.endswith('</p>'):
        # place the back link inside the closing paragraph
        body = body[:-4] + back + '</p>'
    else:
        # content does not end with a paragraph: append the link instead of
        # cutting off the last four characters of unrelated markup
        body += back
    return '<li id="fn-' + i + '">' + body + '</li>\n'
+
+
def footnotes(md):
    """A mistune plugin to support footnotes, spec defined at
    https://michelf.ca/projects/php-markdown/extra/#footnotes

    Here is an example:

    .. code-block:: text

        That's some text with a footnote.[^1]

        [^1]: And that's the footnote.

    It will be converted into HTML:

    .. code-block:: html

        <p>That's some text with a footnote.<sup class="footnote-ref" id="fnref-1"><a href="#fn-1">1</a></sup></p>
        <section class="footnotes">
        <ol>
        <li id="fn-1"><p>And that's the footnote.<a href="#fnref-1" class="footnote">&#8617;</a></p></li>
        </ol>
        </section>

    :param md: Markdown instance
    """
    md.inline.register(
        'footnote',
        INLINE_FOOTNOTE,
        parse_inline_footnote,
        before='link',
    )
    md.block.register(
        'ref_footnote',
        REF_FOOTNOTE,
        parse_ref_footnote,
        before='ref_link',
    )
    md.after_render_hooks.append(md_footnotes_hook)

    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('footnote_ref', render_footnote_ref)
        renderer.register('footnote_item', render_footnote_item)
        renderer.register('footnotes', render_footnotes)
diff --git a/src/mistune/plugins/formatting.py b/src/mistune/plugins/formatting.py
new file mode 100644
index 0000000..57e5def
--- /dev/null
+++ b/src/mistune/plugins/formatting.py
@@ -0,0 +1,173 @@
+import re
+from ..helpers import PREVENT_BACKSLASH
+
__all__ = ["strikethrough", "mark", "insert", "superscript", "subscript"]

# Closing delimiters (``~~``, ``==``, ``^^``): the doubled marker must follow
# an escaped marker or a character that is neither whitespace nor the marker,
# and must not be followed by a third marker character.
_STRIKE_END = re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\~|[^\s~])~~(?!~)')
_MARK_END = re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\=|[^\s=])==(?!=)')
_INSERT_END = re.compile(r'(?:' + PREVENT_BACKSLASH + r'\\\^|[^\s^])\^\^(?!\^)')

# ``^text^`` / ``~text~``: the content is made of escaped markers,
# non-whitespace characters and backslash-escaped spaces.
SUPERSCRIPT_PATTERN = r'\^(?:' + PREVENT_BACKSLASH + r'\\\^|\S|\\ )+?\^'
SUBSCRIPT_PATTERN = r'~(?:' + PREVENT_BACKSLASH + r'\\~|\S|\\ )+?~'
+
+
def parse_strikethrough(inline, m, state):
    """Parse ``~~text~~`` into a ``strikethrough`` token."""
    return _parse_to_end(
        inline, m, state,
        tok_type='strikethrough',
        end_pattern=_STRIKE_END,
    )
+
+
def render_strikethrough(renderer, text):
    """Render struck-through text as ``<del>``."""
    return f'<del>{text}</del>'
+
+
def parse_mark(inline, m, state):
    """Parse ``==text==`` into a ``mark`` token."""
    return _parse_to_end(
        inline, m, state,
        tok_type='mark',
        end_pattern=_MARK_END,
    )
+
+
def render_mark(renderer, text):
    """Render highlighted text as ``<mark>``."""
    return f'<mark>{text}</mark>'
+
+
def parse_insert(inline, m, state):
    """Parse ``^^text^^`` into an ``insert`` token."""
    return _parse_to_end(
        inline, m, state,
        tok_type='insert',
        end_pattern=_INSERT_END,
    )
+
+
def render_insert(renderer, text):
    """Render inserted text as ``<ins>``."""
    return f'<ins>{text}</ins>'
+
+
def parse_superscript(inline, m, state):
    """Parse ``^text^`` into a ``superscript`` token."""
    return _parse_script(inline, m, state, tok_type='superscript')
+
+
def render_superscript(renderer, text):
    """Render superscript text as ``<sup>``."""
    return f'<sup>{text}</sup>'
+
+
def parse_subscript(inline, m, state):
    """Parse ``~text~`` into a ``subscript`` token."""
    return _parse_script(inline, m, state, tok_type='subscript')
+
+
def render_subscript(renderer, text):
    """Render subscript text as ``<sub>``."""
    return f'<sub>{text}</sub>'
+
+
def _parse_to_end(inline, m, state, tok_type, end_pattern):
    """Parse a span opened by ``m`` and closed by ``end_pattern``.

    The text between the opening match and the two-character closing
    marker is rendered as inline children of a ``tok_type`` token.

    :returns: the position after the closing marker, or ``None`` when no
              closing marker is found
    """
    content_start = m.end()
    closing = end_pattern.search(state.src, content_start)
    if not closing:
        return None

    end_pos = closing.end()
    inner = state.copy()
    # the closing marker itself is two characters wide
    inner.src = state.src[content_start:end_pos - 2]
    state.append_token({'type': tok_type, 'children': inline.render(inner)})
    return end_pos
+
+
def _parse_script(inline, m, state, tok_type):
    """Parse a super/subscript match into a ``tok_type`` token.

    The one-character delimiters are dropped and backslash-escaped spaces
    become plain spaces before the content is rendered inline.
    """
    inner = state.copy()
    inner.src = m.group(0)[1:-1].replace('\\ ', ' ')
    state.append_token({
        'type': tok_type,
        'children': inline.render(inner),
    })
    return m.end()
+
+
def strikethrough(md):
    """A mistune plugin to support strikethrough. Spec defined by
    GitHub flavored Markdown and commonly used by many parsers:

    .. code-block:: text

        ~~This was mistaken text~~

    It will be converted into HTML:

    .. code-block:: html

        <del>This was mistaken text</del>

    :param md: Markdown instance
    """
    md.inline.register(
        'strikethrough',
        r'~~(?=[^\s~])',
        parse_strikethrough,
        before='link',
    )
    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('strikethrough', render_strikethrough)
+
+
def mark(md):
    """A mistune plugin to add ``<mark>`` tag. Spec defined at
    https://facelessuser.github.io/pymdown-extensions/extensions/mark/:

    .. code-block:: text

        ==mark me== ==mark \\=\\= equal==

    :param md: Markdown instance
    """
    md.inline.register(
        'mark',
        r'==(?=[^\s=])',
        parse_mark,
        before='link',
    )
    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('mark', render_mark)
+
+
def insert(md):
    """A mistune plugin to add ``<ins>`` tag. Spec defined at
    https://facelessuser.github.io/pymdown-extensions/extensions/caret/#insert:

    .. code-block:: text

        ^^insert me^^

    :param md: Markdown instance
    """
    md.inline.register(
        'insert',
        r'\^\^(?=[^\s\^])',
        parse_insert,
        before='link',
    )
    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('insert', render_insert)
+
+
def superscript(md):
    """A mistune plugin to add ``<sup>`` tag. Spec defined at
    https://pandoc.org/MANUAL.html#superscripts-and-subscripts:

    .. code-block:: text

        2^10^ is 1024.

    :param md: Markdown instance
    """
    md.inline.register(
        'superscript',
        SUPERSCRIPT_PATTERN,
        parse_superscript,
        before='linebreak',
    )
    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('superscript', render_superscript)
+
+
def subscript(md):
    """A mistune plugin to add ``<sub>`` tag. Spec defined at
    https://pandoc.org/MANUAL.html#superscripts-and-subscripts:

    .. code-block:: text

        H~2~O is a liquid.

    :param md: Markdown instance
    """
    md.inline.register(
        'subscript',
        SUBSCRIPT_PATTERN,
        parse_subscript,
        before='linebreak',
    )
    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('subscript', render_subscript)
diff --git a/src/mistune/plugins/math.py b/src/mistune/plugins/math.py
new file mode 100644
index 0000000..805105e
--- /dev/null
+++ b/src/mistune/plugins/math.py
@@ -0,0 +1,57 @@
__all__ = ['math', 'math_in_quote', 'math_in_list']

# A ``$$`` line, the math text (math_text), then a closing ``$$`` line.
BLOCK_MATH_PATTERN = r'^ {0,3}\$\$[ \t]*\n(?P<math_text>[\s\S]+?)\n\$\$[ \t]*$'
# ``$...$`` where the opening ``$`` is not followed by whitespace.
INLINE_MATH_PATTERN = r'\$(?!\s)(?P<math_text>.+?)(?!\s)\$'
+
+
def parse_block_math(block, m, state):
    """Emit a ``block_math`` token holding the text between the ``$$``
    lines and return the position one past the end of the match."""
    token = {'type': 'block_math', 'raw': m.group('math_text')}
    state.append_token(token)
    return m.end() + 1
+
+
def parse_inline_math(inline, m, state):
    """Emit an ``inline_math`` token holding the text between the ``$``
    signs and return the end of the match."""
    token = {'type': 'inline_math', 'raw': m.group('math_text')}
    state.append_token(token)
    return m.end()
+
+
def render_block_math(renderer, text):
    """Render block math as a ``div.math`` that keeps the ``$$`` fences."""
    return ''.join(('<div class="math">$$\n', text, '\n$$</div>\n'))
+
+
def render_inline_math(renderer, text):
    """Render inline math as a ``span.math`` wrapped in ``\\(`` and ``\\)``."""
    return ''.join((r'<span class="math">\(', text, r'\)</span>'))
+
+
def math(md):
    """A mistune plugin to support math. The syntax is used
    by many markdown extensions:

    .. code-block:: text

        Block math is surrounded by $$:

        $$
        f(a)=f(b)
        $$

        Inline math is surrounded by `$`, such as $f(a)=f(b)$

    :param md: Markdown instance
    """
    md.block.register('block_math', BLOCK_MATH_PATTERN, parse_block_math, before='list')
    md.inline.register('inline_math', INLINE_MATH_PATTERN, parse_inline_math, before='link')

    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('block_math', render_block_math)
        renderer.register('inline_math', render_inline_math)
+
+
def math_in_quote(md):
    """Enable block math plugin in block quote."""
    block = md.block
    block.insert_rule(block.block_quote_rules, 'block_math', before='list')
+
+
def math_in_list(md):
    """Enable block math plugin in list."""
    block = md.block
    block.insert_rule(block.list_rules, 'block_math', before='list')
diff --git a/src/mistune/plugins/ruby.py b/src/mistune/plugins/ruby.py
new file mode 100644
index 0000000..eabc037
--- /dev/null
+++ b/src/mistune/plugins/ruby.py
@@ -0,0 +1,100 @@
+import re
+from ..util import unikey
+from ..helpers import parse_link, parse_link_label
+
+
# ``[`` followed by one or more ``base(annotation)`` pairs and ``]``; both
# parts consist of word characters.
RUBY_PATTERN = r'\[(?:\w+\(\w+\))+\]'
_ruby_re = re.compile(RUBY_PATTERN)
+
+
def parse_ruby(inline, m, state):
    """Parse ``[base(annotation)...]`` into ``ruby`` tokens.

    Directly adjacent ruby groups are parsed recursively. A following
    ``(...)`` or ``[...]`` is tried as a link around the ruby tokens.

    :returns: the position after the last consumed construct
    """
    # drop the leading '[' and the trailing ')]', then split the pairs
    pairs = m.group(0)[1:-2].split(')')
    tokens = [
        {'type': 'ruby', 'raw': rb, 'attrs': {'rt': rt}}
        for rb, rt in (pair.split('(') for pair in pairs)
    ]

    end_pos = m.end()

    adjacent = _ruby_re.match(state.src, end_pos)
    if adjacent:
        for tok in tokens:
            state.append_token(tok)
        return parse_ruby(inline, adjacent, state)

    # repeat link logic
    if end_pos < len(state.src):
        link_pos = _parse_ruby_link(inline, state, end_pos, tokens)
        if link_pos:
            return link_pos

    for tok in tokens:
        state.append_token(tok)
    return end_pos
+
+
def _parse_ruby_link(inline, state, pos, tokens):
    """Try to parse a link target that directly follows a ruby group.

    :param inline: inline parser (unused)
    :param state: inline state; receives the resulting tokens
    :param pos: index in ``state.src`` right after the ruby group
    :param tokens: ruby tokens that become the children of the link
    :returns: the position after the link target or label, or ``None`` when
              nothing link-like follows
    """
    c = state.src[pos]
    if c == '(':
        # standard link [text](<url> "title")
        attrs, link_pos = parse_link(state.src, pos + 1)
        if link_pos:
            state.append_token({
                'type': 'link',
                'children': tokens,
                'attrs': attrs,
            })
            return link_pos

    elif c == '[':
        # standard ref link [text][label]
        label, link_pos = parse_link_label(state.src, pos + 1)
        if label and link_pos:
            # an env without reference links is treated as "no definitions"
            # instead of raising KeyError
            ref_links = state.env.get('ref_links') or {}
            key = unikey(label)
            env = ref_links.get(key)
            if env:
                attrs = {'url': env['url'], 'title': env.get('title')}
                state.append_token({
                    'type': 'link',
                    'children': tokens,
                    'attrs': attrs,
                })
            else:
                # unknown label: keep the ruby tokens and emit the label
                # as literal text
                for tok in tokens:
                    state.append_token(tok)
                state.append_token({
                    'type': 'text',
                    'raw': '[' + label + ']',
                })
            return link_pos
    return None
+
+
def render_ruby(renderer, text, rt):
    """Render a base text with its annotation as a ``<ruby>`` element."""
    return f'<ruby><rb>{text}</rb><rt>{rt}</rt></ruby>'
+
+
def ruby(md):
    """A mistune plugin to support ``<ruby>`` tag. The syntax is defined
    at https://lepture.com/en/2022/markdown-ruby-markup:

    .. code-block:: text

        [漢字(ㄏㄢˋㄗˋ)]
        [漢(ㄏㄢˋ)字(ㄗˋ)]

        [漢字(ㄏㄢˋㄗˋ)][link]
        [漢字(ㄏㄢˋㄗˋ)](/url "title")

        [link]: /url "title"

    :param md: Markdown instance
    """
    md.inline.register('ruby', RUBY_PATTERN, parse_ruby, before='link')

    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('ruby', render_ruby)
diff --git a/src/mistune/plugins/speedup.py b/src/mistune/plugins/speedup.py
new file mode 100644
index 0000000..784022c
--- /dev/null
+++ b/src/mistune/plugins/speedup.py
@@ -0,0 +1,44 @@
+import re
+import string
+
# because mismatch is too slow, add parsers for paragraph and text

# optional spaces, a newline, then any following whitespace
HARD_LINEBREAK_RE = re.compile(r' *\n\s*')
# one or more consecutive lines, each of which must
PARAGRAPH = (
    # start with none punctuation, not number, not whitespace
    r'(?:^[^\s\d' + re.escape(string.punctuation) + r'][^\n]*\n)+'
)

__all__ = ['speedup']
+
+
+
def parse_text(inline, m, state):
    """Forward matched plain text to ``inline.process_text`` after
    collapsing every ``HARD_LINEBREAK_RE`` match to a single newline."""
    normalized = HARD_LINEBREAK_RE.sub('\n', m.group(0))
    inline.process_text(normalized, state)
    return m.end()
+
+
def parse_paragraph(block, m, state):
    """Add the whole match to ``state`` as paragraph text and return the
    end of the match."""
    state.add_paragraph(m.group(0))
    return m.end()
+
+
def speedup(md):
    """Increase the speed of parsing paragraph and inline text."""
    md.block.register('paragraph', PARAGRAPH, parse_paragraph)

    # Plain text runs lazily up to the next character that may start inline
    # markup, an optional url prefix, a line break, or the end of input.
    punc = r'\\><!\[_*`~\^\$='
    parts = [r'[\s\S]+?(?=[' + punc + r']|']
    if 'url_link' in md.inline.rules:
        parts.append('https?:|')

    # which line-break form is used depends on md.inline.hard_wrap
    parts.append(r' *\n|' if md.inline.hard_wrap else r' {2,}\n|')
    parts.append(r'$)')

    md.inline.register('text', ''.join(parts), parse_text)
diff --git a/src/mistune/plugins/spoiler.py b/src/mistune/plugins/spoiler.py
new file mode 100644
index 0000000..2931d2b
--- /dev/null
+++ b/src/mistune/plugins/spoiler.py
@@ -0,0 +1,80 @@
+import re
+
__all__ = ['spoiler']

# leading ``!`` (after up to three spaces, with one optional space after it)
# at the start of each line
_BLOCK_SPOILER_START = re.compile(r'^ {0,3}! ?', re.M)
# text in which every line starts with ``!`` after up to three spaces
_BLOCK_SPOILER_MATCH = re.compile(r'^( {0,3}![^\n]*\n)+$')

# ``>! text !<`` with optional whitespace around the text
INLINE_SPOILER_PATTERN = r'>!\s*(?P<spoiler_text>.+?)\s*!<'
+
+
def parse_block_spoiler(block, m, state):
    """Parse a block quote, turning it into a ``block_spoiler`` when it is
    at depth zero and every quoted line starts with ``!``.

    :returns: the end position reported by ``extract_block_quote`` when it
              is truthy, otherwise ``state.cursor``
    """
    text, end_pos = block.extract_block_quote(m, state)
    if not text.endswith('\n'):
        # ensure it endswith \n to make sure
        # _BLOCK_SPOILER_MATCH.match works
        text += '\n'

    is_spoiler = not state.depth() and _BLOCK_SPOILER_MATCH.match(text)
    if is_spoiler:
        text = _BLOCK_SPOILER_START.sub('', text)
    tok_type = 'block_spoiler' if is_spoiler else 'block_quote'

    # scan children state
    child = state.child_state(text)
    rules = block.block_quote_rules
    if state.depth() >= block.max_nested_level - 1:
        # nesting limit reached: children may not open another block quote
        rules = list(rules)
        rules.remove('block_quote')

    block.parse(child, rules)
    token = {'type': tok_type, 'children': child.tokens}
    if not end_pos:
        state.append_token(token)
        return state.cursor

    state.prepend_token(token)
    return end_pos
+
+
def parse_inline_spoiler(inline, m, state):
    """Render the spoiler text inline and wrap the result in an
    ``inline_spoiler`` token."""
    inner = state.copy()
    inner.src = m.group('spoiler_text')
    state.append_token({
        'type': 'inline_spoiler',
        'children': inline.render(inner),
    })
    return m.end()
+
+
def render_block_spoiler(renderer, text):
    """Render a block spoiler as ``div.spoiler``."""
    return f'<div class="spoiler">\n{text}</div>\n'
+
+
def render_inline_spoiler(renderer, text):
    """Render an inline spoiler as ``span.spoiler``."""
    return f'<span class="spoiler">{text}</span>'
+
+
def spoiler(md):
    """A mistune plugin to support block and inline spoiler. The
    syntax is inspired by stackexchange:

    .. code-block:: text

        Block level spoiler looks like block quote, but with `>!`:

        >! this is spoiler
        >!
        >! the content will be hidden

        Inline spoiler is surrounded by `>!` and `!<`, such as >! hide me !<.

    :param md: Markdown instance
    """
    # reset block quote parser with block spoiler parser
    md.block.register('block_quote', None, parse_block_spoiler)
    md.inline.register('inline_spoiler', INLINE_SPOILER_PATTERN, parse_inline_spoiler)

    renderer = md.renderer
    if renderer and renderer.NAME == 'html':
        renderer.register('block_spoiler', render_block_spoiler)
        renderer.register('inline_spoiler', render_inline_spoiler)
diff --git a/src/mistune/plugins/table.py b/src/mistune/plugins/table.py
new file mode 100644
index 0000000..d3bc4c2
--- /dev/null
+++ b/src/mistune/plugins/table.py
@@ -0,0 +1,179 @@
+import re
+from ..helpers import PREVENT_BACKSLASH
+
# https://michelf.ca/projects/php-markdown/extra/#table

__all__ = ['table', 'table_in_quote', 'table_in_list']


# Table whose rows start and end with a pipe: a header row, an alignment
# row made of ``-``, ``:``, ``|`` and spaces, then zero or more body rows.
TABLE_PATTERN = (
  r'^ {0,3}\|(?P<table_head>.+)\|[ \t]*\n'
  r' {0,3}\|(?P<table_align> *[-:]+[-| :]*)\|[ \t]*\n'
  r'(?P<table_body>(?: {0,3}\|.*\|[ \t]*(?:\n|$))*)\n*'
)
# Table without the outer pipes: each row only needs to contain a pipe.
NP_TABLE_PATTERN = (
  r'^ {0,3}(?P<nptable_head>\S.*\|.*)\n'
  r' {0,3}(?P<nptable_align>[-:]+ *\|[-| :]*)\n'
  r'(?P<nptable_body>(?:.*\|.*(?:\n|$))*)\n*'
)

# Captures the content between the outer pipes of one piped body row.
TABLE_CELL = re.compile(r'^ {0,3}\|(.+)\|[ \t]*$')
# Splits a row into cells on ``|`` together with the spaces around it.
# NOTE(review): PREVENT_BACKSLASH presumably keeps escaped pipes from
# splitting a cell; confirm in ``helpers``.
CELL_SPLIT = re.compile(r' *' + PREVENT_BACKSLASH + r'\| *')
# Alignment cell forms: ``:---:`` center, ``:---`` left, ``---:`` right.
ALIGN_CENTER = re.compile(r'^ *:-+: *$')
ALIGN_LEFT = re.compile(r'^ *:-+ *$')
ALIGN_RIGHT = re.compile(r'^ *-+: *$')
+
+
def parse_table(block, m, state):
    """Parse a piped table match and append a ``table`` token to ``state``.

    :param block: block parser (unused here)
    :param m: match of ``TABLE_PATTERN``
    :param state: block state that receives the token
    :return: end position of the match, or ``None`` when the header and
        alignment rows disagree or a body row has the wrong cell count
    """
    end_pos = m.end()
    thead, aligns = _process_thead(m.group('table_head'), m.group('table_align'))
    if not thead:
        return None

    body_rows = []
    for line in m.group('table_body').splitlines():
        cell_match = TABLE_CELL.match(line)
        if not cell_match:  # pragma: no cover
            return None
        parsed = _process_row(cell_match.group(1), aligns)
        if not parsed:
            return None
        body_rows.append(parsed)

    state.append_token({
        'type': 'table',
        'children': [thead, {'type': 'table_body', 'children': body_rows}],
    })
    return end_pos
+
+
def parse_nptable(block, m, state):
    """Parse a table without outer pipes and append a ``table`` token.

    :param block: block parser (unused here)
    :param m: match of ``NP_TABLE_PATTERN``
    :param state: block state that receives the token
    :return: end position of the match, or ``None`` when the header and
        alignment rows disagree or a body row has the wrong cell count
    """
    thead, aligns = _process_thead(m.group('nptable_head'), m.group('nptable_align'))
    if not thead:
        return None

    body_rows = []
    for line in m.group('nptable_body').splitlines():
        parsed = _process_row(line, aligns)
        if not parsed:
            return None
        body_rows.append(parsed)

    state.append_token({
        'type': 'table',
        'children': [thead, {'type': 'table_body', 'children': body_rows}],
    })
    return m.end()
+
+
def _process_thead(header, align):
    """Build the ``table_head`` token and the per-column alignments.

    :param header: text of the header row
    :param align: text of the alignment row
    :return: ``(thead, aligns)`` where ``aligns`` holds ``'center'``,
        ``'left'``, ``'right'`` or ``None`` per column, or ``(None, None)``
        when both rows do not have the same number of cells
    """
    head_cells = CELL_SPLIT.split(header)
    align_cells = CELL_SPLIT.split(align)
    if len(head_cells) != len(align_cells):
        return None, None

    # Checked in this order; the first matching pattern wins.
    matchers = (
        (ALIGN_CENTER, 'center'),
        (ALIGN_LEFT, 'left'),
        (ALIGN_RIGHT, 'right'),
    )
    aligns = [
        next((name for pattern, name in matchers if pattern.match(spec)), None)
        for spec in align_cells
    ]

    cells = [
        {
            'type': 'table_cell',
            'text': cell.strip(),
            'attrs': {'align': column_align, 'head': True},
        }
        for cell, column_align in zip(head_cells, aligns)
    ]
    return {'type': 'table_head', 'children': cells}, aligns
+
+
def _process_row(text, aligns):
    """Build a ``table_row`` token from one body row.

    :param text: row text to split into cells
    :param aligns: per-column alignments from the header
    :return: the row token, or ``None`` when the number of cells differs
        from the number of columns
    """
    cells = CELL_SPLIT.split(text)
    if len(cells) != len(aligns):
        return None

    return {
        'type': 'table_row',
        'children': [
            {
                'type': 'table_cell',
                'text': cell.strip(),
                'attrs': {'align': column_align, 'head': False},
            }
            for cell, column_align in zip(cells, aligns)
        ],
    }
+
+
def render_table(renderer, text):
    """Wrap rendered head and body in a ``<table>`` element."""
    return ''.join(('<table>\n', text, '</table>\n'))
+
+
def render_table_head(renderer, text):
    """Wrap rendered header cells in ``<thead><tr>``."""
    return ''.join(('<thead>\n<tr>\n', text, '</tr>\n</thead>\n'))
+
+
def render_table_body(renderer, text):
    """Wrap rendered rows in a ``<tbody>`` element."""
    return ''.join(('<tbody>\n', text, '</tbody>\n'))
+
+
def render_table_row(renderer, text):
    """Wrap rendered cells in a ``<tr>`` element."""
    return ''.join(('<tr>\n', text, '</tr>\n'))
+
+
def render_table_cell(renderer, text, align=None, head=False):
    """Render one table cell as ``<th>`` or ``<td>``.

    :param renderer: renderer instance (unused here)
    :param text: rendered cell content
    :param align: optional value for an inline ``text-align`` style
    :param head: render a header cell (``<th>``) when true
    """
    tag = 'th' if head else 'td'
    style = ' style="text-align:' + align + '"' if align else ''
    return ' <' + tag + style + '>' + text + '</' + tag + '>\n'
+
+
def table(md):
    """A mistune plugin to support tables, spec defined at
    https://michelf.ca/projects/php-markdown/extra/#table

    Here is an example:

    .. code-block:: text

        First Header  | Second Header
        ------------- | -------------
        Content Cell  | Content Cell
        Content Cell  | Content Cell

    :param md: Markdown instance
    """
    # Both table rules are registered ahead of the ``paragraph`` rule.
    block_rules = (
        ('table', TABLE_PATTERN, parse_table),
        ('nptable', NP_TABLE_PATTERN, parse_nptable),
    )
    for name, pattern, func in block_rules:
        md.block.register(name, pattern, func, before='paragraph')

    # HTML render functions are only attached to an HTML renderer.
    if md.renderer and md.renderer.NAME == 'html':
        html_renderers = (
            ('table', render_table),
            ('table_head', render_table_head),
            ('table_body', render_table_body),
            ('table_row', render_table_row),
            ('table_cell', render_table_cell),
        )
        for name, func in html_renderers:
            md.renderer.register(name, func)
+
+
def table_in_quote(md):
    """Enable table plugin in block quotes."""
    for rule in ('table', 'nptable'):
        md.block.insert_rule(md.block.block_quote_rules, rule, before='paragraph')
+
+
def table_in_list(md):
    """Enable table plugin in list."""
    for rule in ('table', 'nptable'):
        md.block.insert_rule(md.block.list_rules, rule, before='paragraph')
diff --git a/src/mistune/plugins/task_lists.py b/src/mistune/plugins/task_lists.py
new file mode 100644
index 0000000..8571c32
--- /dev/null
+++ b/src/mistune/plugins/task_lists.py
@@ -0,0 +1,67 @@
+import re
+
__all__ = ['task_lists']


# Leading task marker: ``[ ]``, ``[x]`` or ``[X]`` followed by whitespace.
# Group 1 captures the bracketed marker itself.
TASK_LIST_ITEM = re.compile(r'^(\[[ xX]\])\s+')
+
+
def task_lists_hook(md, state):
    """Before-render hook that rewrites task list items in ``state.tokens``.

    :param md: Markdown instance (unused here)
    :param state: block state whose tokens are rewritten in place
    :return: the same token list
    """
    tokens = state.tokens
    return _rewrite_all_list_items(tokens)
+
+
def render_task_list_item(renderer, text, checked=False):
    """Render a task list item with a disabled checkbox in front.

    When the content starts with ``<p>`` the checkbox is placed just
    inside that paragraph; otherwise it is prepended to the content.

    :param renderer: renderer instance (unused here)
    :param text: rendered item content
    :param checked: whether the checkbox carries the ``checked`` attribute
    """
    checkbox = '<input class="task-list-item-checkbox" type="checkbox" disabled'
    checkbox += ' checked/>' if checked else '/>'

    paragraph_open = '<p>'
    if text.startswith(paragraph_open):
        body = paragraph_open + checkbox + text[len(paragraph_open):]
    else:
        body = checkbox + text

    return ''.join(('<li class="task-list-item">', body, '</li>\n'))
+
+
def task_lists(md):
    """A mistune plugin to support task lists. Spec defined by
    GitHub flavored Markdown and commonly used by many parsers:

    .. code-block:: text

        - [ ] unchecked task
        - [x] checked task

    :param md: Markdown instance
    """
    md.before_render_hooks.append(task_lists_hook)

    # The HTML render function is only attached to an HTML renderer.
    if not md.renderer or md.renderer.NAME != 'html':
        return
    md.renderer.register('task_list_item', render_task_list_item)
+
+
+def _rewrite_all_list_items(tokens):
+ for tok in tokens:
+ if tok['type'] == 'list_item':
+ _rewrite_list_item(tok)
+ if 'children' in tok:
+ _rewrite_all_list_items(tok['children'])
+ return tokens
+
+
+def _rewrite_list_item(tok):
+ children = tok['children']
+ if children:
+ first_child = children[0]
+ text = first_child.get('text', '')
+ m = TASK_LIST_ITEM.match(text)
+ if m:
+ mark = m.group(1)
+ first_child['text'] = text[m.end():]
+
+ tok['type'] = 'task_list_item'
+ tok['attrs'] = {'checked': mark != '[ ]'}
diff --git a/src/mistune/plugins/url.py b/src/mistune/plugins/url.py
new file mode 100644
index 0000000..d6f2251
--- /dev/null
+++ b/src/mistune/plugins/url.py
@@ -0,0 +1,23 @@
+from ..util import escape_url
+
__all__ = ['url']

# Bare http/https URL: runs until whitespace or ``<`` and must not end in
# ``<``, whitespace or one of ``. , : ; " ' ) ]``.
URL_LINK_PATTERN = r'''https?:\/\/[^\s<]+[^<.,:;"')\]\s]'''
+
+
def parse_url_link(inline, m, state):
    """Turn a bare URL match into a ``link`` token.

    Inside an existing link (``state.in_link``) the URL is handed to
    ``inline.process_text`` instead of producing a nested link.

    :param inline: inline parser
    :param m: match of ``URL_LINK_PATTERN``
    :param state: current inline state
    :return: end position of the match
    """
    matched, end_pos = m.group(0), m.end()
    if not state.in_link:
        state.append_token({
            'type': 'link',
            'children': [{'type': 'text', 'raw': matched}],
            'attrs': {'url': escape_url(matched)},
        })
    else:
        inline.process_text(matched, state)
    return end_pos
+
+
def url(md):
    """A mistune plugin that registers the ``url_link`` inline rule for bare URLs.

    :param md: Markdown instance
    """
    inline_parser = md.inline
    inline_parser.register('url_link', URL_LINK_PATTERN, parse_url_link)
diff --git a/src/mistune/renderers/__init__.py b/src/mistune/renderers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/mistune/renderers/__init__.py
diff --git a/src/mistune/renderers/_list.py b/src/mistune/renderers/_list.py
new file mode 100644
index 0000000..0a18639
--- /dev/null
+++ b/src/mistune/renderers/_list.py
@@ -0,0 +1,60 @@
+from ..util import strip_end
+
+
def render_list(renderer, token, state) -> str:
    """Render a ``list`` token back to text.

    A nested list (one carrying a ``parent`` entry) is returned as is when
    the parent is tight and with an extra newline otherwise. A top-level
    list has its trailing whitespace collapsed to a single newline and is
    followed by one more newline.

    :param renderer: renderer used for the item children
    :param token: the ``list`` token
    :param state: block state passed through to the renderer
    """
    if token['attrs']['ordered']:
        render_items = _render_ordered_list
    else:
        render_items = _render_unordered_list
    text = ''.join(render_items(renderer, token, state))

    parent = token.get('parent')
    if not parent:
        return strip_end(text) + '\n'
    return text if parent['tight'] else text + '\n'
+
+
+def _render_list_item(renderer, parent, item, state):
+ leading = parent['leading']
+ text = ''
+ for tok in item['children']:
+ if tok['type'] == 'list':
+ tok['parent'] = parent
+ elif tok['type'] == 'blank_line':
+ continue
+ text += renderer.render_token(tok, state)
+
+ lines = text.splitlines()
+ text = lines[0] + '\n'
+ prefix = ' ' * len(leading)
+ for line in lines[1:]:
+ if line:
+ text += prefix + line + '\n'
+ else:
+ text += '\n'
+ return leading + text
+
+
+def _render_ordered_list(renderer, token, state):
+ attrs = token['attrs']
+ start = attrs.get('start', 1)
+ for item in token['children']:
+ leading = str(start) + token['bullet'] + ' '
+ parent = {
+ 'leading': leading,
+ 'tight': token['tight'],
+ }
+ yield _render_list_item(renderer, parent, item, state)
+ start += 1
+
+
+def _render_unordered_list(renderer, token, state):
+ parent = {
+ 'leading': token['bullet'] + ' ',
+ 'tight': token['tight'],
+ }
+ for item in token['children']:
+ yield _render_list_item(renderer, parent, item, state)
diff --git a/src/mistune/renderers/html.py b/src/mistune/renderers/html.py
new file mode 100644
index 0000000..c458a4a
--- /dev/null
+++ b/src/mistune/renderers/html.py
@@ -0,0 +1,151 @@
+from ..core import BaseRenderer
+from ..util import escape as escape_text, striptags, safe_entity
+
+
class HTMLRenderer(BaseRenderer):
    """Renderer that turns Markdown tokens into HTML."""

    NAME = 'html'
    # URL prefixes that ``safe_url`` replaces with ``#harmful-link``.
    HARMFUL_PROTOCOLS = ('javascript:', 'vbscript:', 'file:', 'data:')
    # ``data:`` image prefixes that stay allowed despite the list above.
    GOOD_DATA_PROTOCOLS = (
        'data:image/gif;',
        'data:image/png;',
        'data:image/jpeg;',
        'data:image/webp;',
    )

    def __init__(self, escape=True, allow_harmful_protocols=None):
        """
        :param escape: escape text and raw HTML when true
        :param allow_harmful_protocols: ``True`` to allow every URL, or an
            iterable of lowercase URL prefixes to allow
        """
        super().__init__()
        self._escape = escape
        self._allow_harmful_protocols = allow_harmful_protocols

    def render_token(self, token, state):
        """Dispatch ``token`` to the render method registered for its type.

        The method receives the token's ``raw`` value, or else its rendered
        children, as first argument (no argument when the token has
        neither), plus the token's ``attrs`` as keyword arguments.
        """
        # backward compatible with v2
        func = self._get_method(token['type'])
        kwargs = token.get('attrs') or {}

        if 'raw' in token:
            args = (token['raw'],)
        elif 'children' in token:
            args = (self.render_tokens(token['children'], state),)
        else:
            args = ()
        return func(*args, **kwargs)

    def safe_url(self, url: str) -> str:
        """Return ``url``, or ``'#harmful-link'`` for a harmful protocol.

        Used when rendering links and images. The check is a
        case-insensitive prefix comparison.
        """
        allowed = self._allow_harmful_protocols
        if allowed is True:
            return url

        lowered = url.lower()
        if allowed and lowered.startswith(tuple(allowed)):
            return url

        is_harmful = lowered.startswith(self.HARMFUL_PROTOCOLS)
        if is_harmful and not lowered.startswith(self.GOOD_DATA_PROTOCOLS):
            return '#harmful-link'
        return url

    def text(self, text: str) -> str:
        """Render plain text, escaped according to the ``escape`` setting."""
        return escape_text(text) if self._escape else safe_entity(text)

    def emphasis(self, text: str) -> str:
        """Render emphasised content as ``<em>``."""
        return ''.join(('<em>', text, '</em>'))

    def strong(self, text: str) -> str:
        """Render strong content as ``<strong>``."""
        return ''.join(('<strong>', text, '</strong>'))

    def link(self, text: str, url: str, title=None) -> str:
        """Render an ``<a>`` element; ``url`` goes through ``safe_url``."""
        parts = ['<a href="', self.safe_url(url), '"']
        if title:
            parts += [' title="', safe_entity(title), '"']
        parts += ['>', text, '</a>']
        return ''.join(parts)

    def image(self, text: str, url: str, title=None) -> str:
        """Render an ``<img>`` element; tags are stripped from the alt text."""
        src = self.safe_url(url)
        alt = escape_text(striptags(text))
        parts = ['<img src="', src, '" alt="', alt, '"']
        if title:
            parts += [' title="', safe_entity(title), '"']
        parts.append(' />')
        return ''.join(parts)

    def codespan(self, text: str) -> str:
        """Render inline code as ``<code>``; ``text`` is inserted as given."""
        return ''.join(('<code>', text, '</code>'))

    def linebreak(self) -> str:
        """Render a hard line break."""
        return '<br />\n'

    def softbreak(self) -> str:
        """Render a soft line break as a newline."""
        return '\n'

    def inline_html(self, html: str) -> str:
        """Render inline raw HTML, escaped when ``escape`` is enabled."""
        return escape_text(html) if self._escape else html

    def paragraph(self, text: str) -> str:
        """Render a paragraph."""
        return ''.join(('<p>', text, '</p>\n'))

    def heading(self, text: str, level: int, **attrs) -> str:
        """Render ``<hN>``, with an ``id`` attribute when ``attrs`` has one."""
        tag = 'h' + str(level)
        heading_id = attrs.get('id')
        id_attr = ' id="' + heading_id + '"' if heading_id else ''
        return '<' + tag + id_attr + '>' + text + '</' + tag + '>\n'

    def blank_line(self) -> str:
        """Blank lines produce no output."""
        return ''

    def thematic_break(self) -> str:
        """Render a horizontal rule."""
        return '<hr />\n'

    def block_text(self, text: str) -> str:
        """Render block text unchanged."""
        return text

    def block_code(self, code: str, info=None) -> str:
        """Render a code block; the first word of ``info`` becomes the
        ``language-*`` class and ``code`` is always escaped."""
        class_attr = ''
        if info is not None:
            cleaned = safe_entity(info.strip())
            if cleaned:
                class_attr = ' class="language-' + cleaned.split(None, 1)[0] + '"'
        return '<pre><code' + class_attr + '>' + escape_text(code) + '</code></pre>\n'

    def block_quote(self, text: str) -> str:
        """Render a block quote."""
        return ''.join(('<blockquote>\n', text, '</blockquote>\n'))

    def block_html(self, html: str) -> str:
        """Render raw block HTML; escaped inside ``<p>`` when ``escape`` is on."""
        if not self._escape:
            return html + '\n'
        return '<p>' + escape_text(html) + '</p>\n'

    def block_error(self, text: str) -> str:
        """Render an error block."""
        return ''.join(('<div class="error"><pre>', text, '</pre></div>\n'))

    def list(self, text: str, ordered: bool, **attrs) -> str:
        """Render ``<ol>`` (with ``start`` when given) or ``<ul>``."""
        if not ordered:
            return '<ul>\n' + text + '</ul>\n'
        start = attrs.get('start')
        start_attr = '' if start is None else ' start="' + str(start) + '"'
        return '<ol' + start_attr + '>\n' + text + '</ol>\n'

    def list_item(self, text: str) -> str:
        """Render a list item."""
        return ''.join(('<li>', text, '</li>\n'))
diff --git a/src/mistune/renderers/markdown.py b/src/mistune/renderers/markdown.py
new file mode 100644
index 0000000..78334bc
--- /dev/null
+++ b/src/mistune/renderers/markdown.py
@@ -0,0 +1,146 @@
+import re
+from typing import Dict, Any
+from textwrap import indent
+from ._list import render_list
+from ..core import BaseRenderer, BlockState
+from ..util import strip_end
+
# Matches a run of backticks and/or tildes at the start of any line (re.M).
fenced_re = re.compile(r'^(?:`|~)+', re.M)
+
+
class MarkdownRenderer(BaseRenderer):
    """A renderer to re-format Markdown text."""
    NAME = 'markdown'

    def __call__(self, tokens, state: BlockState):
        """Render ``tokens`` and append the reference link definitions."""
        out = self.render_tokens(tokens, state)
        # special handle for line breaks
        out += '\n\n'.join(self.render_referrences(state)) + '\n'
        return strip_end(out)

    def render_referrences(self, state: BlockState):
        """Yield one ``[label]: url "title"`` line per entry of
        ``state.env['ref_links']``; the title part is omitted when empty."""
        ref_links = state.env['ref_links']
        for key in ref_links:
            attrs = ref_links[key]
            text = '[' + attrs['label'] + ']: ' + attrs['url']
            title = attrs.get('title')
            if title:
                text += ' "' + title + '"'
            yield text

    def render_children(self, token, state: BlockState):
        """Render the ``children`` of ``token``."""
        children = token['children']
        return self.render_tokens(children, state)

    def text(self, token: Dict[str, Any], state: BlockState) -> str:
        return token['raw']

    def emphasis(self, token: Dict[str, Any], state: BlockState) -> str:
        return '*' + self.render_children(token, state) + '*'

    def strong(self, token: Dict[str, Any], state: BlockState) -> str:
        return '**' + self.render_children(token, state) + '**'

    def link(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a link as a reference link, an autolink or an inline link.

        A token carrying a ``label`` becomes ``[text][label]``. When the
        text equals the URL (optionally minus ``mailto:``) and there is no
        title, the autolink form ``<text>`` is used.
        """
        label = token.get('label')
        text = self.render_children(token, state)
        out = '[' + text + ']'
        if label:
            return out + '[' + label + ']'

        attrs = token['attrs']
        url = attrs['url']
        title = attrs.get('title')
        if text == url and not title:
            return '<' + text + '>'
        elif 'mailto:' + text == url and not title:
            return '<' + text + '>'

        out += '('
        # A URL containing parentheses is wrapped in angle brackets.
        if '(' in url or ')' in url:
            out += '<' + url + '>'
        else:
            out += url
        if title:
            out += ' "' + title + '"'
        return out + ')'

    def image(self, token: Dict[str, Any], state: BlockState) -> str:
        return '!' + self.link(token, state)

    def codespan(self, token: Dict[str, Any], state: BlockState) -> str:
        return '`' + token['raw'] + '`'

    def linebreak(self, token: Dict[str, Any], state: BlockState) -> str:
        # A Markdown hard line break needs at least two spaces before the
        # newline; a single trailing space is only a soft break.
        return '  \n'

    def softbreak(self, token: Dict[str, Any], state: BlockState) -> str:
        return '\n'

    def blank_line(self, token: Dict[str, Any], state: BlockState) -> str:
        return ''

    def inline_html(self, token: Dict[str, Any], state: BlockState) -> str:
        return token['raw']

    def paragraph(self, token: Dict[str, Any], state: BlockState) -> str:
        text = self.render_children(token, state)
        return text + '\n\n'

    def heading(self, token: Dict[str, Any], state: BlockState) -> str:
        level = token['attrs']['level']
        marker = '#' * level
        text = self.render_children(token, state)
        return marker + ' ' + text + '\n\n'

    def thematic_break(self, token: Dict[str, Any], state: BlockState) -> str:
        return '***\n\n'

    def block_text(self, token: Dict[str, Any], state: BlockState) -> str:
        return self.render_children(token, state) + '\n'

    def block_code(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a fenced code block, reusing the token's ``marker`` when
        present and otherwise choosing one from the code content."""
        attrs = token.get('attrs', {})
        info = attrs.get('info', '')
        code = token['raw']
        if code and code[-1] != '\n':
            code += '\n'

        marker = token.get('marker')
        if not marker:
            marker = _get_fenced_marker(code)
        return marker + info + '\n' + code + marker + '\n\n'

    def block_quote(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a block quote by prefixing every line with ``> ``.

        Blank lines inside the quote are prefixed as well: an unprefixed
        blank line would end the quote and split multi-paragraph content
        into separate quotes. Trailing newlines of the rendered children
        are kept outside the prefixed part.
        """
        rendered = self.render_children(token, state)
        body = rendered.rstrip('\n')
        trailing = rendered[len(body):]
        # ``indent`` skips whitespace-only lines unless a predicate is given.
        quoted = indent(body, '> ', lambda line: True)
        return quoted + trailing + '\n\n'

    def block_html(self, token: Dict[str, Any], state: BlockState) -> str:
        return token['raw'] + '\n\n'

    def block_error(self, token: Dict[str, Any], state: BlockState) -> str:
        return ''

    def list(self, token: Dict[str, Any], state: BlockState) -> str:
        return render_list(self, token, state)
+
+
def _get_fenced_marker(code):
    """Choose a fence marker that cannot collide with lines inside ``code``.

    Runs of backticks/tildes at line starts are collected with
    ``fenced_re`` and classified by their first character:

    * no runs, or only tilde runs: use a backtick fence;
    * only backtick runs: use a tilde fence;
    * both kinds: use a backtick fence one longer than the longest
      backtick run, but never shorter than three characters, since a
      shorter run is not a code fence.

    :param code: raw text of the code block
    :return: the fence marker string
    """
    found = fenced_re.findall(code)
    if not found:
        return '```'

    ticks = [len(run) for run in found if run[0] == '`']
    has_waves = any(run[0] != '`' for run in found)

    if not ticks:
        return '```'

    if not has_waves:
        return '~~~'
    return '`' * max(3, max(ticks) + 1)
diff --git a/src/mistune/renderers/rst.py b/src/mistune/renderers/rst.py
new file mode 100644
index 0000000..fa12c21
--- /dev/null
+++ b/src/mistune/renderers/rst.py
@@ -0,0 +1,147 @@
+from typing import Dict, Any
+from textwrap import indent
+from ._list import render_list
+from ..core import BaseRenderer, BlockState
+from ..util import strip_end
+
+
class RSTRenderer(BaseRenderer):
    """Renderer that converts Markdown tokens to reStructuredText."""

    NAME = 'rst'

    #: underline character per heading level
    HEADING_MARKERS = {
        1: '=',
        2: '-',
        3: '~',
        4: '^',
        5: '"',
        6: "'",
    }
    #: prefix of the substitution names generated for inline images
    INLINE_IMAGE_PREFIX = 'img-'

    def iter_tokens(self, tokens, state):
        """Yield each rendered token, skipping blank lines.

        Every rendered token gets a ``prev`` entry pointing to the previous
        non-blank token (``None`` for the first one).
        """
        previous = None
        for token in tokens:
            # ignore blank line
            if token['type'] == 'blank_line':
                continue
            token['prev'], previous = previous, token
            yield self.render_token(token, state)

    def __call__(self, tokens, state: BlockState):
        """Render ``tokens`` and append the inline image definitions."""
        state.env['inline_images'] = []
        body = self.render_tokens(tokens, state)
        # special handle for line breaks
        references = '\n\n'.join(self.render_referrences(state))
        return strip_end(body + references + '\n')

    def render_referrences(self, state: BlockState):
        """Yield one image substitution definition per collected inline image."""
        for index, image in enumerate(state.env['inline_images']):
            attrs = image['attrs']
            alt = self.render_children(image, state)
            ident = self.INLINE_IMAGE_PREFIX + str(index)
            yield '.. |' + ident + '| image:: ' + attrs['url'] + '\n :alt: ' + alt

    def render_children(self, token, state: BlockState):
        """Render the ``children`` of ``token``."""
        return self.render_tokens(token['children'], state)

    def text(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render raw text with ``|`` backslash-escaped."""
        return token['raw'].replace('|', r'\|')

    def emphasis(self, token: Dict[str, Any], state: BlockState) -> str:
        return ''.join(('*', self.render_children(token, state), '*'))

    def strong(self, token: Dict[str, Any], state: BlockState) -> str:
        return ''.join(('**', self.render_children(token, state), '**'))

    def link(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render an anonymous hyperlink: ```text <url>`__``."""
        attrs = token['attrs']
        label = self.render_children(token, state)
        return ''.join(('`', label, ' <', attrs['url'], '>`__'))

    def image(self, token: Dict[str, Any], state: BlockState) -> str:
        """Record the image in ``state.env['inline_images']`` and return its
        ``|img-N|`` substitution reference."""
        collected: list = state.env['inline_images']
        position = len(collected)
        collected.append(token)
        return '|' + self.INLINE_IMAGE_PREFIX + str(position) + '|'

    def codespan(self, token: Dict[str, Any], state: BlockState) -> str:
        return ''.join(('``', token['raw'], '``'))

    def linebreak(self, token: Dict[str, Any], state: BlockState) -> str:
        # Placeholder that ``paragraph`` later turns into a line block.
        return '<linebreak>'

    def softbreak(self, token: Dict[str, Any], state: BlockState) -> str:
        return ' '

    def inline_html(self, token: Dict[str, Any], state: BlockState) -> str:
        # rst does not support inline html
        return ''

    def paragraph(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a paragraph.

        A paragraph consisting of a single image becomes a ``figure``
        directive. Otherwise a paragraph containing hard line breaks is
        rendered as a line block (each line prefixed with ``| ``).
        """
        children = token['children']
        if len(children) == 1 and children[0]['type'] == 'image':
            figure = children[0]
            figure_attrs = figure['attrs']
            title = figure_attrs.get('title')
            alt = self.render_children(figure, state)
            pieces = ['.. figure:: ' + figure_attrs['url']]
            if title:
                pieces.append('\n :alt: ' + title)
            pieces.append('\n\n' + indent(alt, ' '))
            return ''.join(pieces) + '\n\n'

        rendered = self.render_tokens(children, state)
        segments = rendered.split('<linebreak>')
        if len(segments) > 1:
            rendered = '\n'.join(['| ' + segment for segment in segments])
        return rendered + '\n\n'

    def heading(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a heading underlined with its level's marker character."""
        attrs = token['attrs']
        title = self.render_children(token, state)
        underline = self.HEADING_MARKERS[attrs['level']] * len(title)
        return title + '\n' + underline + '\n\n'

    def thematic_break(self, token: Dict[str, Any], state: BlockState) -> str:
        return '--------------\n\n'

    def block_text(self, token: Dict[str, Any], state: BlockState) -> str:
        return self.render_children(token, state) + '\n'

    def block_code(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a ``code`` directive using the first word of ``info`` as
        language, or a plain ``::`` literal block when there is no info."""
        info = token.get('attrs', {}).get('info')
        code = indent(token['raw'], ' ')
        if not info:
            return '::\n\n' + code + '\n\n'
        return '.. code:: ' + info.split()[0] + '\n\n' + code + '\n'

    def block_quote(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render a block quote as indented text.

        An empty comment (``..``) is put in front unless the previous token
        is missing or is one of the listed block types.
        """
        quoted = indent(self.render_children(token, state), ' ')
        previous = token['prev']
        ignore_blocks = (
            'paragraph',
            'thematic_break',
            'linebreak',
            'heading',
        )
        if not previous or previous['type'] in ignore_blocks:
            return quoted
        return '..\n\n' + quoted

    def block_html(self, token: Dict[str, Any], state: BlockState) -> str:
        """Render raw HTML inside a ``raw:: html`` directive."""
        return '.. raw:: html\n\n' + indent(token['raw'], ' ') + '\n\n'

    def block_error(self, token: Dict[str, Any], state: BlockState) -> str:
        return ''

    def list(self, token: Dict[str, Any], state: BlockState) -> str:
        return render_list(self, token, state)
diff --git a/src/mistune/toc.py b/src/mistune/toc.py
new file mode 100644
index 0000000..c908b0c
--- /dev/null
+++ b/src/mistune/toc.py
@@ -0,0 +1,111 @@
+from .util import striptags
+
+
def add_toc_hook(md, min_level=1, max_level=3, heading_id=None):
    """Add a hook to save toc items into ``state.env``. This is
    usually helpful for doc generator::

        import mistune
        from mistune.toc import add_toc_hook, render_toc_ul

        md = mistune.create_markdown(...)
        add_toc_hook(md, min_level=1, max_level=3)

        html, state = md.parse(text)
        toc_items = state.env['toc_items']
        toc_html = render_toc_ul(toc_items)

    Only top-level tokens of ``state.tokens`` are inspected. Each heading
    whose level lies within ``[min_level, max_level]`` gets an ``id``
    attribute and contributes one ``(level, id, text)`` item.

    :param md: Markdown instance
    :param min_level: min heading level
    :param max_level: max heading level
    :param heading_id: a function ``(token, index)`` to generate heading_id;
        defaults to ``toc_1``, ``toc_2``, ... in document order
    """
    if heading_id is None:
        def heading_id(token, index):
            return 'toc_' + str(index + 1)

    def toc_hook(md, state):
        headings = [
            tok for tok in state.tokens
            if tok['type'] == 'heading'
            and min_level <= tok['attrs']['level'] <= max_level
        ]

        toc_items = []
        for i, tok in enumerate(headings):
            # The id must be set first: normalize_toc_item reads it back.
            tok['attrs']['id'] = heading_id(tok, i)
            toc_items.append(normalize_toc_item(md, tok))

        # save items into state
        state.env['toc_items'] = toc_items

    md.before_render_hooks.append(toc_hook)
+
+
def normalize_toc_item(md, token):
    """Build a ``(level, id, text)`` toc item from a heading token.

    The heading's ``text`` is run through ``md.inline`` and ``md.renderer``
    and tags are then stripped from the rendered result.

    :param md: Markdown instance
    :param token: heading token with ``text`` and ``attrs`` (level, id)
    """
    inline_tokens = md.inline(token['text'], {})
    plain_text = striptags(md.renderer(inline_tokens, {}))
    attrs = token['attrs']
    return attrs['level'], attrs['id'], plain_text
+
+
def render_toc_ul(toc):
    """Render a <ul> table of content HTML. The param "toc" should
    be formatted into this structure::

        [
          (level, id, text),
        ]

    For example::

        [
          (1, 'toc-intro', 'Introduction'),
          (2, 'toc-install', 'Install'),
          (2, 'toc-upgrade', 'Upgrade'),
          (1, 'toc-license', 'License'),
        ]

    A deeper level opens a nested ``<ul>`` inside the current item; a
    shallower level closes nested lists until a matching or shallower
    open level is found. An empty ``toc`` renders as ``''``.
    """
    if not toc:
        return ''

    out = ['<ul>\n']
    # Levels of the currently open lists, outermost first.
    open_levels = []
    for level, ident, text in toc:
        anchor = '<a href="#{}">{}</a>'.format(ident, text)
        if not open_levels:
            out.append('<li>' + anchor)
            open_levels.append(level)
            continue

        current = open_levels[-1]
        if level == current:
            out.append('</li>\n<li>' + anchor)
            continue
        if level > current:
            out.append('\n<ul>\n<li>' + anchor)
            open_levels.append(level)
            continue

        # Shallower than the current list: unwind the open levels.
        open_levels.pop()
        while open_levels:
            candidate = open_levels.pop()
            if level == candidate:
                out.append('</li>\n</ul>\n</li>\n<li>' + anchor)
                open_levels.append(level)
                break
            if level > candidate:
                out.append('</li>\n<li>' + anchor)
                open_levels.extend((candidate, level))
                break
            out.append('</li>\n</ul>\n')
        else:
            # Every open level was deeper: continue in the outermost list.
            open_levels.append(level)
            out.append('</li>\n<li>' + anchor)

    # Close every nested list that is still open, then the outermost one.
    out.append('</li>\n</ul>\n' * (len(open_levels) - 1))
    out.append('</li>\n</ul>\n')
    return ''.join(out)
diff --git a/src/mistune/util.py b/src/mistune/util.py
new file mode 100644
index 0000000..e2337b4
--- /dev/null
+++ b/src/mistune/util.py
@@ -0,0 +1,81 @@
+import re
+from urllib.parse import quote
+from html import _replace_charref
+
+
# Up to three leading spaces followed by a tab, at the start of any line.
_expand_tab_re = re.compile(r'^( {0,3})\t', flags=re.MULTILINE)


def expand_leading_tab(text, width=4):
    """Expand a leading tab (after at most three spaces) on every line.

    The spaces plus the tab are replaced by spaces so that the prefix is
    ``width`` characters wide.

    :param text: text to process
    :param width: width of the expanded prefix
    """
    return _expand_tab_re.sub(lambda m: m.group(1).ljust(width), text)
+
+
def expand_tab(text, space=' '):
    """Replace a leading tab (after at most three spaces) on every line
    with ``space``, keeping the spaces in front of it.

    :param text: text to process
    :param space: replacement for the tab character
    """
    return _expand_tab_re.sub(r'\1' + space, text)
+
+
def escape(s, quote=True):
    """Escape the characters ``&``, ``<`` and ``>`` as HTML entities.

    If ``quote`` is true, ``"`` is converted to ``&quot;`` as well.
    """
    # ``&`` must be replaced first so later entities are not re-escaped.
    replacements = [('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')]
    if quote:
        replacements.append(('"', '&quot;'))
    for char, entity in replacements:
        s = s.replace(char, entity)
    return s
+
+
def escape_url(link):
    """Escape URL for safety.

    Entities in ``link`` are decoded, the result is percent-encoded with
    the characters below left untouched, and the outcome is HTML-escaped.
    """
    gen_delims = ':/?#@'       # gen-delims - '[]' (rfc3986)
    sub_delims = '!$&()*+,;='  # sub-delims - "'" (rfc3986)
    encoded_marker = '%'       # leave already-encoded octets alone
    decoded = unescape(link)
    quoted = quote(decoded, safe=gen_delims + sub_delims + encoded_marker)
    return escape(quoted)
+
+
def safe_entity(s):
    """Escape characters for safety: decode entities first, then
    HTML-escape the result."""
    decoded = unescape(s)
    return escape(decoded)
+
+
def unikey(s):
    """Generate a unique key for links and footnotes.

    Whitespace runs are collapsed to single spaces, surrounding whitespace
    is dropped and the result is lower-cased and then upper-cased.
    """
    words = s.split()
    collapsed = ' '.join(words).strip()
    lowered = collapsed.lower()
    return lowered.upper()
+
+
# Character references that end with a semicolon: decimal, hexadecimal
# and named forms.
_charref_re = re.compile(
    r'&(#[0-9]{1,7};'
    r'|#[xX][0-9a-fA-F]+;'
    r'|[^\t\n\f <&#;]{1,32};)'
)


def unescape(s):
    """
    Copy from `html.unescape`, but `_charref` is different. CommonMark
    does not accept entity references without a trailing semicolon
    """
    # Strings without ``&`` cannot contain a reference; return them as is.
    return _charref_re.sub(_replace_charref, s) if '&' in s else s
+
+
# An HTML comment, or anything from ``<`` up to the next ``>``.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')


def striptags(s):
    """Return ``s`` with HTML comments and tags removed."""
    stripped = _striptags_re.sub('', s)
    return stripped
+
+
# A newline followed by trailing whitespace at the end of the text.
_strip_end_re = re.compile(r'\n\s+$')


def strip_end(src):
    """Collapse trailing whitespace that follows a newline into that
    single newline."""
    trimmed = _strip_end_re.sub('\n', src)
    return trimmed