summary | refs | log | tree | commit | diff
path: root/src/mistune/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/mistune/util.py')
-rw-r--r-- src/mistune/util.py 81
1 files changed, 81 insertions, 0 deletions
diff --git a/src/mistune/util.py b/src/mistune/util.py
new file mode 100644
index 0000000..e2337b4
--- /dev/null
+++ b/src/mistune/util.py
@@ -0,0 +1,81 @@
+import re
+from urllib.parse import quote
+from html import _replace_charref
+
+
# A tab preceded by at most three spaces at the start of any line.
_expand_tab_re = re.compile(r'^( {0,3})\t', re.MULTILINE)


def expand_leading_tab(text, width=4):
    """Expand the first tab of each line into padding spaces.

    Only a tab that follows zero to three spaces at the beginning of a
    line is affected. The spaces before the tab are kept and padded
    with further spaces until the prefix is ``width`` characters long;
    a prefix that is already ``width`` or longer is left as it is and
    the tab is simply removed.

    :param text: the text to process (may contain several lines).
    :param width: target length of the space prefix that replaces the
        leading spaces plus tab.
    :return: ``text`` with the matching tabs expanded.
    """
    def _pad_prefix(match):
        # ``ljust`` pads with spaces and never truncates, which mirrors
        # "prefix + ' ' * (width - len(prefix))".
        return match.group(1).ljust(width)

    return _expand_tab_re.sub(_pad_prefix, text)
+
+
def expand_tab(text, space=' '):
    """Replace the first tab of each line with ``space``.

    Only a tab that follows zero to three spaces at the beginning of a
    line is affected; the spaces in front of it are kept.

    :param text: the text to process (may contain several lines).
    :param space: the literal string substituted for the tab.
    :return: ``text`` with the matching tabs replaced.
    """
    # NOTE(review): the default substitutes a single space, whereas
    # expand_leading_tab pads to a width of 4 -- confirm this is intended.

    def _replace(match):
        # A callable inserts ``space`` verbatim. Building a template string
        # (r'\1' + space) would let ``re`` interpret backslashes or group
        # references inside ``space`` and fail on e.g. a lone backslash.
        return match.group(1) + space

    return _expand_tab_re.sub(_replace, text)
+
+
def escape(s, quote=True):
    """Escape the HTML special characters ``&``, ``<`` and ``>`` in ``s``.

    If ``quote`` is true, ``"`` is additionally converted to ``&quot;``.

    :param s: the string to escape.
    :param quote: whether the double-quote character is escaped too.
    :return: the escaped string.
    """
    # "&" has to go first, otherwise the ampersands introduced by the
    # other replacements would be escaped a second time.
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
    return s
+
+
def escape_url(link):
    """Return ``link`` percent-encoded and HTML-escaped.

    Character references in ``link`` are decoded with ``unescape``
    first, the result is percent-encoded with ``urllib.parse.quote``
    and finally passed through ``escape``.
    """
    gen_delims = ':/?#@'       # rfc3986 gen-delims, minus '[' and ']'
    sub_delims = '!$&()*+,;='  # rfc3986 sub-delims, minus "'"
    # '%' stays untouched so octets that are already encoded are kept.
    untouched = gen_delims + sub_delims + '%'

    decoded = unescape(link)
    encoded = quote(decoded, safe=untouched)
    return escape(encoded)
+
+
def safe_entity(s):
    """Decode character references in ``s``, then escape the result."""
    decoded = unescape(s)
    return escape(decoded)
+
+
def unikey(s):
    """Build the normalized lookup key used for links and footnotes.

    Runs of whitespace are collapsed to a single space, surrounding
    whitespace is dropped, and the text is lower-cased and then
    upper-cased so that labels differing only in case share a key.
    """
    # split() without arguments already discards leading and trailing
    # whitespace, so the joined words need no further stripping.
    words = s.split()
    collapsed = ' '.join(words)
    lowered = collapsed.lower()
    return lowered.upper()
+
+
# Character references, always terminated by a semicolon: decimal,
# hexadecimal, or named.
_charref_re = re.compile(
    r'&('
    r'#[0-9]{1,7};'
    r'|#[xX][0-9a-fA-F]+;'
    r'|[^\t\n\f <&#;]{1,32};'
    r')'
)


def unescape(s):
    """Replace character references in ``s`` with their characters.

    Modelled on ``html.unescape`` but with a different reference
    pattern: CommonMark does not accept entity references that lack the
    trailing semicolon, so only semicolon-terminated references are
    handed to ``html._replace_charref``.
    """
    # Without an ampersand there is nothing to substitute.
    return _charref_re.sub(_replace_charref, s) if '&' in s else s
+
+
# An HTML comment or any ``<...>`` tag. DOTALL lets the comment branch
# span line breaks; without it a multi-line comment containing ">" was
# only removed up to that ">" by the generic tag branch.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)', flags=re.S)


def striptags(s):
    """Remove HTML comments and tags from ``s``.

    :param s: the text to clean.
    :return: ``s`` with every ``<!-- ... -->`` comment and ``<...>``
        tag deleted; all other text is kept.
    """
    return _striptags_re.sub('', s)
+
+
# A newline followed by nothing but whitespace up to the end of the text.
_strip_end_re = re.compile(r'\n\s+$')


def strip_end(src):
    """Collapse trailing blank content of ``src`` into one newline.

    If ``src`` ends with a newline followed by at least one more
    whitespace character, that whole tail is replaced by a single
    ``'\\n'``; otherwise ``src`` is returned unchanged.
    """
    tail = _strip_end_re.search(src)
    if tail is None:
        return src
    # The pattern is anchored at the end, so it can match only once.
    return src[:tail.start()] + '\n' + src[tail.end():]