import re
from urllib.parse import quote
from html import _replace_charref


# A tab that follows at most three leading spaces at the start of any line.
_expand_tab_re = re.compile(r'^( {0,3})\t', flags=re.M)


def expand_leading_tab(text: str, width: int = 4) -> str:
    """Replace a leading tab on each line with spaces up to ``width`` columns.

    Only a tab preceded by zero to three spaces at the start of a line is
    touched; the spaces before it are kept and the tab is replaced by as many
    spaces as are needed to reach ``width`` characters in total.
    """
    def repl(m: "re.Match[str]") -> str:
        s = m.group(1)
        return s + ' ' * (width - len(s))

    return _expand_tab_re.sub(repl, text)


def expand_tab(text: str, space: str = ' ') -> str:
    """Replace a leading tab on each line with the fixed string ``space``.

    Unlike :func:`expand_leading_tab`, the replacement does not depend on how
    many spaces precede the tab; those spaces are kept unchanged.
    """
    # NOTE(review): the default is a single space while expand_leading_tab
    # pads to a width of 4 -- confirm the default is intended.
    repl = r'\1' + space
    return _expand_tab_re.sub(repl, text)


def escape(s: str, quote: bool = True) -> str:
    """Escape the characters ``&``, ``<`` and ``>`` as HTML entities.

    If ``quote`` is true, ``"`` is also converted to ``&quot;``.
    """
    # The ampersand must go first, otherwise the "&" introduced by the
    # later replacements would itself be escaped again.
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
    return s


def escape_url(link: str) -> str:
    """Percent-encode and HTML-escape ``link`` for safe use in an attribute.

    Character references in ``link`` are decoded first, the result is
    percent-encoded (leaving the characters in ``safe`` alone), and the
    outcome is finally HTML-escaped.
    """
    safe = (
        ':/?#@'           # gen-delims - '[]' (rfc3986)
        '!$&()*+,;='      # sub-delims - "'" (rfc3986)
        '%'               # leave already-encoded octets alone
    )
    return escape(quote(unescape(link), safe=safe))


def safe_entity(s: str) -> str:
    """Decode character references in ``s``, then HTML-escape the result."""
    return escape(unescape(s))


def unikey(s: str) -> str:
    """Generate a normalized key for links and footnotes.

    Runs of whitespace collapse to one space, surrounding whitespace is
    dropped, and case is normalized by lowercasing and then uppercasing.
    """
    # str.split() with no argument already discards leading and trailing
    # whitespace, so no extra strip() is needed after the join.
    key = ' '.join(s.split())
    return key.lower().upper()


# Character references; every alternative requires the trailing semicolon.
_charref_re = re.compile(
    r'&(#[0-9]{1,7};'
    r'|#[xX][0-9a-fA-F]+;'
    r'|[^\t\n\f <&#;]{1,32};)'
)


def unescape(s: str) -> str:
    """Decode HTML character references in ``s``.

    Modeled on ``html.unescape`` but with a different ``_charref`` pattern:
    CommonMark does not accept entity references without a trailing
    semicolon, so only semicolon-terminated references are replaced.
    """
    if '&' not in s:
        return s
    return _charref_re.sub(_replace_charref, s)


# HTML comments are tried first so that a ">" inside a comment does not end
# the match early; any other "<...>" run is treated as a tag.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')


def striptags(s: str) -> str:
    """Remove HTML comments and tags from ``s``, keeping the text between."""
    return _striptags_re.sub('', s)


_strip_end_re = re.compile(r'\n\s+$')


def strip_end(src: str) -> str:
    """Collapse a newline followed by trailing whitespace into one newline."""
    return _strip_end_re.sub('\n', src)