__init__.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
# -*- coding: utf-8 -*-
"""
markupsafe
~~~~~~~~~~

Implements an escape function and a Markup string to replace HTML
special characters with safe representations.

:copyright: 2010 Pallets
:license: BSD-3-Clause
"""
  10. import re
  11. import string
  12. from ._compat import int_types
  13. from ._compat import iteritems
  14. from ._compat import Mapping
  15. from ._compat import PY2
  16. from ._compat import string_types
  17. from ._compat import text_type
  18. from ._compat import unichr
  19. __version__ = "1.1.1"
  20. __all__ = ["Markup", "soft_unicode", "escape", "escape_silent"]
  21. _striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)")
  22. _entity_re = re.compile(r"&([^& ;]+);")
  23. class Markup(text_type):
  24. """A string that is ready to be safely inserted into an HTML or XML
  25. document, either because it was escaped or because it was marked
  26. safe.
  27. Passing an object to the constructor converts it to text and wraps
  28. it to mark it safe without escaping. To escape the text, use the
  29. :meth:`escape` class method instead.
  30. >>> Markup('Hello, <em>World</em>!')
  31. Markup('Hello, <em>World</em>!')
  32. >>> Markup(42)
  33. Markup('42')
  34. >>> Markup.escape('Hello, <em>World</em>!')
  35. Markup('Hello &lt;em&gt;World&lt;/em&gt;!')
  36. This implements the ``__html__()`` interface that some frameworks
  37. use. Passing an object that implements ``__html__()`` will wrap the
  38. output of that method, marking it safe.
  39. >>> class Foo:
  40. ... def __html__(self):
  41. ... return '<a href="/foo">foo</a>'
  42. ...
  43. >>> Markup(Foo())
  44. Markup('<a href="/foo">foo</a>')
  45. This is a subclass of the text type (``str`` in Python 3,
  46. ``unicode`` in Python 2). It has the same methods as that type, but
  47. all methods escape their arguments and return a ``Markup`` instance.
  48. >>> Markup('<em>%s</em>') % 'foo & bar'
  49. Markup('<em>foo &amp; bar</em>')
  50. >>> Markup('<em>Hello</em> ') + '<foo>'
  51. Markup('<em>Hello</em> &lt;foo&gt;')
  52. """
  53. __slots__ = ()
  54. def __new__(cls, base=u"", encoding=None, errors="strict"):
  55. if hasattr(base, "__html__"):
  56. base = base.__html__()
  57. if encoding is None:
  58. return text_type.__new__(cls, base)
  59. return text_type.__new__(cls, base, encoding, errors)
  60. def __html__(self):
  61. return self
  62. def __add__(self, other):
  63. if isinstance(other, string_types) or hasattr(other, "__html__"):
  64. return self.__class__(super(Markup, self).__add__(self.escape(other)))
  65. return NotImplemented
  66. def __radd__(self, other):
  67. if hasattr(other, "__html__") or isinstance(other, string_types):
  68. return self.escape(other).__add__(self)
  69. return NotImplemented
  70. def __mul__(self, num):
  71. if isinstance(num, int_types):
  72. return self.__class__(text_type.__mul__(self, num))
  73. return NotImplemented
  74. __rmul__ = __mul__
  75. def __mod__(self, arg):
  76. if isinstance(arg, tuple):
  77. arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
  78. else:
  79. arg = _MarkupEscapeHelper(arg, self.escape)
  80. return self.__class__(text_type.__mod__(self, arg))
  81. def __repr__(self):
  82. return "%s(%s)" % (self.__class__.__name__, text_type.__repr__(self))
  83. def join(self, seq):
  84. return self.__class__(text_type.join(self, map(self.escape, seq)))
  85. join.__doc__ = text_type.join.__doc__
  86. def split(self, *args, **kwargs):
  87. return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
  88. split.__doc__ = text_type.split.__doc__
  89. def rsplit(self, *args, **kwargs):
  90. return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
  91. rsplit.__doc__ = text_type.rsplit.__doc__
  92. def splitlines(self, *args, **kwargs):
  93. return list(map(self.__class__, text_type.splitlines(self, *args, **kwargs)))
  94. splitlines.__doc__ = text_type.splitlines.__doc__
  95. def unescape(self):
  96. """Convert escaped markup back into a text string. This replaces
  97. HTML entities with the characters they represent.
  98. >>> Markup('Main &raquo; <em>About</em>').unescape()
  99. 'Main » <em>About</em>'
  100. """
  101. from ._constants import HTML_ENTITIES
  102. def handle_match(m):
  103. name = m.group(1)
  104. if name in HTML_ENTITIES:
  105. return unichr(HTML_ENTITIES[name])
  106. try:
  107. if name[:2] in ("#x", "#X"):
  108. return unichr(int(name[2:], 16))
  109. elif name.startswith("#"):
  110. return unichr(int(name[1:]))
  111. except ValueError:
  112. pass
  113. # Don't modify unexpected input.
  114. return m.group()
  115. return _entity_re.sub(handle_match, text_type(self))
  116. def striptags(self):
  117. """:meth:`unescape` the markup, remove tags, and normalize
  118. whitespace to single spaces.
  119. >>> Markup('Main &raquo;\t<em>About</em>').striptags()
  120. 'Main » About'
  121. """
  122. stripped = u" ".join(_striptags_re.sub("", self).split())
  123. return Markup(stripped).unescape()
  124. @classmethod
  125. def escape(cls, s):
  126. """Escape a string. Calls :func:`escape` and ensures that for
  127. subclasses the correct type is returned.
  128. """
  129. rv = escape(s)
  130. if rv.__class__ is not cls:
  131. return cls(rv)
  132. return rv
  133. def make_simple_escaping_wrapper(name): # noqa: B902
  134. orig = getattr(text_type, name)
  135. def func(self, *args, **kwargs):
  136. args = _escape_argspec(list(args), enumerate(args), self.escape)
  137. _escape_argspec(kwargs, iteritems(kwargs), self.escape)
  138. return self.__class__(orig(self, *args, **kwargs))
  139. func.__name__ = orig.__name__
  140. func.__doc__ = orig.__doc__
  141. return func
  142. for method in (
  143. "__getitem__",
  144. "capitalize",
  145. "title",
  146. "lower",
  147. "upper",
  148. "replace",
  149. "ljust",
  150. "rjust",
  151. "lstrip",
  152. "rstrip",
  153. "center",
  154. "strip",
  155. "translate",
  156. "expandtabs",
  157. "swapcase",
  158. "zfill",
  159. ):
  160. locals()[method] = make_simple_escaping_wrapper(method)
  161. def partition(self, sep):
  162. return tuple(map(self.__class__, text_type.partition(self, self.escape(sep))))
  163. def rpartition(self, sep):
  164. return tuple(map(self.__class__, text_type.rpartition(self, self.escape(sep))))
  165. def format(self, *args, **kwargs):
  166. formatter = EscapeFormatter(self.escape)
  167. kwargs = _MagicFormatMapping(args, kwargs)
  168. return self.__class__(formatter.vformat(self, args, kwargs))
  169. def __html_format__(self, format_spec):
  170. if format_spec:
  171. raise ValueError("Unsupported format specification " "for Markup.")
  172. return self
  173. # not in python 3
  174. if hasattr(text_type, "__getslice__"):
  175. __getslice__ = make_simple_escaping_wrapper("__getslice__")
  176. del method, make_simple_escaping_wrapper
  177. class _MagicFormatMapping(Mapping):
  178. """This class implements a dummy wrapper to fix a bug in the Python
  179. standard library for string formatting.
  180. See http://bugs.python.org/issue13598 for information about why
  181. this is necessary.
  182. """
  183. def __init__(self, args, kwargs):
  184. self._args = args
  185. self._kwargs = kwargs
  186. self._last_index = 0
  187. def __getitem__(self, key):
  188. if key == "":
  189. idx = self._last_index
  190. self._last_index += 1
  191. try:
  192. return self._args[idx]
  193. except LookupError:
  194. pass
  195. key = str(idx)
  196. return self._kwargs[key]
  197. def __iter__(self):
  198. return iter(self._kwargs)
  199. def __len__(self):
  200. return len(self._kwargs)
  201. if hasattr(text_type, "format"):
  202. class EscapeFormatter(string.Formatter):
  203. def __init__(self, escape):
  204. self.escape = escape
  205. def format_field(self, value, format_spec):
  206. if hasattr(value, "__html_format__"):
  207. rv = value.__html_format__(format_spec)
  208. elif hasattr(value, "__html__"):
  209. if format_spec:
  210. raise ValueError(
  211. "Format specifier {0} given, but {1} does not"
  212. " define __html_format__. A class that defines"
  213. " __html__ must define __html_format__ to work"
  214. " with format specifiers.".format(format_spec, type(value))
  215. )
  216. rv = value.__html__()
  217. else:
  218. # We need to make sure the format spec is unicode here as
  219. # otherwise the wrong callback methods are invoked. For
  220. # instance a byte string there would invoke __str__ and
  221. # not __unicode__.
  222. rv = string.Formatter.format_field(self, value, text_type(format_spec))
  223. return text_type(self.escape(rv))
  224. def _escape_argspec(obj, iterable, escape):
  225. """Helper for various string-wrapped functions."""
  226. for key, value in iterable:
  227. if hasattr(value, "__html__") or isinstance(value, string_types):
  228. obj[key] = escape(value)
  229. return obj
  230. class _MarkupEscapeHelper(object):
  231. """Helper for Markup.__mod__"""
  232. def __init__(self, obj, escape):
  233. self.obj = obj
  234. self.escape = escape
  235. def __getitem__(self, item):
  236. return _MarkupEscapeHelper(self.obj[item], self.escape)
  237. def __str__(self):
  238. return text_type(self.escape(self.obj))
  239. __unicode__ = __str__
  240. def __repr__(self):
  241. return str(self.escape(repr(self.obj)))
  242. def __int__(self):
  243. return int(self.obj)
  244. def __float__(self):
  245. return float(self.obj)
  246. # we have to import it down here as the speedups and native
  247. # modules imports the markup type which is define above.
  248. try:
  249. from ._speedups import escape, escape_silent, soft_unicode
  250. except ImportError:
  251. from ._native import escape, escape_silent, soft_unicode
  252. if not PY2:
  253. soft_str = soft_unicode
  254. __all__.append("soft_str")