lint.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. """
  2. WSGI Protocol Linter
  3. ====================
  4. This module provides a middleware that performs sanity checks on the
  5. behavior of the WSGI server and application. It checks that the
  6. :pep:`3333` WSGI spec is properly implemented. It also warns on some
  7. common HTTP errors such as non-empty responses for 304 status codes.
  8. .. autoclass:: LintMiddleware
  9. :copyright: 2007 Pallets
  10. :license: BSD-3-Clause
  11. """
  12. from warnings import warn
  13. from .._compat import implements_iterator
  14. from .._compat import PY2
  15. from .._compat import string_types
  16. from ..datastructures import Headers
  17. from ..http import is_entity_header
  18. from ..wsgi import FileWrapper
  19. try:
  20. from urllib.parse import urlparse
  21. except ImportError:
  22. from urlparse import urlparse
  23. class WSGIWarning(Warning):
  24. """Warning class for WSGI warnings."""
  25. class HTTPWarning(Warning):
  26. """Warning class for HTTP warnings."""
  27. def check_string(context, obj, stacklevel=3):
  28. if type(obj) is not str:
  29. warn(
  30. "'%s' requires strings, got '%s'" % (context, type(obj).__name__),
  31. WSGIWarning,
  32. )
  33. class InputStream(object):
  34. def __init__(self, stream):
  35. self._stream = stream
  36. def read(self, *args):
  37. if len(args) == 0:
  38. warn(
  39. "WSGI does not guarantee an EOF marker on the input stream, thus making"
  40. " calls to 'wsgi.input.read()' unsafe. Conforming servers may never"
  41. " return from this call.",
  42. WSGIWarning,
  43. stacklevel=2,
  44. )
  45. elif len(args) != 1:
  46. warn(
  47. "Too many parameters passed to 'wsgi.input.read()'.",
  48. WSGIWarning,
  49. stacklevel=2,
  50. )
  51. return self._stream.read(*args)
  52. def readline(self, *args):
  53. if len(args) == 0:
  54. warn(
  55. "Calls to 'wsgi.input.readline()' without arguments are unsafe. Use"
  56. " 'wsgi.input.read()' instead.",
  57. WSGIWarning,
  58. stacklevel=2,
  59. )
  60. elif len(args) == 1:
  61. warn(
  62. "'wsgi.input.readline()' was called with a size hint. WSGI does not"
  63. " support this, although it's available on all major servers.",
  64. WSGIWarning,
  65. stacklevel=2,
  66. )
  67. else:
  68. raise TypeError("Too many arguments passed to 'wsgi.input.readline()'.")
  69. return self._stream.readline(*args)
  70. def __iter__(self):
  71. try:
  72. return iter(self._stream)
  73. except TypeError:
  74. warn("'wsgi.input' is not iterable.", WSGIWarning, stacklevel=2)
  75. return iter(())
  76. def close(self):
  77. warn("The application closed the input stream!", WSGIWarning, stacklevel=2)
  78. self._stream.close()
  79. class ErrorStream(object):
  80. def __init__(self, stream):
  81. self._stream = stream
  82. def write(self, s):
  83. check_string("wsgi.error.write()", s)
  84. self._stream.write(s)
  85. def flush(self):
  86. self._stream.flush()
  87. def writelines(self, seq):
  88. for line in seq:
  89. self.write(line)
  90. def close(self):
  91. warn("The application closed the error stream!", WSGIWarning, stacklevel=2)
  92. self._stream.close()
  93. class GuardedWrite(object):
  94. def __init__(self, write, chunks):
  95. self._write = write
  96. self._chunks = chunks
  97. def __call__(self, s):
  98. check_string("write()", s)
  99. self._write.write(s)
  100. self._chunks.append(len(s))
  101. @implements_iterator
  102. class GuardedIterator(object):
  103. def __init__(self, iterator, headers_set, chunks):
  104. self._iterator = iterator
  105. if PY2:
  106. self._next = iter(iterator).next
  107. else:
  108. self._next = iter(iterator).__next__
  109. self.closed = False
  110. self.headers_set = headers_set
  111. self.chunks = chunks
  112. def __iter__(self):
  113. return self
  114. def __next__(self):
  115. if self.closed:
  116. warn("Iterated over closed 'app_iter'.", WSGIWarning, stacklevel=2)
  117. rv = self._next()
  118. if not self.headers_set:
  119. warn(
  120. "The application returned before it started the response.",
  121. WSGIWarning,
  122. stacklevel=2,
  123. )
  124. check_string("application iterator items", rv)
  125. self.chunks.append(len(rv))
  126. return rv
  127. def close(self):
  128. self.closed = True
  129. if hasattr(self._iterator, "close"):
  130. self._iterator.close()
  131. if self.headers_set:
  132. status_code, headers = self.headers_set
  133. bytes_sent = sum(self.chunks)
  134. content_length = headers.get("content-length", type=int)
  135. if status_code == 304:
  136. for key, _value in headers:
  137. key = key.lower()
  138. if key not in ("expires", "content-location") and is_entity_header(
  139. key
  140. ):
  141. warn(
  142. "Entity header %r found in 304 response." % key, HTTPWarning
  143. )
  144. if bytes_sent:
  145. warn("304 responses must not have a body.", HTTPWarning)
  146. elif 100 <= status_code < 200 or status_code == 204:
  147. if content_length != 0:
  148. warn(
  149. "%r responses must have an empty content length." % status_code,
  150. HTTPWarning,
  151. )
  152. if bytes_sent:
  153. warn(
  154. "%r responses must not have a body." % status_code, HTTPWarning
  155. )
  156. elif content_length is not None and content_length != bytes_sent:
  157. warn(
  158. "Content-Length and the number of bytes sent to the client do not"
  159. " match.",
  160. WSGIWarning,
  161. )
  162. def __del__(self):
  163. if not self.closed:
  164. try:
  165. warn(
  166. "Iterator was garbage collected before it was closed.", WSGIWarning
  167. )
  168. except Exception:
  169. pass
  170. class LintMiddleware(object):
  171. """Warns about common errors in the WSGI and HTTP behavior of the
  172. server and wrapped application. Some of the issues it check are:
  173. - invalid status codes
  174. - non-bytestrings sent to the WSGI server
  175. - strings returned from the WSGI application
  176. - non-empty conditional responses
  177. - unquoted etags
  178. - relative URLs in the Location header
  179. - unsafe calls to wsgi.input
  180. - unclosed iterators
  181. Error information is emitted using the :mod:`warnings` module.
  182. :param app: The WSGI application to wrap.
  183. .. code-block:: python
  184. from werkzeug.middleware.lint import LintMiddleware
  185. app = LintMiddleware(app)
  186. """
  187. def __init__(self, app):
  188. self.app = app
  189. def check_environ(self, environ):
  190. if type(environ) is not dict:
  191. warn(
  192. "WSGI environment is not a standard Python dict.",
  193. WSGIWarning,
  194. stacklevel=4,
  195. )
  196. for key in (
  197. "REQUEST_METHOD",
  198. "SERVER_NAME",
  199. "SERVER_PORT",
  200. "wsgi.version",
  201. "wsgi.input",
  202. "wsgi.errors",
  203. "wsgi.multithread",
  204. "wsgi.multiprocess",
  205. "wsgi.run_once",
  206. ):
  207. if key not in environ:
  208. warn(
  209. "Required environment key %r not found" % key,
  210. WSGIWarning,
  211. stacklevel=3,
  212. )
  213. if environ["wsgi.version"] != (1, 0):
  214. warn("Environ is not a WSGI 1.0 environ.", WSGIWarning, stacklevel=3)
  215. script_name = environ.get("SCRIPT_NAME", "")
  216. path_info = environ.get("PATH_INFO", "")
  217. if script_name and script_name[0] != "/":
  218. warn(
  219. "'SCRIPT_NAME' does not start with a slash: %r" % script_name,
  220. WSGIWarning,
  221. stacklevel=3,
  222. )
  223. if path_info and path_info[0] != "/":
  224. warn(
  225. "'PATH_INFO' does not start with a slash: %r" % path_info,
  226. WSGIWarning,
  227. stacklevel=3,
  228. )
  229. def check_start_response(self, status, headers, exc_info):
  230. check_string("status", status)
  231. status_code = status.split(None, 1)[0]
  232. if len(status_code) != 3 or not status_code.isdigit():
  233. warn(WSGIWarning("Status code must be three digits"), stacklevel=3)
  234. if len(status) < 4 or status[3] != " ":
  235. warn(
  236. WSGIWarning(
  237. "Invalid value for status %r. Valid "
  238. "status strings are three digits, a space "
  239. "and a status explanation"
  240. ),
  241. stacklevel=3,
  242. )
  243. status_code = int(status_code)
  244. if status_code < 100:
  245. warn(WSGIWarning("status code < 100 detected"), stacklevel=3)
  246. if type(headers) is not list:
  247. warn(WSGIWarning("header list is not a list"), stacklevel=3)
  248. for item in headers:
  249. if type(item) is not tuple or len(item) != 2:
  250. warn(WSGIWarning("Headers must tuple 2-item tuples"), stacklevel=3)
  251. name, value = item
  252. if type(name) is not str or type(value) is not str:
  253. warn(WSGIWarning("header items must be strings"), stacklevel=3)
  254. if name.lower() == "status":
  255. warn(
  256. WSGIWarning(
  257. "The status header is not supported due to "
  258. "conflicts with the CGI spec."
  259. ),
  260. stacklevel=3,
  261. )
  262. if exc_info is not None and not isinstance(exc_info, tuple):
  263. warn(WSGIWarning("invalid value for exc_info"), stacklevel=3)
  264. headers = Headers(headers)
  265. self.check_headers(headers)
  266. return status_code, headers
  267. def check_headers(self, headers):
  268. etag = headers.get("etag")
  269. if etag is not None:
  270. if etag.startswith(("W/", "w/")):
  271. if etag.startswith("w/"):
  272. warn(
  273. HTTPWarning("weak etag indicator should be upcase."),
  274. stacklevel=4,
  275. )
  276. etag = etag[2:]
  277. if not (etag[:1] == etag[-1:] == '"'):
  278. warn(HTTPWarning("unquoted etag emitted."), stacklevel=4)
  279. location = headers.get("location")
  280. if location is not None:
  281. if not urlparse(location).netloc:
  282. warn(
  283. HTTPWarning("absolute URLs required for location header"),
  284. stacklevel=4,
  285. )
  286. def check_iterator(self, app_iter):
  287. if isinstance(app_iter, string_types):
  288. warn(
  289. "The application returned astring. The response will send one character"
  290. " at a time to the client, which will kill performance. Return a list"
  291. " or iterable instead.",
  292. WSGIWarning,
  293. stacklevel=3,
  294. )
  295. def __call__(self, *args, **kwargs):
  296. if len(args) != 2:
  297. warn("A WSGI app takes two arguments.", WSGIWarning, stacklevel=2)
  298. if kwargs:
  299. warn(
  300. "A WSGI app does not take keyword arguments.", WSGIWarning, stacklevel=2
  301. )
  302. environ, start_response = args
  303. self.check_environ(environ)
  304. environ["wsgi.input"] = InputStream(environ["wsgi.input"])
  305. environ["wsgi.errors"] = ErrorStream(environ["wsgi.errors"])
  306. # Hook our own file wrapper in so that applications will always
  307. # iterate to the end and we can check the content length.
  308. environ["wsgi.file_wrapper"] = FileWrapper
  309. headers_set = []
  310. chunks = []
  311. def checking_start_response(*args, **kwargs):
  312. if len(args) not in (2, 3):
  313. warn(
  314. "Invalid number of arguments: %s, expected 2 or 3." % len(args),
  315. WSGIWarning,
  316. stacklevel=2,
  317. )
  318. if kwargs:
  319. warn("'start_response' does not take keyword arguments.", WSGIWarning)
  320. status, headers = args[:2]
  321. if len(args) == 3:
  322. exc_info = args[2]
  323. else:
  324. exc_info = None
  325. headers_set[:] = self.check_start_response(status, headers, exc_info)
  326. return GuardedWrite(start_response(status, headers, exc_info), chunks)
  327. app_iter = self.app(environ, checking_start_response)
  328. self.check_iterator(app_iter)
  329. return GuardedIterator(app_iter, headers_set, chunks)