session.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. """PipSession and supporting code, containing all pip-specific
  2. network request configuration and behavior.
  3. """
  4. # The following comment should be removed at some point in the future.
  5. # mypy: disallow-untyped-defs=False
  6. import email.utils
  7. import json
  8. import logging
  9. import mimetypes
  10. import os
  11. import platform
  12. import sys
  13. import warnings
  14. from pip._vendor import requests, six, urllib3
  15. from pip._vendor.cachecontrol import CacheControlAdapter
  16. from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
  17. from pip._vendor.requests.models import Response
  18. from pip._vendor.requests.structures import CaseInsensitiveDict
  19. from pip._vendor.six.moves.urllib import parse as urllib_parse
  20. from pip._vendor.urllib3.exceptions import InsecureRequestWarning
  21. from pip import __version__
  22. from pip._internal.network.auth import MultiDomainBasicAuth
  23. from pip._internal.network.cache import SafeFileCache
  24. # Import ssl from compat so the initial import occurs in only one place.
  25. from pip._internal.utils.compat import has_tls, ipaddress
  26. from pip._internal.utils.glibc import libc_ver
  27. from pip._internal.utils.misc import (
  28. build_url_from_netloc,
  29. get_installed_version,
  30. parse_netloc,
  31. )
  32. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  33. from pip._internal.utils.urls import url_to_path
if MYPY_CHECK_RUNNING:
    # These imports, and the alias built from them, are only evaluated when
    # MYPY_CHECK_RUNNING is true. Elsewhere in this module the names appear
    # only inside "# type:" comments.
    from typing import (
        Iterator, List, Optional, Tuple, Union,
    )

    from pip._internal.models.link import Link

    # A (protocol, hostname, port) triple; the port may be an int, a string,
    # or None.
    SecureOrigin = Tuple[str, str, Optional[Union[int, str]]]


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Ignore warning raised when using --trusted-host.
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
  43. SECURE_ORIGINS = [
  44. # protocol, hostname, port
  45. # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
  46. ("https", "*", "*"),
  47. ("*", "localhost", "*"),
  48. ("*", "127.0.0.0/8", "*"),
  49. ("*", "::1/128", "*"),
  50. ("file", "*", None),
  51. # ssh is always secure.
  52. ("ssh", "*", "*"),
  53. ] # type: List[SecureOrigin]
  54. # These are environment variables present when running under various
  55. # CI systems. For each variable, some CI systems that use the variable
  56. # are indicated. The collection was chosen so that for each of a number
  57. # of popular systems, at least one of the environment variables is used.
  58. # This list is used to provide some indication of and lower bound for
  59. # CI traffic to PyPI. Thus, it is okay if the list is not comprehensive.
  60. # For more background, see: https://github.com/pypa/pip/issues/5499
  61. CI_ENVIRONMENT_VARIABLES = (
  62. # Azure Pipelines
  63. 'BUILD_BUILDID',
  64. # Jenkins
  65. 'BUILD_ID',
  66. # AppVeyor, CircleCI, Codeship, Gitlab CI, Shippable, Travis CI
  67. 'CI',
  68. # Explicit environment variable.
  69. 'PIP_IS_CI',
  70. )
  71. def looks_like_ci():
  72. # type: () -> bool
  73. """
  74. Return whether it looks like pip is running under CI.
  75. """
  76. # We don't use the method of checking for a tty (e.g. using isatty())
  77. # because some CI systems mimic a tty (e.g. Travis CI). Thus that
  78. # method doesn't provide definitive information in either direction.
  79. return any(name in os.environ for name in CI_ENVIRONMENT_VARIABLES)
  80. def user_agent():
  81. """
  82. Return a string representing the user agent.
  83. """
  84. data = {
  85. "installer": {"name": "pip", "version": __version__},
  86. "python": platform.python_version(),
  87. "implementation": {
  88. "name": platform.python_implementation(),
  89. },
  90. }
  91. if data["implementation"]["name"] == 'CPython':
  92. data["implementation"]["version"] = platform.python_version()
  93. elif data["implementation"]["name"] == 'PyPy':
  94. if sys.pypy_version_info.releaselevel == 'final':
  95. pypy_version_info = sys.pypy_version_info[:3]
  96. else:
  97. pypy_version_info = sys.pypy_version_info
  98. data["implementation"]["version"] = ".".join(
  99. [str(x) for x in pypy_version_info]
  100. )
  101. elif data["implementation"]["name"] == 'Jython':
  102. # Complete Guess
  103. data["implementation"]["version"] = platform.python_version()
  104. elif data["implementation"]["name"] == 'IronPython':
  105. # Complete Guess
  106. data["implementation"]["version"] = platform.python_version()
  107. if sys.platform.startswith("linux"):
  108. from pip._vendor import distro
  109. distro_infos = dict(filter(
  110. lambda x: x[1],
  111. zip(["name", "version", "id"], distro.linux_distribution()),
  112. ))
  113. libc = dict(filter(
  114. lambda x: x[1],
  115. zip(["lib", "version"], libc_ver()),
  116. ))
  117. if libc:
  118. distro_infos["libc"] = libc
  119. if distro_infos:
  120. data["distro"] = distro_infos
  121. if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
  122. data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
  123. if platform.system():
  124. data.setdefault("system", {})["name"] = platform.system()
  125. if platform.release():
  126. data.setdefault("system", {})["release"] = platform.release()
  127. if platform.machine():
  128. data["cpu"] = platform.machine()
  129. if has_tls():
  130. import _ssl as ssl
  131. data["openssl_version"] = ssl.OPENSSL_VERSION
  132. setuptools_version = get_installed_version("setuptools")
  133. if setuptools_version is not None:
  134. data["setuptools_version"] = setuptools_version
  135. # Use None rather than False so as not to give the impression that
  136. # pip knows it is not being run under CI. Rather, it is a null or
  137. # inconclusive result. Also, we include some value rather than no
  138. # value to make it easier to know that the check has been run.
  139. data["ci"] = True if looks_like_ci() else None
  140. user_data = os.environ.get("PIP_USER_AGENT_USER_DATA")
  141. if user_data is not None:
  142. data["user_data"] = user_data
  143. return "{data[installer][name]}/{data[installer][version]} {json}".format(
  144. data=data,
  145. json=json.dumps(data, separators=(",", ":"), sort_keys=True),
  146. )
  147. class LocalFSAdapter(BaseAdapter):
  148. def send(self, request, stream=None, timeout=None, verify=None, cert=None,
  149. proxies=None):
  150. pathname = url_to_path(request.url)
  151. resp = Response()
  152. resp.status_code = 200
  153. resp.url = request.url
  154. try:
  155. stats = os.stat(pathname)
  156. except OSError as exc:
  157. resp.status_code = 404
  158. resp.raw = exc
  159. else:
  160. modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
  161. content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
  162. resp.headers = CaseInsensitiveDict({
  163. "Content-Type": content_type,
  164. "Content-Length": stats.st_size,
  165. "Last-Modified": modified,
  166. })
  167. resp.raw = open(pathname, "rb")
  168. resp.close = resp.raw.close
  169. return resp
  170. def close(self):
  171. pass
  172. class InsecureHTTPAdapter(HTTPAdapter):
  173. def cert_verify(self, conn, url, verify, cert):
  174. super(InsecureHTTPAdapter, self).cert_verify(
  175. conn=conn, url=url, verify=False, cert=cert
  176. )
  177. class InsecureCacheControlAdapter(CacheControlAdapter):
  178. def cert_verify(self, conn, url, verify, cert):
  179. super(InsecureCacheControlAdapter, self).cert_verify(
  180. conn=conn, url=url, verify=False, cert=cert
  181. )
  182. class PipSession(requests.Session):
  183. timeout = None # type: Optional[int]
  184. def __init__(self, *args, **kwargs):
  185. """
  186. :param trusted_hosts: Domains not to emit warnings for when not using
  187. HTTPS.
  188. """
  189. retries = kwargs.pop("retries", 0)
  190. cache = kwargs.pop("cache", None)
  191. trusted_hosts = kwargs.pop("trusted_hosts", []) # type: List[str]
  192. index_urls = kwargs.pop("index_urls", None)
  193. super(PipSession, self).__init__(*args, **kwargs)
  194. # Namespace the attribute with "pip_" just in case to prevent
  195. # possible conflicts with the base class.
  196. self.pip_trusted_origins = [] # type: List[Tuple[str, Optional[int]]]
  197. # Attach our User Agent to the request
  198. self.headers["User-Agent"] = user_agent()
  199. # Attach our Authentication handler to the session
  200. self.auth = MultiDomainBasicAuth(index_urls=index_urls)
  201. # Create our urllib3.Retry instance which will allow us to customize
  202. # how we handle retries.
  203. retries = urllib3.Retry(
  204. # Set the total number of retries that a particular request can
  205. # have.
  206. total=retries,
  207. # A 503 error from PyPI typically means that the Fastly -> Origin
  208. # connection got interrupted in some way. A 503 error in general
  209. # is typically considered a transient error so we'll go ahead and
  210. # retry it.
  211. # A 500 may indicate transient error in Amazon S3
  212. # A 520 or 527 - may indicate transient error in CloudFlare
  213. status_forcelist=[500, 503, 520, 527],
  214. # Add a small amount of back off between failed requests in
  215. # order to prevent hammering the service.
  216. backoff_factor=0.25,
  217. )
  218. # Our Insecure HTTPAdapter disables HTTPS validation. It does not
  219. # support caching so we'll use it for all http:// URLs.
  220. # If caching is disabled, we will also use it for
  221. # https:// hosts that we've marked as ignoring
  222. # TLS errors for (trusted-hosts).
  223. insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
  224. # We want to _only_ cache responses on securely fetched origins or when
  225. # the host is specified as trusted. We do this because
  226. # we can't validate the response of an insecurely/untrusted fetched
  227. # origin, and we don't want someone to be able to poison the cache and
  228. # require manual eviction from the cache to fix it.
  229. if cache:
  230. secure_adapter = CacheControlAdapter(
  231. cache=SafeFileCache(cache),
  232. max_retries=retries,
  233. )
  234. self._trusted_host_adapter = InsecureCacheControlAdapter(
  235. cache=SafeFileCache(cache),
  236. max_retries=retries,
  237. )
  238. else:
  239. secure_adapter = HTTPAdapter(max_retries=retries)
  240. self._trusted_host_adapter = insecure_adapter
  241. self.mount("https://", secure_adapter)
  242. self.mount("http://", insecure_adapter)
  243. # Enable file:// urls
  244. self.mount("file://", LocalFSAdapter())
  245. for host in trusted_hosts:
  246. self.add_trusted_host(host, suppress_logging=True)
  247. def add_trusted_host(self, host, source=None, suppress_logging=False):
  248. # type: (str, Optional[str], bool) -> None
  249. """
  250. :param host: It is okay to provide a host that has previously been
  251. added.
  252. :param source: An optional source string, for logging where the host
  253. string came from.
  254. """
  255. if not suppress_logging:
  256. msg = 'adding trusted host: {!r}'.format(host)
  257. if source is not None:
  258. msg += ' (from {})'.format(source)
  259. logger.info(msg)
  260. host_port = parse_netloc(host)
  261. if host_port not in self.pip_trusted_origins:
  262. self.pip_trusted_origins.append(host_port)
  263. self.mount(
  264. build_url_from_netloc(host) + '/',
  265. self._trusted_host_adapter
  266. )
  267. if not host_port[1]:
  268. # Mount wildcard ports for the same host.
  269. self.mount(
  270. build_url_from_netloc(host) + ':',
  271. self._trusted_host_adapter
  272. )
  273. def iter_secure_origins(self):
  274. # type: () -> Iterator[SecureOrigin]
  275. for secure_origin in SECURE_ORIGINS:
  276. yield secure_origin
  277. for host, port in self.pip_trusted_origins:
  278. yield ('*', host, '*' if port is None else port)
  279. def is_secure_origin(self, location):
  280. # type: (Link) -> bool
  281. # Determine if this url used a secure transport mechanism
  282. parsed = urllib_parse.urlparse(str(location))
  283. origin_protocol, origin_host, origin_port = (
  284. parsed.scheme, parsed.hostname, parsed.port,
  285. )
  286. # The protocol to use to see if the protocol matches.
  287. # Don't count the repository type as part of the protocol: in
  288. # cases such as "git+ssh", only use "ssh". (I.e., Only verify against
  289. # the last scheme.)
  290. origin_protocol = origin_protocol.rsplit('+', 1)[-1]
  291. # Determine if our origin is a secure origin by looking through our
  292. # hardcoded list of secure origins, as well as any additional ones
  293. # configured on this PackageFinder instance.
  294. for secure_origin in self.iter_secure_origins():
  295. secure_protocol, secure_host, secure_port = secure_origin
  296. if origin_protocol != secure_protocol and secure_protocol != "*":
  297. continue
  298. try:
  299. addr = ipaddress.ip_address(
  300. None
  301. if origin_host is None
  302. else six.ensure_text(origin_host)
  303. )
  304. network = ipaddress.ip_network(
  305. six.ensure_text(secure_host)
  306. )
  307. except ValueError:
  308. # We don't have both a valid address or a valid network, so
  309. # we'll check this origin against hostnames.
  310. if (
  311. origin_host and
  312. origin_host.lower() != secure_host.lower() and
  313. secure_host != "*"
  314. ):
  315. continue
  316. else:
  317. # We have a valid address and network, so see if the address
  318. # is contained within the network.
  319. if addr not in network:
  320. continue
  321. # Check to see if the port matches.
  322. if (
  323. origin_port != secure_port and
  324. secure_port != "*" and
  325. secure_port is not None
  326. ):
  327. continue
  328. # If we've gotten here, then this origin matches the current
  329. # secure origin and we should return True
  330. return True
  331. # If we've gotten to this point, then the origin isn't secure and we
  332. # will not accept it as a valid location to search. We will however
  333. # log a warning that we are ignoring it.
  334. logger.warning(
  335. "The repository located at %s is not a trusted or secure host and "
  336. "is being ignored. If this repository is available via HTTPS we "
  337. "recommend you use HTTPS instead, otherwise you may silence "
  338. "this warning and allow it anyway with '--trusted-host %s'.",
  339. origin_host,
  340. origin_host,
  341. )
  342. return False
  343. def request(self, method, url, *args, **kwargs):
  344. # Allow setting a default timeout on a session
  345. kwargs.setdefault("timeout", self.timeout)
  346. # Dispatch the actual request
  347. return super(PipSession, self).request(method, url, *args, **kwargs)