lazy_wheel.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. """Lazy ZIP over HTTP"""
  2. __all__ = ['HTTPRangeRequestUnsupported', 'dist_from_wheel_url']
  3. from bisect import bisect_left, bisect_right
  4. from contextlib import contextmanager
  5. from tempfile import NamedTemporaryFile
  6. from zipfile import BadZipfile, ZipFile
  7. from pip._vendor.requests.models import CONTENT_CHUNK_SIZE
  8. from pip._vendor.six.moves import range
  9. from pip._internal.network.utils import (
  10. HEADERS,
  11. raise_for_status,
  12. response_chunks,
  13. )
  14. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  15. from pip._internal.utils.wheel import pkg_resources_distribution_for_wheel
  16. if MYPY_CHECK_RUNNING:
  17. from typing import Any, Dict, Iterator, List, Optional, Tuple
  18. from pip._vendor.pkg_resources import Distribution
  19. from pip._vendor.requests.models import Response
  20. from pip._internal.network.session import PipSession
  21. class HTTPRangeRequestUnsupported(Exception):
  22. pass
  23. def dist_from_wheel_url(name, url, session):
  24. # type: (str, str, PipSession) -> Distribution
  25. """Return a pkg_resources.Distribution from the given wheel URL.
  26. This uses HTTP range requests to only fetch the potion of the wheel
  27. containing metadata, just enough for the object to be constructed.
  28. If such requests are not supported, HTTPRangeRequestUnsupported
  29. is raised.
  30. """
  31. with LazyZipOverHTTP(url, session) as wheel:
  32. # For read-only ZIP files, ZipFile only needs methods read,
  33. # seek, seekable and tell, not the whole IO protocol.
  34. zip_file = ZipFile(wheel) # type: ignore
  35. # After context manager exit, wheel.name
  36. # is an invalid file by intention.
  37. return pkg_resources_distribution_for_wheel(zip_file, name, wheel.name)
  38. class LazyZipOverHTTP(object):
  39. """File-like object mapped to a ZIP file over HTTP.
  40. This uses HTTP range requests to lazily fetch the file's content,
  41. which is supposed to be fed to ZipFile. If such requests are not
  42. supported by the server, raise HTTPRangeRequestUnsupported
  43. during initialization.
  44. """
  45. def __init__(self, url, session, chunk_size=CONTENT_CHUNK_SIZE):
  46. # type: (str, PipSession, int) -> None
  47. head = session.head(url, headers=HEADERS)
  48. raise_for_status(head)
  49. assert head.status_code == 200
  50. self._session, self._url, self._chunk_size = session, url, chunk_size
  51. self._length = int(head.headers['Content-Length'])
  52. self._file = NamedTemporaryFile()
  53. self.truncate(self._length)
  54. self._left = [] # type: List[int]
  55. self._right = [] # type: List[int]
  56. if 'bytes' not in head.headers.get('Accept-Ranges', 'none'):
  57. raise HTTPRangeRequestUnsupported('range request is not supported')
  58. self._check_zip()
  59. @property
  60. def mode(self):
  61. # type: () -> str
  62. """Opening mode, which is always rb."""
  63. return 'rb'
  64. @property
  65. def name(self):
  66. # type: () -> str
  67. """Path to the underlying file."""
  68. return self._file.name
  69. def seekable(self):
  70. # type: () -> bool
  71. """Return whether random access is supported, which is True."""
  72. return True
  73. def close(self):
  74. # type: () -> None
  75. """Close the file."""
  76. self._file.close()
  77. @property
  78. def closed(self):
  79. # type: () -> bool
  80. """Whether the file is closed."""
  81. return self._file.closed
  82. def read(self, size=-1):
  83. # type: (int) -> bytes
  84. """Read up to size bytes from the object and return them.
  85. As a convenience, if size is unspecified or -1,
  86. all bytes until EOF are returned. Fewer than
  87. size bytes may be returned if EOF is reached.
  88. """
  89. download_size = max(size, self._chunk_size)
  90. start, length = self.tell(), self._length
  91. stop = length if size < 0 else min(start+download_size, length)
  92. start = max(0, stop-download_size)
  93. self._download(start, stop-1)
  94. return self._file.read(size)
  95. def readable(self):
  96. # type: () -> bool
  97. """Return whether the file is readable, which is True."""
  98. return True
  99. def seek(self, offset, whence=0):
  100. # type: (int, int) -> int
  101. """Change stream position and return the new absolute position.
  102. Seek to offset relative position indicated by whence:
  103. * 0: Start of stream (the default). pos should be >= 0;
  104. * 1: Current position - pos may be negative;
  105. * 2: End of stream - pos usually negative.
  106. """
  107. return self._file.seek(offset, whence)
  108. def tell(self):
  109. # type: () -> int
  110. """Return the current possition."""
  111. return self._file.tell()
  112. def truncate(self, size=None):
  113. # type: (Optional[int]) -> int
  114. """Resize the stream to the given size in bytes.
  115. If size is unspecified resize to the current position.
  116. The current stream position isn't changed.
  117. Return the new file size.
  118. """
  119. return self._file.truncate(size)
  120. def writable(self):
  121. # type: () -> bool
  122. """Return False."""
  123. return False
  124. def __enter__(self):
  125. # type: () -> LazyZipOverHTTP
  126. self._file.__enter__()
  127. return self
  128. def __exit__(self, *exc):
  129. # type: (*Any) -> Optional[bool]
  130. return self._file.__exit__(*exc)
  131. @contextmanager
  132. def _stay(self):
  133. # type: ()-> Iterator[None]
  134. """Return a context manager keeping the position.
  135. At the end of the block, seek back to original position.
  136. """
  137. pos = self.tell()
  138. try:
  139. yield
  140. finally:
  141. self.seek(pos)
  142. def _check_zip(self):
  143. # type: () -> None
  144. """Check and download until the file is a valid ZIP."""
  145. end = self._length - 1
  146. for start in reversed(range(0, end, self._chunk_size)):
  147. self._download(start, end)
  148. with self._stay():
  149. try:
  150. # For read-only ZIP files, ZipFile only needs
  151. # methods read, seek, seekable and tell.
  152. ZipFile(self) # type: ignore
  153. except BadZipfile:
  154. pass
  155. else:
  156. break
  157. def _stream_response(self, start, end, base_headers=HEADERS):
  158. # type: (int, int, Dict[str, str]) -> Response
  159. """Return HTTP response to a range request from start to end."""
  160. headers = base_headers.copy()
  161. headers['Range'] = 'bytes={}-{}'.format(start, end)
  162. # TODO: Get range requests to be correctly cached
  163. headers['Cache-Control'] = 'no-cache'
  164. return self._session.get(self._url, headers=headers, stream=True)
  165. def _merge(self, start, end, left, right):
  166. # type: (int, int, int, int) -> Iterator[Tuple[int, int]]
  167. """Return an iterator of intervals to be fetched.
  168. Args:
  169. start (int): Start of needed interval
  170. end (int): End of needed interval
  171. left (int): Index of first overlapping downloaded data
  172. right (int): Index after last overlapping downloaded data
  173. """
  174. lslice, rslice = self._left[left:right], self._right[left:right]
  175. i = start = min([start]+lslice[:1])
  176. end = max([end]+rslice[-1:])
  177. for j, k in zip(lslice, rslice):
  178. if j > i:
  179. yield i, j-1
  180. i = k + 1
  181. if i <= end:
  182. yield i, end
  183. self._left[left:right], self._right[left:right] = [start], [end]
  184. def _download(self, start, end):
  185. # type: (int, int) -> None
  186. """Download bytes from start to end inclusively."""
  187. with self._stay():
  188. left = bisect_left(self._right, start)
  189. right = bisect_right(self._left, end)
  190. for start, end in self._merge(start, end, left, right):
  191. response = self._stream_response(start, end)
  192. response.raise_for_status()
  193. self.seek(start)
  194. for chunk in response_chunks(response, self._chunk_size):
  195. self._file.write(chunk)