req_file.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. """
  2. Requirements file parsing
  3. """
  4. # The following comment should be removed at some point in the future.
  5. # mypy: strict-optional=False
  6. from __future__ import absolute_import
  7. import optparse
  8. import os
  9. import re
  10. import shlex
  11. import sys
  12. from pip._vendor.six.moves.urllib import parse as urllib_parse
  13. from pip._internal.cli import cmdoptions
  14. from pip._internal.exceptions import (
  15. InstallationError,
  16. RequirementsFileParseError,
  17. )
  18. from pip._internal.models.search_scope import SearchScope
  19. from pip._internal.utils.encoding import auto_decode
  20. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  21. from pip._internal.utils.urls import get_url_scheme
  22. if MYPY_CHECK_RUNNING:
  23. from optparse import Values
  24. from typing import (
  25. Any, Callable, Dict, Iterator, List, NoReturn, Optional, Text, Tuple,
  26. )
  27. from pip._internal.index.package_finder import PackageFinder
  28. from pip._internal.network.session import PipSession
  29. ReqFileLines = Iterator[Tuple[int, Text]]
  30. LineParser = Callable[[Text], Tuple[str, Values]]
# Names exported by ``from <this module> import *``.
__all__ = ['parse_requirements']

# Matches a leading 'http:', 'https:' or 'file:' scheme (case-insensitive).
SCHEME_RE = re.compile(r'^(http|https|file):', re.I)
# Matches a '#' comment running to the end of the string; the '#' must sit at
# the very start or be preceded by whitespace.
COMMENT_RE = re.compile(r'(^|\s+)#.*$')

# Matches environment variable-style values in '${MY_VARIABLE_1}' with the
# variable name consisting of only uppercase letters, digits or the '_'
# (underscore). This follows the POSIX standard defined in IEEE Std 1003.1,
# 2013 Edition.
# Group 'var' captures the whole '${NAME}' placeholder, group 'name' only the
# variable name inside the braces.
ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')
# Option factories registered on the per-line option parser; each entry is a
# callable returning an optparse.Option.
SUPPORTED_OPTIONS = [
    cmdoptions.index_url,
    cmdoptions.extra_index_url,
    cmdoptions.no_index,
    cmdoptions.constraints,
    cmdoptions.requirements,
    cmdoptions.editable,
    cmdoptions.find_links,
    cmdoptions.no_binary,
    cmdoptions.only_binary,
    cmdoptions.require_hashes,
    cmdoptions.pre,
    cmdoptions.trusted_host,
    cmdoptions.always_unzip,  # Deprecated
]  # type: List[Callable[..., optparse.Option]]

# options to be passed to requirements
SUPPORTED_OPTIONS_REQ = [
    cmdoptions.install_options,
    cmdoptions.global_options,
    cmdoptions.hash,
]  # type: List[Callable[..., optparse.Option]]

# the 'dest' string values
# (each factory is called once at import time to read the option's ``dest``)
SUPPORTED_OPTIONS_REQ_DEST = [str(o().dest) for o in SUPPORTED_OPTIONS_REQ]
  62. class ParsedRequirement(object):
  63. def __init__(
  64. self,
  65. requirement, # type:str
  66. is_editable, # type: bool
  67. comes_from, # type: str
  68. constraint, # type: bool
  69. options=None, # type: Optional[Dict[str, Any]]
  70. line_source=None, # type: Optional[str]
  71. ):
  72. # type: (...) -> None
  73. self.requirement = requirement
  74. self.is_editable = is_editable
  75. self.comes_from = comes_from
  76. self.options = options
  77. self.constraint = constraint
  78. self.line_source = line_source
  79. class ParsedLine(object):
  80. def __init__(
  81. self,
  82. filename, # type: str
  83. lineno, # type: int
  84. comes_from, # type: str
  85. args, # type: str
  86. opts, # type: Values
  87. constraint, # type: bool
  88. ):
  89. # type: (...) -> None
  90. self.filename = filename
  91. self.lineno = lineno
  92. self.comes_from = comes_from
  93. self.opts = opts
  94. self.constraint = constraint
  95. if args:
  96. self.is_requirement = True
  97. self.is_editable = False
  98. self.requirement = args
  99. elif opts.editables:
  100. self.is_requirement = True
  101. self.is_editable = True
  102. # We don't support multiple -e on one line
  103. self.requirement = opts.editables[0]
  104. else:
  105. self.is_requirement = False
  106. def parse_requirements(
  107. filename, # type: str
  108. session, # type: PipSession
  109. finder=None, # type: Optional[PackageFinder]
  110. comes_from=None, # type: Optional[str]
  111. options=None, # type: Optional[optparse.Values]
  112. constraint=False, # type: bool
  113. ):
  114. # type: (...) -> Iterator[ParsedRequirement]
  115. """Parse a requirements file and yield InstallRequirement instances.
  116. :param filename: Path or url of requirements file.
  117. :param session: PipSession instance.
  118. :param finder: Instance of pip.index.PackageFinder.
  119. :param comes_from: Origin description of requirements.
  120. :param options: cli options.
  121. :param constraint: If true, parsing a constraint file rather than
  122. requirements file.
  123. """
  124. line_parser = get_line_parser(finder)
  125. parser = RequirementsFileParser(session, line_parser, comes_from)
  126. for parsed_line in parser.parse(filename, constraint):
  127. parsed_req = handle_line(
  128. parsed_line,
  129. options=options,
  130. finder=finder,
  131. session=session
  132. )
  133. if parsed_req is not None:
  134. yield parsed_req
  135. def preprocess(content):
  136. # type: (Text) -> ReqFileLines
  137. """Split, filter, and join lines, and return a line iterator
  138. :param content: the content of the requirements file
  139. """
  140. lines_enum = enumerate(content.splitlines(), start=1) # type: ReqFileLines
  141. lines_enum = join_lines(lines_enum)
  142. lines_enum = ignore_comments(lines_enum)
  143. lines_enum = expand_env_variables(lines_enum)
  144. return lines_enum
  145. def handle_requirement_line(
  146. line, # type: ParsedLine
  147. options=None, # type: Optional[optparse.Values]
  148. ):
  149. # type: (...) -> ParsedRequirement
  150. # preserve for the nested code path
  151. line_comes_from = '{} {} (line {})'.format(
  152. '-c' if line.constraint else '-r', line.filename, line.lineno,
  153. )
  154. assert line.is_requirement
  155. if line.is_editable:
  156. # For editable requirements, we don't support per-requirement
  157. # options, so just return the parsed requirement.
  158. return ParsedRequirement(
  159. requirement=line.requirement,
  160. is_editable=line.is_editable,
  161. comes_from=line_comes_from,
  162. constraint=line.constraint,
  163. )
  164. else:
  165. if options:
  166. # Disable wheels if the user has specified build options
  167. cmdoptions.check_install_build_global(options, line.opts)
  168. # get the options that apply to requirements
  169. req_options = {}
  170. for dest in SUPPORTED_OPTIONS_REQ_DEST:
  171. if dest in line.opts.__dict__ and line.opts.__dict__[dest]:
  172. req_options[dest] = line.opts.__dict__[dest]
  173. line_source = 'line {} of {}'.format(line.lineno, line.filename)
  174. return ParsedRequirement(
  175. requirement=line.requirement,
  176. is_editable=line.is_editable,
  177. comes_from=line_comes_from,
  178. constraint=line.constraint,
  179. options=req_options,
  180. line_source=line_source,
  181. )
  182. def handle_option_line(
  183. opts, # type: Values
  184. filename, # type: str
  185. lineno, # type: int
  186. finder=None, # type: Optional[PackageFinder]
  187. options=None, # type: Optional[optparse.Values]
  188. session=None, # type: Optional[PipSession]
  189. ):
  190. # type: (...) -> None
  191. # percolate hash-checking option upward
  192. if opts.require_hashes:
  193. options.require_hashes = opts.require_hashes
  194. # set finder options
  195. elif finder:
  196. find_links = finder.find_links
  197. index_urls = finder.index_urls
  198. if opts.index_url:
  199. index_urls = [opts.index_url]
  200. if opts.no_index is True:
  201. index_urls = []
  202. if opts.extra_index_urls:
  203. index_urls.extend(opts.extra_index_urls)
  204. if opts.find_links:
  205. # FIXME: it would be nice to keep track of the source
  206. # of the find_links: support a find-links local path
  207. # relative to a requirements file.
  208. value = opts.find_links[0]
  209. req_dir = os.path.dirname(os.path.abspath(filename))
  210. relative_to_reqs_file = os.path.join(req_dir, value)
  211. if os.path.exists(relative_to_reqs_file):
  212. value = relative_to_reqs_file
  213. find_links.append(value)
  214. search_scope = SearchScope(
  215. find_links=find_links,
  216. index_urls=index_urls,
  217. )
  218. finder.search_scope = search_scope
  219. if opts.pre:
  220. finder.set_allow_all_prereleases()
  221. if session:
  222. for host in opts.trusted_hosts or []:
  223. source = 'line {} of {}'.format(lineno, filename)
  224. session.add_trusted_host(host, source=source)
  225. def handle_line(
  226. line, # type: ParsedLine
  227. options=None, # type: Optional[optparse.Values]
  228. finder=None, # type: Optional[PackageFinder]
  229. session=None, # type: Optional[PipSession]
  230. ):
  231. # type: (...) -> Optional[ParsedRequirement]
  232. """Handle a single parsed requirements line; This can result in
  233. creating/yielding requirements, or updating the finder.
  234. :param line: The parsed line to be processed.
  235. :param options: CLI options.
  236. :param finder: The finder - updated by non-requirement lines.
  237. :param session: The session - updated by non-requirement lines.
  238. Returns a ParsedRequirement object if the line is a requirement line,
  239. otherwise returns None.
  240. For lines that contain requirements, the only options that have an effect
  241. are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
  242. requirement. Other options from SUPPORTED_OPTIONS may be present, but are
  243. ignored.
  244. For lines that do not contain requirements, the only options that have an
  245. effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
  246. be present, but are ignored. These lines may contain multiple options
  247. (although our docs imply only one is supported), and all our parsed and
  248. affect the finder.
  249. """
  250. if line.is_requirement:
  251. parsed_req = handle_requirement_line(line, options)
  252. return parsed_req
  253. else:
  254. handle_option_line(
  255. line.opts,
  256. line.filename,
  257. line.lineno,
  258. finder,
  259. options,
  260. session,
  261. )
  262. return None
  263. class RequirementsFileParser(object):
  264. def __init__(
  265. self,
  266. session, # type: PipSession
  267. line_parser, # type: LineParser
  268. comes_from, # type: str
  269. ):
  270. # type: (...) -> None
  271. self._session = session
  272. self._line_parser = line_parser
  273. self._comes_from = comes_from
  274. def parse(self, filename, constraint):
  275. # type: (str, bool) -> Iterator[ParsedLine]
  276. """Parse a given file, yielding parsed lines.
  277. """
  278. for line in self._parse_and_recurse(filename, constraint):
  279. yield line
  280. def _parse_and_recurse(self, filename, constraint):
  281. # type: (str, bool) -> Iterator[ParsedLine]
  282. for line in self._parse_file(filename, constraint):
  283. if (
  284. not line.is_requirement and
  285. (line.opts.requirements or line.opts.constraints)
  286. ):
  287. # parse a nested requirements file
  288. if line.opts.requirements:
  289. req_path = line.opts.requirements[0]
  290. nested_constraint = False
  291. else:
  292. req_path = line.opts.constraints[0]
  293. nested_constraint = True
  294. # original file is over http
  295. if SCHEME_RE.search(filename):
  296. # do a url join so relative paths work
  297. req_path = urllib_parse.urljoin(filename, req_path)
  298. # original file and nested file are paths
  299. elif not SCHEME_RE.search(req_path):
  300. # do a join so relative paths work
  301. req_path = os.path.join(
  302. os.path.dirname(filename), req_path,
  303. )
  304. for inner_line in self._parse_and_recurse(
  305. req_path, nested_constraint,
  306. ):
  307. yield inner_line
  308. else:
  309. yield line
  310. def _parse_file(self, filename, constraint):
  311. # type: (str, bool) -> Iterator[ParsedLine]
  312. _, content = get_file_content(
  313. filename, self._session, comes_from=self._comes_from
  314. )
  315. lines_enum = preprocess(content)
  316. for line_number, line in lines_enum:
  317. try:
  318. args_str, opts = self._line_parser(line)
  319. except OptionParsingError as e:
  320. # add offending line
  321. msg = 'Invalid requirement: {}\n{}'.format(line, e.msg)
  322. raise RequirementsFileParseError(msg)
  323. yield ParsedLine(
  324. filename,
  325. line_number,
  326. self._comes_from,
  327. args_str,
  328. opts,
  329. constraint,
  330. )
  331. def get_line_parser(finder):
  332. # type: (Optional[PackageFinder]) -> LineParser
  333. def parse_line(line):
  334. # type: (Text) -> Tuple[str, Values]
  335. # Build new parser for each line since it accumulates appendable
  336. # options.
  337. parser = build_parser()
  338. defaults = parser.get_default_values()
  339. defaults.index_url = None
  340. if finder:
  341. defaults.format_control = finder.format_control
  342. args_str, options_str = break_args_options(line)
  343. # Prior to 2.7.3, shlex cannot deal with unicode entries
  344. if sys.version_info < (2, 7, 3):
  345. # https://github.com/python/mypy/issues/1174
  346. options_str = options_str.encode('utf8') # type: ignore
  347. # https://github.com/python/mypy/issues/1174
  348. opts, _ = parser.parse_args(
  349. shlex.split(options_str), defaults) # type: ignore
  350. return args_str, opts
  351. return parse_line
  352. def break_args_options(line):
  353. # type: (Text) -> Tuple[str, Text]
  354. """Break up the line into an args and options string. We only want to shlex
  355. (and then optparse) the options, not the args. args can contain markers
  356. which are corrupted by shlex.
  357. """
  358. tokens = line.split(' ')
  359. args = []
  360. options = tokens[:]
  361. for token in tokens:
  362. if token.startswith('-') or token.startswith('--'):
  363. break
  364. else:
  365. args.append(token)
  366. options.pop(0)
  367. return ' '.join(args), ' '.join(options) # type: ignore
  368. class OptionParsingError(Exception):
  369. def __init__(self, msg):
  370. # type: (str) -> None
  371. self.msg = msg
  372. def build_parser():
  373. # type: () -> optparse.OptionParser
  374. """
  375. Return a parser for parsing requirement lines
  376. """
  377. parser = optparse.OptionParser(add_help_option=False)
  378. option_factories = SUPPORTED_OPTIONS + SUPPORTED_OPTIONS_REQ
  379. for option_factory in option_factories:
  380. option = option_factory()
  381. parser.add_option(option)
  382. # By default optparse sys.exits on parsing errors. We want to wrap
  383. # that in our own exception.
  384. def parser_exit(self, msg):
  385. # type: (Any, str) -> NoReturn
  386. raise OptionParsingError(msg)
  387. # NOTE: mypy disallows assigning to a method
  388. # https://github.com/python/mypy/issues/2427
  389. parser.exit = parser_exit # type: ignore
  390. return parser
  391. def join_lines(lines_enum):
  392. # type: (ReqFileLines) -> ReqFileLines
  393. """Joins a line ending in '\' with the previous line (except when following
  394. comments). The joined line takes on the index of the first line.
  395. """
  396. primary_line_number = None
  397. new_line = [] # type: List[Text]
  398. for line_number, line in lines_enum:
  399. if not line.endswith('\\') or COMMENT_RE.match(line):
  400. if COMMENT_RE.match(line):
  401. # this ensures comments are always matched later
  402. line = ' ' + line
  403. if new_line:
  404. new_line.append(line)
  405. yield primary_line_number, ''.join(new_line)
  406. new_line = []
  407. else:
  408. yield line_number, line
  409. else:
  410. if not new_line:
  411. primary_line_number = line_number
  412. new_line.append(line.strip('\\'))
  413. # last line contains \
  414. if new_line:
  415. yield primary_line_number, ''.join(new_line)
  416. # TODO: handle space after '\'.
  417. def ignore_comments(lines_enum):
  418. # type: (ReqFileLines) -> ReqFileLines
  419. """
  420. Strips comments and filter empty lines.
  421. """
  422. for line_number, line in lines_enum:
  423. line = COMMENT_RE.sub('', line)
  424. line = line.strip()
  425. if line:
  426. yield line_number, line
  427. def expand_env_variables(lines_enum):
  428. # type: (ReqFileLines) -> ReqFileLines
  429. """Replace all environment variables that can be retrieved via `os.getenv`.
  430. The only allowed format for environment variables defined in the
  431. requirement file is `${MY_VARIABLE_1}` to ensure two things:
  432. 1. Strings that contain a `$` aren't accidentally (partially) expanded.
  433. 2. Ensure consistency across platforms for requirement files.
  434. These points are the result of a discussion on the `github pull
  435. request #3514 <https://github.com/pypa/pip/pull/3514>`_.
  436. Valid characters in variable names follow the `POSIX standard
  437. <http://pubs.opengroup.org/onlinepubs/9699919799/>`_ and are limited
  438. to uppercase letter, digits and the `_` (underscore).
  439. """
  440. for line_number, line in lines_enum:
  441. for env_var, var_name in ENV_VAR_RE.findall(line):
  442. value = os.getenv(var_name)
  443. if not value:
  444. continue
  445. line = line.replace(env_var, value)
  446. yield line_number, line
  447. def get_file_content(url, session, comes_from=None):
  448. # type: (str, PipSession, Optional[str]) -> Tuple[str, Text]
  449. """Gets the content of a file; it may be a filename, file: URL, or
  450. http: URL. Returns (location, content). Content is unicode.
  451. Respects # -*- coding: declarations on the retrieved files.
  452. :param url: File path or url.
  453. :param session: PipSession instance.
  454. :param comes_from: Origin description of requirements.
  455. """
  456. scheme = get_url_scheme(url)
  457. if scheme in ['http', 'https']:
  458. # FIXME: catch some errors
  459. resp = session.get(url)
  460. resp.raise_for_status()
  461. return resp.url, resp.text
  462. elif scheme == 'file':
  463. if comes_from and comes_from.startswith('http'):
  464. raise InstallationError(
  465. 'Requirements file {} references URL {}, '
  466. 'which is local'.format(comes_from, url)
  467. )
  468. path = url.split(':', 1)[1]
  469. path = path.replace('\\', '/')
  470. match = _url_slash_drive_re.match(path)
  471. if match:
  472. path = match.group(1) + ':' + path.split('|', 1)[1]
  473. path = urllib_parse.unquote(path)
  474. if path.startswith('/'):
  475. path = '/' + path.lstrip('/')
  476. url = path
  477. try:
  478. with open(url, 'rb') as f:
  479. content = auto_decode(f.read())
  480. except IOError as exc:
  481. raise InstallationError(
  482. 'Could not open requirements file: {}'.format(exc)
  483. )
  484. return url, content
# Matches any number of leading slashes followed by a single drive letter and
# a '|' (for example '/C|'), case-insensitively; group 1 is the drive letter.
_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)