req_file.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. """
  2. Requirements file parsing
  3. """
  4. from __future__ import absolute_import
  5. import optparse
  6. import os
  7. import re
  8. import shlex
  9. import sys
  10. from pip._vendor.six.moves.urllib import parse as urllib_parse
  11. from pip._internal.cli import cmdoptions
  12. from pip._internal.exceptions import (
  13. InstallationError,
  14. RequirementsFileParseError,
  15. )
  16. from pip._internal.models.search_scope import SearchScope
  17. from pip._internal.network.utils import raise_for_status
  18. from pip._internal.utils.encoding import auto_decode
  19. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  20. from pip._internal.utils.urls import get_url_scheme
  21. if MYPY_CHECK_RUNNING:
  22. from optparse import Values
  23. from typing import (
  24. Any, Callable, Dict, Iterator, List, NoReturn, Optional, Text, Tuple,
  25. )
  26. from pip._internal.index.package_finder import PackageFinder
  27. from pip._internal.network.session import PipSession
  28. ReqFileLines = Iterator[Tuple[int, Text]]
  29. LineParser = Callable[[Text], Tuple[str, Values]]
  30. __all__ = ['parse_requirements']
  31. SCHEME_RE = re.compile(r'^(http|https|file):', re.I)
  32. COMMENT_RE = re.compile(r'(^|\s+)#.*$')
  33. # Matches environment variable-style values in '${MY_VARIABLE_1}' with the
  34. # variable name consisting of only uppercase letters, digits or the '_'
  35. # (underscore). This follows the POSIX standard defined in IEEE Std 1003.1,
  36. # 2013 Edition.
  37. ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')
  38. SUPPORTED_OPTIONS = [
  39. cmdoptions.index_url,
  40. cmdoptions.extra_index_url,
  41. cmdoptions.no_index,
  42. cmdoptions.constraints,
  43. cmdoptions.requirements,
  44. cmdoptions.editable,
  45. cmdoptions.find_links,
  46. cmdoptions.no_binary,
  47. cmdoptions.only_binary,
  48. cmdoptions.prefer_binary,
  49. cmdoptions.require_hashes,
  50. cmdoptions.pre,
  51. cmdoptions.trusted_host,
  52. cmdoptions.use_new_feature,
  53. ] # type: List[Callable[..., optparse.Option]]
  54. # options to be passed to requirements
  55. SUPPORTED_OPTIONS_REQ = [
  56. cmdoptions.install_options,
  57. cmdoptions.global_options,
  58. cmdoptions.hash,
  59. ] # type: List[Callable[..., optparse.Option]]
  60. # the 'dest' string values
  61. SUPPORTED_OPTIONS_REQ_DEST = [str(o().dest) for o in SUPPORTED_OPTIONS_REQ]
  62. class ParsedRequirement(object):
  63. def __init__(
  64. self,
  65. requirement, # type:str
  66. is_editable, # type: bool
  67. comes_from, # type: str
  68. constraint, # type: bool
  69. options=None, # type: Optional[Dict[str, Any]]
  70. line_source=None, # type: Optional[str]
  71. ):
  72. # type: (...) -> None
  73. self.requirement = requirement
  74. self.is_editable = is_editable
  75. self.comes_from = comes_from
  76. self.options = options
  77. self.constraint = constraint
  78. self.line_source = line_source
  79. class ParsedLine(object):
  80. def __init__(
  81. self,
  82. filename, # type: str
  83. lineno, # type: int
  84. comes_from, # type: Optional[str]
  85. args, # type: str
  86. opts, # type: Values
  87. constraint, # type: bool
  88. ):
  89. # type: (...) -> None
  90. self.filename = filename
  91. self.lineno = lineno
  92. self.comes_from = comes_from
  93. self.opts = opts
  94. self.constraint = constraint
  95. if args:
  96. self.is_requirement = True
  97. self.is_editable = False
  98. self.requirement = args
  99. elif opts.editables:
  100. self.is_requirement = True
  101. self.is_editable = True
  102. # We don't support multiple -e on one line
  103. self.requirement = opts.editables[0]
  104. else:
  105. self.is_requirement = False
  106. def parse_requirements(
  107. filename, # type: str
  108. session, # type: PipSession
  109. finder=None, # type: Optional[PackageFinder]
  110. comes_from=None, # type: Optional[str]
  111. options=None, # type: Optional[optparse.Values]
  112. constraint=False, # type: bool
  113. ):
  114. # type: (...) -> Iterator[ParsedRequirement]
  115. """Parse a requirements file and yield ParsedRequirement instances.
  116. :param filename: Path or url of requirements file.
  117. :param session: PipSession instance.
  118. :param finder: Instance of pip.index.PackageFinder.
  119. :param comes_from: Origin description of requirements.
  120. :param options: cli options.
  121. :param constraint: If true, parsing a constraint file rather than
  122. requirements file.
  123. """
  124. line_parser = get_line_parser(finder)
  125. parser = RequirementsFileParser(session, line_parser, comes_from)
  126. for parsed_line in parser.parse(filename, constraint):
  127. parsed_req = handle_line(
  128. parsed_line,
  129. options=options,
  130. finder=finder,
  131. session=session
  132. )
  133. if parsed_req is not None:
  134. yield parsed_req
  135. def preprocess(content):
  136. # type: (Text) -> ReqFileLines
  137. """Split, filter, and join lines, and return a line iterator
  138. :param content: the content of the requirements file
  139. """
  140. lines_enum = enumerate(content.splitlines(), start=1) # type: ReqFileLines
  141. lines_enum = join_lines(lines_enum)
  142. lines_enum = ignore_comments(lines_enum)
  143. lines_enum = expand_env_variables(lines_enum)
  144. return lines_enum
  145. def handle_requirement_line(
  146. line, # type: ParsedLine
  147. options=None, # type: Optional[optparse.Values]
  148. ):
  149. # type: (...) -> ParsedRequirement
  150. # preserve for the nested code path
  151. line_comes_from = '{} {} (line {})'.format(
  152. '-c' if line.constraint else '-r', line.filename, line.lineno,
  153. )
  154. assert line.is_requirement
  155. if line.is_editable:
  156. # For editable requirements, we don't support per-requirement
  157. # options, so just return the parsed requirement.
  158. return ParsedRequirement(
  159. requirement=line.requirement,
  160. is_editable=line.is_editable,
  161. comes_from=line_comes_from,
  162. constraint=line.constraint,
  163. )
  164. else:
  165. if options:
  166. # Disable wheels if the user has specified build options
  167. cmdoptions.check_install_build_global(options, line.opts)
  168. # get the options that apply to requirements
  169. req_options = {}
  170. for dest in SUPPORTED_OPTIONS_REQ_DEST:
  171. if dest in line.opts.__dict__ and line.opts.__dict__[dest]:
  172. req_options[dest] = line.opts.__dict__[dest]
  173. line_source = 'line {} of {}'.format(line.lineno, line.filename)
  174. return ParsedRequirement(
  175. requirement=line.requirement,
  176. is_editable=line.is_editable,
  177. comes_from=line_comes_from,
  178. constraint=line.constraint,
  179. options=req_options,
  180. line_source=line_source,
  181. )
  182. def handle_option_line(
  183. opts, # type: Values
  184. filename, # type: str
  185. lineno, # type: int
  186. finder=None, # type: Optional[PackageFinder]
  187. options=None, # type: Optional[optparse.Values]
  188. session=None, # type: Optional[PipSession]
  189. ):
  190. # type: (...) -> None
  191. if options:
  192. # percolate options upward
  193. if opts.require_hashes:
  194. options.require_hashes = opts.require_hashes
  195. if opts.features_enabled:
  196. options.features_enabled.extend(
  197. f for f in opts.features_enabled
  198. if f not in options.features_enabled
  199. )
  200. # set finder options
  201. if finder:
  202. find_links = finder.find_links
  203. index_urls = finder.index_urls
  204. if opts.index_url:
  205. index_urls = [opts.index_url]
  206. if opts.no_index is True:
  207. index_urls = []
  208. if opts.extra_index_urls:
  209. index_urls.extend(opts.extra_index_urls)
  210. if opts.find_links:
  211. # FIXME: it would be nice to keep track of the source
  212. # of the find_links: support a find-links local path
  213. # relative to a requirements file.
  214. value = opts.find_links[0]
  215. req_dir = os.path.dirname(os.path.abspath(filename))
  216. relative_to_reqs_file = os.path.join(req_dir, value)
  217. if os.path.exists(relative_to_reqs_file):
  218. value = relative_to_reqs_file
  219. find_links.append(value)
  220. search_scope = SearchScope(
  221. find_links=find_links,
  222. index_urls=index_urls,
  223. )
  224. finder.search_scope = search_scope
  225. if opts.pre:
  226. finder.set_allow_all_prereleases()
  227. if opts.prefer_binary:
  228. finder.set_prefer_binary()
  229. if session:
  230. for host in opts.trusted_hosts or []:
  231. source = 'line {} of {}'.format(lineno, filename)
  232. session.add_trusted_host(host, source=source)
  233. def handle_line(
  234. line, # type: ParsedLine
  235. options=None, # type: Optional[optparse.Values]
  236. finder=None, # type: Optional[PackageFinder]
  237. session=None, # type: Optional[PipSession]
  238. ):
  239. # type: (...) -> Optional[ParsedRequirement]
  240. """Handle a single parsed requirements line; This can result in
  241. creating/yielding requirements, or updating the finder.
  242. :param line: The parsed line to be processed.
  243. :param options: CLI options.
  244. :param finder: The finder - updated by non-requirement lines.
  245. :param session: The session - updated by non-requirement lines.
  246. Returns a ParsedRequirement object if the line is a requirement line,
  247. otherwise returns None.
  248. For lines that contain requirements, the only options that have an effect
  249. are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
  250. requirement. Other options from SUPPORTED_OPTIONS may be present, but are
  251. ignored.
  252. For lines that do not contain requirements, the only options that have an
  253. effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
  254. be present, but are ignored. These lines may contain multiple options
  255. (although our docs imply only one is supported), and all our parsed and
  256. affect the finder.
  257. """
  258. if line.is_requirement:
  259. parsed_req = handle_requirement_line(line, options)
  260. return parsed_req
  261. else:
  262. handle_option_line(
  263. line.opts,
  264. line.filename,
  265. line.lineno,
  266. finder,
  267. options,
  268. session,
  269. )
  270. return None
  271. class RequirementsFileParser(object):
  272. def __init__(
  273. self,
  274. session, # type: PipSession
  275. line_parser, # type: LineParser
  276. comes_from, # type: Optional[str]
  277. ):
  278. # type: (...) -> None
  279. self._session = session
  280. self._line_parser = line_parser
  281. self._comes_from = comes_from
  282. def parse(self, filename, constraint):
  283. # type: (str, bool) -> Iterator[ParsedLine]
  284. """Parse a given file, yielding parsed lines.
  285. """
  286. for line in self._parse_and_recurse(filename, constraint):
  287. yield line
  288. def _parse_and_recurse(self, filename, constraint):
  289. # type: (str, bool) -> Iterator[ParsedLine]
  290. for line in self._parse_file(filename, constraint):
  291. if (
  292. not line.is_requirement and
  293. (line.opts.requirements or line.opts.constraints)
  294. ):
  295. # parse a nested requirements file
  296. if line.opts.requirements:
  297. req_path = line.opts.requirements[0]
  298. nested_constraint = False
  299. else:
  300. req_path = line.opts.constraints[0]
  301. nested_constraint = True
  302. # original file is over http
  303. if SCHEME_RE.search(filename):
  304. # do a url join so relative paths work
  305. req_path = urllib_parse.urljoin(filename, req_path)
  306. # original file and nested file are paths
  307. elif not SCHEME_RE.search(req_path):
  308. # do a join so relative paths work
  309. req_path = os.path.join(
  310. os.path.dirname(filename), req_path,
  311. )
  312. for inner_line in self._parse_and_recurse(
  313. req_path, nested_constraint,
  314. ):
  315. yield inner_line
  316. else:
  317. yield line
  318. def _parse_file(self, filename, constraint):
  319. # type: (str, bool) -> Iterator[ParsedLine]
  320. _, content = get_file_content(
  321. filename, self._session, comes_from=self._comes_from
  322. )
  323. lines_enum = preprocess(content)
  324. for line_number, line in lines_enum:
  325. try:
  326. args_str, opts = self._line_parser(line)
  327. except OptionParsingError as e:
  328. # add offending line
  329. msg = 'Invalid requirement: {}\n{}'.format(line, e.msg)
  330. raise RequirementsFileParseError(msg)
  331. yield ParsedLine(
  332. filename,
  333. line_number,
  334. self._comes_from,
  335. args_str,
  336. opts,
  337. constraint,
  338. )
  339. def get_line_parser(finder):
  340. # type: (Optional[PackageFinder]) -> LineParser
  341. def parse_line(line):
  342. # type: (Text) -> Tuple[str, Values]
  343. # Build new parser for each line since it accumulates appendable
  344. # options.
  345. parser = build_parser()
  346. defaults = parser.get_default_values()
  347. defaults.index_url = None
  348. if finder:
  349. defaults.format_control = finder.format_control
  350. args_str, options_str = break_args_options(line)
  351. # Prior to 2.7.3, shlex cannot deal with unicode entries
  352. if sys.version_info < (2, 7, 3):
  353. # https://github.com/python/mypy/issues/1174
  354. options_str = options_str.encode('utf8') # type: ignore
  355. # https://github.com/python/mypy/issues/1174
  356. opts, _ = parser.parse_args(
  357. shlex.split(options_str), defaults) # type: ignore
  358. return args_str, opts
  359. return parse_line
  360. def break_args_options(line):
  361. # type: (Text) -> Tuple[str, Text]
  362. """Break up the line into an args and options string. We only want to shlex
  363. (and then optparse) the options, not the args. args can contain markers
  364. which are corrupted by shlex.
  365. """
  366. tokens = line.split(' ')
  367. args = []
  368. options = tokens[:]
  369. for token in tokens:
  370. if token.startswith('-') or token.startswith('--'):
  371. break
  372. else:
  373. args.append(token)
  374. options.pop(0)
  375. return ' '.join(args), ' '.join(options) # type: ignore
  376. class OptionParsingError(Exception):
  377. def __init__(self, msg):
  378. # type: (str) -> None
  379. self.msg = msg
  380. def build_parser():
  381. # type: () -> optparse.OptionParser
  382. """
  383. Return a parser for parsing requirement lines
  384. """
  385. parser = optparse.OptionParser(add_help_option=False)
  386. option_factories = SUPPORTED_OPTIONS + SUPPORTED_OPTIONS_REQ
  387. for option_factory in option_factories:
  388. option = option_factory()
  389. parser.add_option(option)
  390. # By default optparse sys.exits on parsing errors. We want to wrap
  391. # that in our own exception.
  392. def parser_exit(self, msg):
  393. # type: (Any, str) -> NoReturn
  394. raise OptionParsingError(msg)
  395. # NOTE: mypy disallows assigning to a method
  396. # https://github.com/python/mypy/issues/2427
  397. parser.exit = parser_exit # type: ignore
  398. return parser
  399. def join_lines(lines_enum):
  400. # type: (ReqFileLines) -> ReqFileLines
  401. """Joins a line ending in '\' with the previous line (except when following
  402. comments). The joined line takes on the index of the first line.
  403. """
  404. primary_line_number = None
  405. new_line = [] # type: List[Text]
  406. for line_number, line in lines_enum:
  407. if not line.endswith('\\') or COMMENT_RE.match(line):
  408. if COMMENT_RE.match(line):
  409. # this ensures comments are always matched later
  410. line = ' ' + line
  411. if new_line:
  412. new_line.append(line)
  413. assert primary_line_number is not None
  414. yield primary_line_number, ''.join(new_line)
  415. new_line = []
  416. else:
  417. yield line_number, line
  418. else:
  419. if not new_line:
  420. primary_line_number = line_number
  421. new_line.append(line.strip('\\'))
  422. # last line contains \
  423. if new_line:
  424. assert primary_line_number is not None
  425. yield primary_line_number, ''.join(new_line)
  426. # TODO: handle space after '\'.
  427. def ignore_comments(lines_enum):
  428. # type: (ReqFileLines) -> ReqFileLines
  429. """
  430. Strips comments and filter empty lines.
  431. """
  432. for line_number, line in lines_enum:
  433. line = COMMENT_RE.sub('', line)
  434. line = line.strip()
  435. if line:
  436. yield line_number, line
  437. def expand_env_variables(lines_enum):
  438. # type: (ReqFileLines) -> ReqFileLines
  439. """Replace all environment variables that can be retrieved via `os.getenv`.
  440. The only allowed format for environment variables defined in the
  441. requirement file is `${MY_VARIABLE_1}` to ensure two things:
  442. 1. Strings that contain a `$` aren't accidentally (partially) expanded.
  443. 2. Ensure consistency across platforms for requirement files.
  444. These points are the result of a discussion on the `github pull
  445. request #3514 <https://github.com/pypa/pip/pull/3514>`_.
  446. Valid characters in variable names follow the `POSIX standard
  447. <http://pubs.opengroup.org/onlinepubs/9699919799/>`_ and are limited
  448. to uppercase letter, digits and the `_` (underscore).
  449. """
  450. for line_number, line in lines_enum:
  451. for env_var, var_name in ENV_VAR_RE.findall(line):
  452. value = os.getenv(var_name)
  453. if not value:
  454. continue
  455. line = line.replace(env_var, value)
  456. yield line_number, line
  457. def get_file_content(url, session, comes_from=None):
  458. # type: (str, PipSession, Optional[str]) -> Tuple[str, Text]
  459. """Gets the content of a file; it may be a filename, file: URL, or
  460. http: URL. Returns (location, content). Content is unicode.
  461. Respects # -*- coding: declarations on the retrieved files.
  462. :param url: File path or url.
  463. :param session: PipSession instance.
  464. :param comes_from: Origin description of requirements.
  465. """
  466. scheme = get_url_scheme(url)
  467. if scheme in ['http', 'https']:
  468. # FIXME: catch some errors
  469. resp = session.get(url)
  470. raise_for_status(resp)
  471. return resp.url, resp.text
  472. elif scheme == 'file':
  473. if comes_from and comes_from.startswith('http'):
  474. raise InstallationError(
  475. 'Requirements file {} references URL {}, '
  476. 'which is local'.format(comes_from, url)
  477. )
  478. path = url.split(':', 1)[1]
  479. path = path.replace('\\', '/')
  480. match = _url_slash_drive_re.match(path)
  481. if match:
  482. path = match.group(1) + ':' + path.split('|', 1)[1]
  483. path = urllib_parse.unquote(path)
  484. if path.startswith('/'):
  485. path = '/' + path.lstrip('/')
  486. url = path
  487. try:
  488. with open(url, 'rb') as f:
  489. content = auto_decode(f.read())
  490. except IOError as exc:
  491. raise InstallationError(
  492. 'Could not open requirements file: {}'.format(exc)
  493. )
  494. return url, content
  495. _url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)