# decoder.py (34 KB)
  1. import datetime
  2. import io
  3. from os import linesep
  4. import re
  5. import sys
  6. from pip._vendor.toml.tz import TomlTz
  7. if sys.version_info < (3,):
  8. _range = xrange # noqa: F821
  9. else:
  10. unicode = str
  11. _range = range
  12. basestring = str
  13. unichr = chr
  14. def _detect_pathlib_path(p):
  15. if (3, 4) <= sys.version_info:
  16. import pathlib
  17. if isinstance(p, pathlib.PurePath):
  18. return True
  19. return False
  20. def _ispath(p):
  21. if isinstance(p, basestring):
  22. return True
  23. return _detect_pathlib_path(p)
  24. def _getpath(p):
  25. if (3, 6) <= sys.version_info:
  26. import os
  27. return os.fspath(p)
  28. if _detect_pathlib_path(p):
  29. return str(p)
  30. return p
  31. try:
  32. FNFError = FileNotFoundError
  33. except NameError:
  34. FNFError = IOError
  35. TIME_RE = re.compile("([0-9]{2}):([0-9]{2}):([0-9]{2})(\.([0-9]{3,6}))?")
  36. class TomlDecodeError(ValueError):
  37. """Base toml Exception / Error."""
  38. def __init__(self, msg, doc, pos):
  39. lineno = doc.count('\n', 0, pos) + 1
  40. colno = pos - doc.rfind('\n', 0, pos)
  41. emsg = '{} (line {} column {} char {})'.format(msg, lineno, colno, pos)
  42. ValueError.__init__(self, emsg)
  43. self.msg = msg
  44. self.doc = doc
  45. self.pos = pos
  46. self.lineno = lineno
  47. self.colno = colno
  48. # Matches a TOML number, which allows underscores for readability
  49. _number_with_underscores = re.compile('([0-9])(_([0-9]))*')
  50. def _strictly_valid_num(n):
  51. n = n.strip()
  52. if not n:
  53. return False
  54. if n[0] == '_':
  55. return False
  56. if n[-1] == '_':
  57. return False
  58. if "_." in n or "._" in n:
  59. return False
  60. if len(n) == 1:
  61. return True
  62. if n[0] == '0' and n[1] not in ['.', 'o', 'b', 'x']:
  63. return False
  64. if n[0] == '+' or n[0] == '-':
  65. n = n[1:]
  66. if len(n) > 1 and n[0] == '0' and n[1] != '.':
  67. return False
  68. if '__' in n:
  69. return False
  70. return True
  71. def load(f, _dict=dict, decoder=None):
  72. """Parses named file or files as toml and returns a dictionary
  73. Args:
  74. f: Path to the file to open, array of files to read into single dict
  75. or a file descriptor
  76. _dict: (optional) Specifies the class of the returned toml dictionary
  77. Returns:
  78. Parsed toml file represented as a dictionary
  79. Raises:
  80. TypeError -- When f is invalid type
  81. TomlDecodeError: Error while decoding toml
  82. IOError / FileNotFoundError -- When an array with no valid (existing)
  83. (Python 2 / Python 3) file paths is passed
  84. """
  85. if _ispath(f):
  86. with io.open(_getpath(f), encoding='utf-8') as ffile:
  87. return loads(ffile.read(), _dict, decoder)
  88. elif isinstance(f, list):
  89. from os import path as op
  90. from warnings import warn
  91. if not [path for path in f if op.exists(path)]:
  92. error_msg = "Load expects a list to contain filenames only."
  93. error_msg += linesep
  94. error_msg += ("The list needs to contain the path of at least one "
  95. "existing file.")
  96. raise FNFError(error_msg)
  97. if decoder is None:
  98. decoder = TomlDecoder()
  99. d = decoder.get_empty_table()
  100. for l in f:
  101. if op.exists(l):
  102. d.update(load(l, _dict, decoder))
  103. else:
  104. warn("Non-existent filename in list with at least one valid "
  105. "filename")
  106. return d
  107. else:
  108. try:
  109. return loads(f.read(), _dict, decoder)
  110. except AttributeError:
  111. raise TypeError("You can only load a file descriptor, filename or "
  112. "list")
  113. _groupname_re = re.compile(r'^[A-Za-z0-9_-]+$')
  114. def loads(s, _dict=dict, decoder=None):
  115. """Parses string as toml
  116. Args:
  117. s: String to be parsed
  118. _dict: (optional) Specifies the class of the returned toml dictionary
  119. Returns:
  120. Parsed toml file represented as a dictionary
  121. Raises:
  122. TypeError: When a non-string is passed
  123. TomlDecodeError: Error while decoding toml
  124. """
  125. implicitgroups = []
  126. if decoder is None:
  127. decoder = TomlDecoder(_dict)
  128. retval = decoder.get_empty_table()
  129. currentlevel = retval
  130. if not isinstance(s, basestring):
  131. raise TypeError("Expecting something like a string")
  132. if not isinstance(s, unicode):
  133. s = s.decode('utf8')
  134. original = s
  135. sl = list(s)
  136. openarr = 0
  137. openstring = False
  138. openstrchar = ""
  139. multilinestr = False
  140. arrayoftables = False
  141. beginline = True
  142. keygroup = False
  143. dottedkey = False
  144. keyname = 0
  145. for i, item in enumerate(sl):
  146. if item == '\r' and sl[i + 1] == '\n':
  147. sl[i] = ' '
  148. continue
  149. if keyname:
  150. if item == '\n':
  151. raise TomlDecodeError("Key name found without value."
  152. " Reached end of line.", original, i)
  153. if openstring:
  154. if item == openstrchar:
  155. keyname = 2
  156. openstring = False
  157. openstrchar = ""
  158. continue
  159. elif keyname == 1:
  160. if item.isspace():
  161. keyname = 2
  162. continue
  163. elif item == '.':
  164. dottedkey = True
  165. continue
  166. elif item.isalnum() or item == '_' or item == '-':
  167. continue
  168. elif (dottedkey and sl[i - 1] == '.' and
  169. (item == '"' or item == "'")):
  170. openstring = True
  171. openstrchar = item
  172. continue
  173. elif keyname == 2:
  174. if item.isspace():
  175. if dottedkey:
  176. nextitem = sl[i + 1]
  177. if not nextitem.isspace() and nextitem != '.':
  178. keyname = 1
  179. continue
  180. if item == '.':
  181. dottedkey = True
  182. nextitem = sl[i + 1]
  183. if not nextitem.isspace() and nextitem != '.':
  184. keyname = 1
  185. continue
  186. if item == '=':
  187. keyname = 0
  188. dottedkey = False
  189. else:
  190. raise TomlDecodeError("Found invalid character in key name: '" +
  191. item + "'. Try quoting the key name.",
  192. original, i)
  193. if item == "'" and openstrchar != '"':
  194. k = 1
  195. try:
  196. while sl[i - k] == "'":
  197. k += 1
  198. if k == 3:
  199. break
  200. except IndexError:
  201. pass
  202. if k == 3:
  203. multilinestr = not multilinestr
  204. openstring = multilinestr
  205. else:
  206. openstring = not openstring
  207. if openstring:
  208. openstrchar = "'"
  209. else:
  210. openstrchar = ""
  211. if item == '"' and openstrchar != "'":
  212. oddbackslash = False
  213. k = 1
  214. tripquote = False
  215. try:
  216. while sl[i - k] == '"':
  217. k += 1
  218. if k == 3:
  219. tripquote = True
  220. break
  221. if k == 1 or (k == 3 and tripquote):
  222. while sl[i - k] == '\\':
  223. oddbackslash = not oddbackslash
  224. k += 1
  225. except IndexError:
  226. pass
  227. if not oddbackslash:
  228. if tripquote:
  229. multilinestr = not multilinestr
  230. openstring = multilinestr
  231. else:
  232. openstring = not openstring
  233. if openstring:
  234. openstrchar = '"'
  235. else:
  236. openstrchar = ""
  237. if item == '#' and (not openstring and not keygroup and
  238. not arrayoftables):
  239. j = i
  240. try:
  241. while sl[j] != '\n':
  242. sl[j] = ' '
  243. j += 1
  244. except IndexError:
  245. break
  246. if item == '[' and (not openstring and not keygroup and
  247. not arrayoftables):
  248. if beginline:
  249. if len(sl) > i + 1 and sl[i + 1] == '[':
  250. arrayoftables = True
  251. else:
  252. keygroup = True
  253. else:
  254. openarr += 1
  255. if item == ']' and not openstring:
  256. if keygroup:
  257. keygroup = False
  258. elif arrayoftables:
  259. if sl[i - 1] == ']':
  260. arrayoftables = False
  261. else:
  262. openarr -= 1
  263. if item == '\n':
  264. if openstring or multilinestr:
  265. if not multilinestr:
  266. raise TomlDecodeError("Unbalanced quotes", original, i)
  267. if ((sl[i - 1] == "'" or sl[i - 1] == '"') and (
  268. sl[i - 2] == sl[i - 1])):
  269. sl[i] = sl[i - 1]
  270. if sl[i - 3] == sl[i - 1]:
  271. sl[i - 3] = ' '
  272. elif openarr:
  273. sl[i] = ' '
  274. else:
  275. beginline = True
  276. elif beginline and sl[i] != ' ' and sl[i] != '\t':
  277. beginline = False
  278. if not keygroup and not arrayoftables:
  279. if sl[i] == '=':
  280. raise TomlDecodeError("Found empty keyname. ", original, i)
  281. keyname = 1
  282. s = ''.join(sl)
  283. s = s.split('\n')
  284. multikey = None
  285. multilinestr = ""
  286. multibackslash = False
  287. pos = 0
  288. for idx, line in enumerate(s):
  289. if idx > 0:
  290. pos += len(s[idx - 1]) + 1
  291. if not multilinestr or multibackslash or '\n' not in multilinestr:
  292. line = line.strip()
  293. if line == "" and (not multikey or multibackslash):
  294. continue
  295. if multikey:
  296. if multibackslash:
  297. multilinestr += line
  298. else:
  299. multilinestr += line
  300. multibackslash = False
  301. if len(line) > 2 and (line[-1] == multilinestr[0] and
  302. line[-2] == multilinestr[0] and
  303. line[-3] == multilinestr[0]):
  304. try:
  305. value, vtype = decoder.load_value(multilinestr)
  306. except ValueError as err:
  307. raise TomlDecodeError(str(err), original, pos)
  308. currentlevel[multikey] = value
  309. multikey = None
  310. multilinestr = ""
  311. else:
  312. k = len(multilinestr) - 1
  313. while k > -1 and multilinestr[k] == '\\':
  314. multibackslash = not multibackslash
  315. k -= 1
  316. if multibackslash:
  317. multilinestr = multilinestr[:-1]
  318. else:
  319. multilinestr += "\n"
  320. continue
  321. if line[0] == '[':
  322. arrayoftables = False
  323. if len(line) == 1:
  324. raise TomlDecodeError("Opening key group bracket on line by "
  325. "itself.", original, pos)
  326. if line[1] == '[':
  327. arrayoftables = True
  328. line = line[2:]
  329. splitstr = ']]'
  330. else:
  331. line = line[1:]
  332. splitstr = ']'
  333. i = 1
  334. quotesplits = decoder._get_split_on_quotes(line)
  335. quoted = False
  336. for quotesplit in quotesplits:
  337. if not quoted and splitstr in quotesplit:
  338. break
  339. i += quotesplit.count(splitstr)
  340. quoted = not quoted
  341. line = line.split(splitstr, i)
  342. if len(line) < i + 1 or line[-1].strip() != "":
  343. raise TomlDecodeError("Key group not on a line by itself.",
  344. original, pos)
  345. groups = splitstr.join(line[:-1]).split('.')
  346. i = 0
  347. while i < len(groups):
  348. groups[i] = groups[i].strip()
  349. if len(groups[i]) > 0 and (groups[i][0] == '"' or
  350. groups[i][0] == "'"):
  351. groupstr = groups[i]
  352. j = i + 1
  353. while not groupstr[0] == groupstr[-1]:
  354. j += 1
  355. if j > len(groups) + 2:
  356. raise TomlDecodeError("Invalid group name '" +
  357. groupstr + "' Something " +
  358. "went wrong.", original, pos)
  359. groupstr = '.'.join(groups[i:j]).strip()
  360. groups[i] = groupstr[1:-1]
  361. groups[i + 1:j] = []
  362. else:
  363. if not _groupname_re.match(groups[i]):
  364. raise TomlDecodeError("Invalid group name '" +
  365. groups[i] + "'. Try quoting it.",
  366. original, pos)
  367. i += 1
  368. currentlevel = retval
  369. for i in _range(len(groups)):
  370. group = groups[i]
  371. if group == "":
  372. raise TomlDecodeError("Can't have a keygroup with an empty "
  373. "name", original, pos)
  374. try:
  375. currentlevel[group]
  376. if i == len(groups) - 1:
  377. if group in implicitgroups:
  378. implicitgroups.remove(group)
  379. if arrayoftables:
  380. raise TomlDecodeError("An implicitly defined "
  381. "table can't be an array",
  382. original, pos)
  383. elif arrayoftables:
  384. currentlevel[group].append(decoder.get_empty_table()
  385. )
  386. else:
  387. raise TomlDecodeError("What? " + group +
  388. " already exists?" +
  389. str(currentlevel),
  390. original, pos)
  391. except TypeError:
  392. currentlevel = currentlevel[-1]
  393. if group not in currentlevel:
  394. currentlevel[group] = decoder.get_empty_table()
  395. if i == len(groups) - 1 and arrayoftables:
  396. currentlevel[group] = [decoder.get_empty_table()]
  397. except KeyError:
  398. if i != len(groups) - 1:
  399. implicitgroups.append(group)
  400. currentlevel[group] = decoder.get_empty_table()
  401. if i == len(groups) - 1 and arrayoftables:
  402. currentlevel[group] = [decoder.get_empty_table()]
  403. currentlevel = currentlevel[group]
  404. if arrayoftables:
  405. try:
  406. currentlevel = currentlevel[-1]
  407. except KeyError:
  408. pass
  409. elif line[0] == "{":
  410. if line[-1] != "}":
  411. raise TomlDecodeError("Line breaks are not allowed in inline"
  412. "objects", original, pos)
  413. try:
  414. decoder.load_inline_object(line, currentlevel, multikey,
  415. multibackslash)
  416. except ValueError as err:
  417. raise TomlDecodeError(str(err), original, pos)
  418. elif "=" in line:
  419. try:
  420. ret = decoder.load_line(line, currentlevel, multikey,
  421. multibackslash)
  422. except ValueError as err:
  423. raise TomlDecodeError(str(err), original, pos)
  424. if ret is not None:
  425. multikey, multilinestr, multibackslash = ret
  426. return retval
  427. def _load_date(val):
  428. microsecond = 0
  429. tz = None
  430. try:
  431. if len(val) > 19:
  432. if val[19] == '.':
  433. if val[-1].upper() == 'Z':
  434. subsecondval = val[20:-1]
  435. tzval = "Z"
  436. else:
  437. subsecondvalandtz = val[20:]
  438. if '+' in subsecondvalandtz:
  439. splitpoint = subsecondvalandtz.index('+')
  440. subsecondval = subsecondvalandtz[:splitpoint]
  441. tzval = subsecondvalandtz[splitpoint:]
  442. elif '-' in subsecondvalandtz:
  443. splitpoint = subsecondvalandtz.index('-')
  444. subsecondval = subsecondvalandtz[:splitpoint]
  445. tzval = subsecondvalandtz[splitpoint:]
  446. else:
  447. tzval = None
  448. subsecondval = subsecondvalandtz
  449. if tzval is not None:
  450. tz = TomlTz(tzval)
  451. microsecond = int(int(subsecondval) *
  452. (10 ** (6 - len(subsecondval))))
  453. else:
  454. tz = TomlTz(val[19:])
  455. except ValueError:
  456. tz = None
  457. if "-" not in val[1:]:
  458. return None
  459. try:
  460. if len(val) == 10:
  461. d = datetime.date(
  462. int(val[:4]), int(val[5:7]),
  463. int(val[8:10]))
  464. else:
  465. d = datetime.datetime(
  466. int(val[:4]), int(val[5:7]),
  467. int(val[8:10]), int(val[11:13]),
  468. int(val[14:16]), int(val[17:19]), microsecond, tz)
  469. except ValueError:
  470. return None
  471. return d
  472. def _load_unicode_escapes(v, hexbytes, prefix):
  473. skip = False
  474. i = len(v) - 1
  475. while i > -1 and v[i] == '\\':
  476. skip = not skip
  477. i -= 1
  478. for hx in hexbytes:
  479. if skip:
  480. skip = False
  481. i = len(hx) - 1
  482. while i > -1 and hx[i] == '\\':
  483. skip = not skip
  484. i -= 1
  485. v += prefix
  486. v += hx
  487. continue
  488. hxb = ""
  489. i = 0
  490. hxblen = 4
  491. if prefix == "\\U":
  492. hxblen = 8
  493. hxb = ''.join(hx[i:i + hxblen]).lower()
  494. if hxb.strip('0123456789abcdef'):
  495. raise ValueError("Invalid escape sequence: " + hxb)
  496. if hxb[0] == "d" and hxb[1].strip('01234567'):
  497. raise ValueError("Invalid escape sequence: " + hxb +
  498. ". Only scalar unicode points are allowed.")
  499. v += unichr(int(hxb, 16))
  500. v += unicode(hx[len(hxb):])
  501. return v
  502. # Unescape TOML string values.
  503. # content after the \
  504. _escapes = ['0', 'b', 'f', 'n', 'r', 't', '"']
  505. # What it should be replaced by
  506. _escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"']
  507. # Used for substitution
  508. _escape_to_escapedchars = dict(zip(_escapes, _escapedchars))
  509. def _unescape(v):
  510. """Unescape characters in a TOML string."""
  511. i = 0
  512. backslash = False
  513. while i < len(v):
  514. if backslash:
  515. backslash = False
  516. if v[i] in _escapes:
  517. v = v[:i - 1] + _escape_to_escapedchars[v[i]] + v[i + 1:]
  518. elif v[i] == '\\':
  519. v = v[:i - 1] + v[i:]
  520. elif v[i] == 'u' or v[i] == 'U':
  521. i += 1
  522. else:
  523. raise ValueError("Reserved escape sequence used")
  524. continue
  525. elif v[i] == '\\':
  526. backslash = True
  527. i += 1
  528. return v
  529. class InlineTableDict(object):
  530. """Sentinel subclass of dict for inline tables."""
  531. class TomlDecoder(object):
  532. def __init__(self, _dict=dict):
  533. self._dict = _dict
  534. def get_empty_table(self):
  535. return self._dict()
  536. def get_empty_inline_table(self):
  537. class DynamicInlineTableDict(self._dict, InlineTableDict):
  538. """Concrete sentinel subclass for inline tables.
  539. It is a subclass of _dict which is passed in dynamically at load
  540. time
  541. It is also a subclass of InlineTableDict
  542. """
  543. return DynamicInlineTableDict()
  544. def load_inline_object(self, line, currentlevel, multikey=False,
  545. multibackslash=False):
  546. candidate_groups = line[1:-1].split(",")
  547. groups = []
  548. if len(candidate_groups) == 1 and not candidate_groups[0].strip():
  549. candidate_groups.pop()
  550. while len(candidate_groups) > 0:
  551. candidate_group = candidate_groups.pop(0)
  552. try:
  553. _, value = candidate_group.split('=', 1)
  554. except ValueError:
  555. raise ValueError("Invalid inline table encountered")
  556. value = value.strip()
  557. if ((value[0] == value[-1] and value[0] in ('"', "'")) or (
  558. value[0] in '-0123456789' or
  559. value in ('true', 'false') or
  560. (value[0] == "[" and value[-1] == "]") or
  561. (value[0] == '{' and value[-1] == '}'))):
  562. groups.append(candidate_group)
  563. elif len(candidate_groups) > 0:
  564. candidate_groups[0] = (candidate_group + "," +
  565. candidate_groups[0])
  566. else:
  567. raise ValueError("Invalid inline table value encountered")
  568. for group in groups:
  569. status = self.load_line(group, currentlevel, multikey,
  570. multibackslash)
  571. if status is not None:
  572. break
  573. def _get_split_on_quotes(self, line):
  574. doublequotesplits = line.split('"')
  575. quoted = False
  576. quotesplits = []
  577. if len(doublequotesplits) > 1 and "'" in doublequotesplits[0]:
  578. singlequotesplits = doublequotesplits[0].split("'")
  579. doublequotesplits = doublequotesplits[1:]
  580. while len(singlequotesplits) % 2 == 0 and len(doublequotesplits):
  581. singlequotesplits[-1] += '"' + doublequotesplits[0]
  582. doublequotesplits = doublequotesplits[1:]
  583. if "'" in singlequotesplits[-1]:
  584. singlequotesplits = (singlequotesplits[:-1] +
  585. singlequotesplits[-1].split("'"))
  586. quotesplits += singlequotesplits
  587. for doublequotesplit in doublequotesplits:
  588. if quoted:
  589. quotesplits.append(doublequotesplit)
  590. else:
  591. quotesplits += doublequotesplit.split("'")
  592. quoted = not quoted
  593. return quotesplits
  594. def load_line(self, line, currentlevel, multikey, multibackslash):
  595. i = 1
  596. quotesplits = self._get_split_on_quotes(line)
  597. quoted = False
  598. for quotesplit in quotesplits:
  599. if not quoted and '=' in quotesplit:
  600. break
  601. i += quotesplit.count('=')
  602. quoted = not quoted
  603. pair = line.split('=', i)
  604. strictly_valid = _strictly_valid_num(pair[-1])
  605. if _number_with_underscores.match(pair[-1]):
  606. pair[-1] = pair[-1].replace('_', '')
  607. while len(pair[-1]) and (pair[-1][0] != ' ' and pair[-1][0] != '\t' and
  608. pair[-1][0] != "'" and pair[-1][0] != '"' and
  609. pair[-1][0] != '[' and pair[-1][0] != '{' and
  610. pair[-1] != 'true' and pair[-1] != 'false'):
  611. try:
  612. float(pair[-1])
  613. break
  614. except ValueError:
  615. pass
  616. if _load_date(pair[-1]) is not None:
  617. break
  618. i += 1
  619. prev_val = pair[-1]
  620. pair = line.split('=', i)
  621. if prev_val == pair[-1]:
  622. raise ValueError("Invalid date or number")
  623. if strictly_valid:
  624. strictly_valid = _strictly_valid_num(pair[-1])
  625. pair = ['='.join(pair[:-1]).strip(), pair[-1].strip()]
  626. if '.' in pair[0]:
  627. if '"' in pair[0] or "'" in pair[0]:
  628. quotesplits = self._get_split_on_quotes(pair[0])
  629. quoted = False
  630. levels = []
  631. for quotesplit in quotesplits:
  632. if quoted:
  633. levels.append(quotesplit)
  634. else:
  635. levels += [level.strip() for level in
  636. quotesplit.split('.')]
  637. quoted = not quoted
  638. else:
  639. levels = pair[0].split('.')
  640. while levels[-1] == "":
  641. levels = levels[:-1]
  642. for level in levels[:-1]:
  643. if level == "":
  644. continue
  645. if level not in currentlevel:
  646. currentlevel[level] = self.get_empty_table()
  647. currentlevel = currentlevel[level]
  648. pair[0] = levels[-1].strip()
  649. elif (pair[0][0] == '"' or pair[0][0] == "'") and \
  650. (pair[0][-1] == pair[0][0]):
  651. pair[0] = pair[0][1:-1]
  652. if len(pair[1]) > 2 and ((pair[1][0] == '"' or pair[1][0] == "'") and
  653. pair[1][1] == pair[1][0] and
  654. pair[1][2] == pair[1][0] and
  655. not (len(pair[1]) > 5 and
  656. pair[1][-1] == pair[1][0] and
  657. pair[1][-2] == pair[1][0] and
  658. pair[1][-3] == pair[1][0])):
  659. k = len(pair[1]) - 1
  660. while k > -1 and pair[1][k] == '\\':
  661. multibackslash = not multibackslash
  662. k -= 1
  663. if multibackslash:
  664. multilinestr = pair[1][:-1]
  665. else:
  666. multilinestr = pair[1] + "\n"
  667. multikey = pair[0]
  668. else:
  669. value, vtype = self.load_value(pair[1], strictly_valid)
  670. try:
  671. currentlevel[pair[0]]
  672. raise ValueError("Duplicate keys!")
  673. except TypeError:
  674. raise ValueError("Duplicate keys!")
  675. except KeyError:
  676. if multikey:
  677. return multikey, multilinestr, multibackslash
  678. else:
  679. currentlevel[pair[0]] = value
  680. def load_value(self, v, strictly_valid=True):
  681. if not v:
  682. raise ValueError("Empty value is invalid")
  683. if v == 'true':
  684. return (True, "bool")
  685. elif v == 'false':
  686. return (False, "bool")
  687. elif v[0] == '"' or v[0] == "'":
  688. quotechar = v[0]
  689. testv = v[1:].split(quotechar)
  690. triplequote = False
  691. triplequotecount = 0
  692. if len(testv) > 1 and testv[0] == '' and testv[1] == '':
  693. testv = testv[2:]
  694. triplequote = True
  695. closed = False
  696. for tv in testv:
  697. if tv == '':
  698. if triplequote:
  699. triplequotecount += 1
  700. else:
  701. closed = True
  702. else:
  703. oddbackslash = False
  704. try:
  705. i = -1
  706. j = tv[i]
  707. while j == '\\':
  708. oddbackslash = not oddbackslash
  709. i -= 1
  710. j = tv[i]
  711. except IndexError:
  712. pass
  713. if not oddbackslash:
  714. if closed:
  715. raise ValueError("Stuff after closed string. WTF?")
  716. else:
  717. if not triplequote or triplequotecount > 1:
  718. closed = True
  719. else:
  720. triplequotecount = 0
  721. if quotechar == '"':
  722. escapeseqs = v.split('\\')[1:]
  723. backslash = False
  724. for i in escapeseqs:
  725. if i == '':
  726. backslash = not backslash
  727. else:
  728. if i[0] not in _escapes and (i[0] != 'u' and
  729. i[0] != 'U' and
  730. not backslash):
  731. raise ValueError("Reserved escape sequence used")
  732. if backslash:
  733. backslash = False
  734. for prefix in ["\\u", "\\U"]:
  735. if prefix in v:
  736. hexbytes = v.split(prefix)
  737. v = _load_unicode_escapes(hexbytes[0], hexbytes[1:],
  738. prefix)
  739. v = _unescape(v)
  740. if len(v) > 1 and v[1] == quotechar and (len(v) < 3 or
  741. v[1] == v[2]):
  742. v = v[2:-2]
  743. return (v[1:-1], "str")
  744. elif v[0] == '[':
  745. return (self.load_array(v), "array")
  746. elif v[0] == '{':
  747. inline_object = self.get_empty_inline_table()
  748. self.load_inline_object(v, inline_object)
  749. return (inline_object, "inline_object")
  750. elif TIME_RE.match(v):
  751. h, m, s, _, ms = TIME_RE.match(v).groups()
  752. time = datetime.time(int(h), int(m), int(s), int(ms) if ms else 0)
  753. return (time, "time")
  754. else:
  755. parsed_date = _load_date(v)
  756. if parsed_date is not None:
  757. return (parsed_date, "date")
  758. if not strictly_valid:
  759. raise ValueError("Weirdness with leading zeroes or "
  760. "underscores in your number.")
  761. itype = "int"
  762. neg = False
  763. if v[0] == '-':
  764. neg = True
  765. v = v[1:]
  766. elif v[0] == '+':
  767. v = v[1:]
  768. v = v.replace('_', '')
  769. lowerv = v.lower()
  770. if '.' in v or ('x' not in v and ('e' in v or 'E' in v)):
  771. if '.' in v and v.split('.', 1)[1] == '':
  772. raise ValueError("This float is missing digits after "
  773. "the point")
  774. if v[0] not in '0123456789':
  775. raise ValueError("This float doesn't have a leading "
  776. "digit")
  777. v = float(v)
  778. itype = "float"
  779. elif len(lowerv) == 3 and (lowerv == 'inf' or lowerv == 'nan'):
  780. v = float(v)
  781. itype = "float"
  782. if itype == "int":
  783. v = int(v, 0)
  784. if neg:
  785. return (0 - v, itype)
  786. return (v, itype)
  787. def bounded_string(self, s):
  788. if len(s) == 0:
  789. return True
  790. if s[-1] != s[0]:
  791. return False
  792. i = -2
  793. backslash = False
  794. while len(s) + i > 0:
  795. if s[i] == "\\":
  796. backslash = not backslash
  797. i -= 1
  798. else:
  799. break
  800. return not backslash
  801. def load_array(self, a):
  802. atype = None
  803. retval = []
  804. a = a.strip()
  805. if '[' not in a[1:-1] or "" != a[1:-1].split('[')[0].strip():
  806. strarray = False
  807. tmpa = a[1:-1].strip()
  808. if tmpa != '' and (tmpa[0] == '"' or tmpa[0] == "'"):
  809. strarray = True
  810. if not a[1:-1].strip().startswith('{'):
  811. a = a[1:-1].split(',')
  812. else:
  813. # a is an inline object, we must find the matching parenthesis
  814. # to define groups
  815. new_a = []
  816. start_group_index = 1
  817. end_group_index = 2
  818. in_str = False
  819. while end_group_index < len(a[1:]):
  820. if a[end_group_index] == '"' or a[end_group_index] == "'":
  821. if in_str:
  822. backslash_index = end_group_index - 1
  823. while (backslash_index > -1 and
  824. a[backslash_index] == '\\'):
  825. in_str = not in_str
  826. backslash_index -= 1
  827. in_str = not in_str
  828. if in_str or a[end_group_index] != '}':
  829. end_group_index += 1
  830. continue
  831. # Increase end_group_index by 1 to get the closing bracket
  832. end_group_index += 1
  833. new_a.append(a[start_group_index:end_group_index])
  834. # The next start index is at least after the closing
  835. # bracket, a closing bracket can be followed by a comma
  836. # since we are in an array.
  837. start_group_index = end_group_index + 1
  838. while (start_group_index < len(a[1:]) and
  839. a[start_group_index] != '{'):
  840. start_group_index += 1
  841. end_group_index = start_group_index + 1
  842. a = new_a
  843. b = 0
  844. if strarray:
  845. while b < len(a) - 1:
  846. ab = a[b].strip()
  847. while (not self.bounded_string(ab) or
  848. (len(ab) > 2 and
  849. ab[0] == ab[1] == ab[2] and
  850. ab[-2] != ab[0] and
  851. ab[-3] != ab[0])):
  852. a[b] = a[b] + ',' + a[b + 1]
  853. ab = a[b].strip()
  854. if b < len(a) - 2:
  855. a = a[:b + 1] + a[b + 2:]
  856. else:
  857. a = a[:b + 1]
  858. b += 1
  859. else:
  860. al = list(a[1:-1])
  861. a = []
  862. openarr = 0
  863. j = 0
  864. for i in _range(len(al)):
  865. if al[i] == '[':
  866. openarr += 1
  867. elif al[i] == ']':
  868. openarr -= 1
  869. elif al[i] == ',' and not openarr:
  870. a.append(''.join(al[j:i]))
  871. j = i + 1
  872. a.append(''.join(al[j:]))
  873. for i in _range(len(a)):
  874. a[i] = a[i].strip()
  875. if a[i] != '':
  876. nval, ntype = self.load_value(a[i])
  877. if atype:
  878. if ntype != atype:
  879. raise ValueError("Not a homogeneous array")
  880. else:
  881. atype = ntype
  882. retval.append(nval)
  883. return retval