toml.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039
  1. """Python module which parses and emits TOML.
  2. Released under the MIT license.
  3. """
  4. import re
  5. import io
  6. import datetime
  7. from os import linesep
  8. import sys
# Version string of this module.
__version__ = "0.9.6"
# NOTE(review): presumably the TOML specification version this module
# targets -- confirm against the project documentation.
_spec_ = "0.4.0"
  11. class TomlDecodeError(Exception):
  12. """Base toml Exception / Error."""
  13. pass
  14. class TomlTz(datetime.tzinfo):
  15. def __init__(self, toml_offset):
  16. if toml_offset == "Z":
  17. self._raw_offset = "+00:00"
  18. else:
  19. self._raw_offset = toml_offset
  20. self._sign = -1 if self._raw_offset[0] == '-' else 1
  21. self._hours = int(self._raw_offset[1:3])
  22. self._minutes = int(self._raw_offset[4:6])
  23. def tzname(self, dt):
  24. return "UTC" + self._raw_offset
  25. def utcoffset(self, dt):
  26. return self._sign * datetime.timedelta(hours=self._hours,
  27. minutes=self._minutes)
  28. def dst(self, dt):
  29. return datetime.timedelta(0)
class InlineTableDict(object):
    """Marker base class used to tag mappings that represent inline tables.

    The class itself derives from ``object``, not from ``dict``; the
    mapping behaviour is mixed in by ``_get_empty_inline_table``.
    ``_dump_sections`` tests ``isinstance(..., InlineTableDict)`` to decide
    whether a table may be written in inline form.
    """
  32. def _get_empty_inline_table(_dict):
  33. class DynamicInlineTableDict(_dict, InlineTableDict):
  34. """Concrete sentinel subclass for inline tables.
  35. It is a subclass of _dict which is passed in dynamically at load time
  36. It is also a subclass of InlineTableDict
  37. """
  38. return DynamicInlineTableDict()
# Python 2 / Python 3 compatibility aliases.
# If ``xrange`` exists, the names ``unicode``, ``basestring`` and
# ``unichr`` are left untouched; otherwise they are aliased to the
# ``str``/``range``/``chr`` equivalents.
try:
    _range = xrange
except NameError:
    unicode = str
    _range = range
    basestring = str
    unichr = chr

# Exception raised by ``load`` when none of the listed files exists:
# ``FileNotFoundError`` where available, ``IOError`` otherwise.
try:
    FNFError = FileNotFoundError
except NameError:
    FNFError = IOError
  50. def load(f, _dict=dict):
  51. """Parses named file or files as toml and returns a dictionary
  52. Args:
  53. f: Path to the file to open, array of files to read into single dict
  54. or a file descriptor
  55. _dict: (optional) Specifies the class of the returned toml dictionary
  56. Returns:
  57. Parsed toml file represented as a dictionary
  58. Raises:
  59. TypeError -- When f is invalid type
  60. TomlDecodeError: Error while decoding toml
  61. IOError / FileNotFoundError -- When an array with no valid (existing)
  62. (Python 2 / Python 3) file paths is passed
  63. """
  64. if isinstance(f, basestring):
  65. with io.open(f, encoding='utf-8') as ffile:
  66. return loads(ffile.read(), _dict)
  67. elif isinstance(f, list):
  68. from os import path as op
  69. from warnings import warn
  70. if not [path for path in f if op.exists(path)]:
  71. error_msg = "Load expects a list to contain filenames only."
  72. error_msg += linesep
  73. error_msg += ("The list needs to contain the path of at least one "
  74. "existing file.")
  75. raise FNFError(error_msg)
  76. d = _dict()
  77. for l in f:
  78. if op.exists(l):
  79. d.update(load(l))
  80. else:
  81. warn("Non-existent filename in list with at least one valid "
  82. "filename")
  83. return d
  84. else:
  85. try:
  86. return loads(f.read(), _dict)
  87. except AttributeError:
  88. raise TypeError("You can only load a file descriptor, filename or "
  89. "list")
# Pattern for a bare (unquoted) table name part: one or more ASCII letters,
# digits, underscores or dashes. ``loads`` rejects unquoted group names that
# do not match it.
_groupname_re = re.compile(r'^[A-Za-z0-9_-]+$')
  91. def loads(s, _dict=dict):
  92. """Parses string as toml
  93. Args:
  94. s: String to be parsed
  95. _dict: (optional) Specifies the class of the returned toml dictionary
  96. Returns:
  97. Parsed toml file represented as a dictionary
  98. Raises:
  99. TypeError: When a non-string is passed
  100. TomlDecodeError: Error while decoding toml
  101. """
  102. implicitgroups = []
  103. retval = _dict()
  104. currentlevel = retval
  105. if not isinstance(s, basestring):
  106. raise TypeError("Expecting something like a string")
  107. if not isinstance(s, unicode):
  108. s = s.decode('utf8')
  109. sl = list(s)
  110. openarr = 0
  111. openstring = False
  112. openstrchar = ""
  113. multilinestr = False
  114. arrayoftables = False
  115. beginline = True
  116. keygroup = False
  117. keyname = 0
  118. for i, item in enumerate(sl):
  119. if item == '\r' and sl[i + 1] == '\n':
  120. sl[i] = ' '
  121. continue
  122. if keyname:
  123. if item == '\n':
  124. raise TomlDecodeError("Key name found without value."
  125. " Reached end of line.")
  126. if openstring:
  127. if item == openstrchar:
  128. keyname = 2
  129. openstring = False
  130. openstrchar = ""
  131. continue
  132. elif keyname == 1:
  133. if item.isspace():
  134. keyname = 2
  135. continue
  136. elif item.isalnum() or item == '_' or item == '-':
  137. continue
  138. elif keyname == 2 and item.isspace():
  139. continue
  140. if item == '=':
  141. keyname = 0
  142. else:
  143. raise TomlDecodeError("Found invalid character in key name: '" +
  144. item + "'. Try quoting the key name.")
  145. if item == "'" and openstrchar != '"':
  146. k = 1
  147. try:
  148. while sl[i - k] == "'":
  149. k += 1
  150. if k == 3:
  151. break
  152. except IndexError:
  153. pass
  154. if k == 3:
  155. multilinestr = not multilinestr
  156. openstring = multilinestr
  157. else:
  158. openstring = not openstring
  159. if openstring:
  160. openstrchar = "'"
  161. else:
  162. openstrchar = ""
  163. if item == '"' and openstrchar != "'":
  164. oddbackslash = False
  165. k = 1
  166. tripquote = False
  167. try:
  168. while sl[i - k] == '"':
  169. k += 1
  170. if k == 3:
  171. tripquote = True
  172. break
  173. if k == 1 or (k == 3 and tripquote):
  174. while sl[i - k] == '\\':
  175. oddbackslash = not oddbackslash
  176. k += 1
  177. except IndexError:
  178. pass
  179. if not oddbackslash:
  180. if tripquote:
  181. multilinestr = not multilinestr
  182. openstring = multilinestr
  183. else:
  184. openstring = not openstring
  185. if openstring:
  186. openstrchar = '"'
  187. else:
  188. openstrchar = ""
  189. if item == '#' and (not openstring and not keygroup and
  190. not arrayoftables):
  191. j = i
  192. try:
  193. while sl[j] != '\n':
  194. sl[j] = ' '
  195. j += 1
  196. except IndexError:
  197. break
  198. if item == '[' and (not openstring and not keygroup and
  199. not arrayoftables):
  200. if beginline:
  201. if len(sl) > i + 1 and sl[i + 1] == '[':
  202. arrayoftables = True
  203. else:
  204. keygroup = True
  205. else:
  206. openarr += 1
  207. if item == ']' and not openstring:
  208. if keygroup:
  209. keygroup = False
  210. elif arrayoftables:
  211. if sl[i - 1] == ']':
  212. arrayoftables = False
  213. else:
  214. openarr -= 1
  215. if item == '\n':
  216. if openstring or multilinestr:
  217. if not multilinestr:
  218. raise TomlDecodeError("Unbalanced quotes")
  219. if ((sl[i - 1] == "'" or sl[i - 1] == '"') and (
  220. sl[i - 2] == sl[i - 1])):
  221. sl[i] = sl[i - 1]
  222. if sl[i - 3] == sl[i - 1]:
  223. sl[i - 3] = ' '
  224. elif openarr:
  225. sl[i] = ' '
  226. else:
  227. beginline = True
  228. elif beginline and sl[i] != ' ' and sl[i] != '\t':
  229. beginline = False
  230. if not keygroup and not arrayoftables:
  231. if sl[i] == '=':
  232. raise TomlDecodeError("Found empty keyname. ")
  233. keyname = 1
  234. s = ''.join(sl)
  235. s = s.split('\n')
  236. multikey = None
  237. multilinestr = ""
  238. multibackslash = False
  239. for line in s:
  240. if not multilinestr or multibackslash or '\n' not in multilinestr:
  241. line = line.strip()
  242. if line == "" and (not multikey or multibackslash):
  243. continue
  244. if multikey:
  245. if multibackslash:
  246. multilinestr += line
  247. else:
  248. multilinestr += line
  249. multibackslash = False
  250. if len(line) > 2 and (line[-1] == multilinestr[0] and
  251. line[-2] == multilinestr[0] and
  252. line[-3] == multilinestr[0]):
  253. try:
  254. value, vtype = _load_value(multilinestr, _dict)
  255. except ValueError as err:
  256. raise TomlDecodeError(str(err))
  257. currentlevel[multikey] = value
  258. multikey = None
  259. multilinestr = ""
  260. else:
  261. k = len(multilinestr) - 1
  262. while k > -1 and multilinestr[k] == '\\':
  263. multibackslash = not multibackslash
  264. k -= 1
  265. if multibackslash:
  266. multilinestr = multilinestr[:-1]
  267. else:
  268. multilinestr += "\n"
  269. continue
  270. if line[0] == '[':
  271. arrayoftables = False
  272. if len(line) == 1:
  273. raise TomlDecodeError("Opening key group bracket on line by "
  274. "itself.")
  275. if line[1] == '[':
  276. arrayoftables = True
  277. line = line[2:]
  278. splitstr = ']]'
  279. else:
  280. line = line[1:]
  281. splitstr = ']'
  282. i = 1
  283. quotesplits = _get_split_on_quotes(line)
  284. quoted = False
  285. for quotesplit in quotesplits:
  286. if not quoted and splitstr in quotesplit:
  287. break
  288. i += quotesplit.count(splitstr)
  289. quoted = not quoted
  290. line = line.split(splitstr, i)
  291. if len(line) < i + 1 or line[-1].strip() != "":
  292. raise TomlDecodeError("Key group not on a line by itself.")
  293. groups = splitstr.join(line[:-1]).split('.')
  294. i = 0
  295. while i < len(groups):
  296. groups[i] = groups[i].strip()
  297. if len(groups[i]) > 0 and (groups[i][0] == '"' or
  298. groups[i][0] == "'"):
  299. groupstr = groups[i]
  300. j = i + 1
  301. while not groupstr[0] == groupstr[-1]:
  302. j += 1
  303. if j > len(groups) + 2:
  304. raise TomlDecodeError("Invalid group name '" +
  305. groupstr + "' Something " +
  306. "went wrong.")
  307. groupstr = '.'.join(groups[i:j]).strip()
  308. groups[i] = groupstr[1:-1]
  309. groups[i + 1:j] = []
  310. else:
  311. if not _groupname_re.match(groups[i]):
  312. raise TomlDecodeError("Invalid group name '" +
  313. groups[i] + "'. Try quoting it.")
  314. i += 1
  315. currentlevel = retval
  316. for i in _range(len(groups)):
  317. group = groups[i]
  318. if group == "":
  319. raise TomlDecodeError("Can't have a keygroup with an empty "
  320. "name")
  321. try:
  322. currentlevel[group]
  323. if i == len(groups) - 1:
  324. if group in implicitgroups:
  325. implicitgroups.remove(group)
  326. if arrayoftables:
  327. raise TomlDecodeError("An implicitly defined "
  328. "table can't be an array")
  329. elif arrayoftables:
  330. currentlevel[group].append(_dict())
  331. else:
  332. raise TomlDecodeError("What? " + group +
  333. " already exists?" +
  334. str(currentlevel))
  335. except TypeError:
  336. currentlevel = currentlevel[-1]
  337. try:
  338. currentlevel[group]
  339. except KeyError:
  340. currentlevel[group] = _dict()
  341. if i == len(groups) - 1 and arrayoftables:
  342. currentlevel[group] = [_dict()]
  343. except KeyError:
  344. if i != len(groups) - 1:
  345. implicitgroups.append(group)
  346. currentlevel[group] = _dict()
  347. if i == len(groups) - 1 and arrayoftables:
  348. currentlevel[group] = [_dict()]
  349. currentlevel = currentlevel[group]
  350. if arrayoftables:
  351. try:
  352. currentlevel = currentlevel[-1]
  353. except KeyError:
  354. pass
  355. elif line[0] == "{":
  356. if line[-1] != "}":
  357. raise TomlDecodeError("Line breaks are not allowed in inline"
  358. "objects")
  359. try:
  360. _load_inline_object(line, currentlevel, _dict, multikey,
  361. multibackslash)
  362. except ValueError as err:
  363. raise TomlDecodeError(str(err))
  364. elif "=" in line:
  365. try:
  366. ret = _load_line(line, currentlevel, _dict, multikey,
  367. multibackslash)
  368. except ValueError as err:
  369. raise TomlDecodeError(str(err))
  370. if ret is not None:
  371. multikey, multilinestr, multibackslash = ret
  372. return retval
  373. def _load_inline_object(line, currentlevel, _dict, multikey=False,
  374. multibackslash=False):
  375. candidate_groups = line[1:-1].split(",")
  376. groups = []
  377. if len(candidate_groups) == 1 and not candidate_groups[0].strip():
  378. candidate_groups.pop()
  379. while len(candidate_groups) > 0:
  380. candidate_group = candidate_groups.pop(0)
  381. try:
  382. _, value = candidate_group.split('=', 1)
  383. except ValueError:
  384. raise ValueError("Invalid inline table encountered")
  385. value = value.strip()
  386. if ((value[0] == value[-1] and value[0] in ('"', "'")) or (
  387. value[0] in '-0123456789' or
  388. value in ('true', 'false') or
  389. (value[0] == "[" and value[-1] == "]") or
  390. (value[0] == '{' and value[-1] == '}'))):
  391. groups.append(candidate_group)
  392. elif len(candidate_groups) > 0:
  393. candidate_groups[0] = candidate_group + "," + candidate_groups[0]
  394. else:
  395. raise ValueError("Invalid inline table value encountered")
  396. for group in groups:
  397. status = _load_line(group, currentlevel, _dict, multikey,
  398. multibackslash)
  399. if status is not None:
  400. break
# Matches a TOML number, which allows underscores for readability: a digit
# followed by any number of "_<digit>" groups. ``_load_line`` applies it with
# ``match`` (anchored at the start of the text) before removing underscores.
_number_with_underscores = re.compile('([0-9])(_([0-9]))*')
  403. def _strictly_valid_num(n):
  404. n = n.strip()
  405. if not n:
  406. return False
  407. if n[0] == '_':
  408. return False
  409. if n[-1] == '_':
  410. return False
  411. if "_." in n or "._" in n:
  412. return False
  413. if len(n) == 1:
  414. return True
  415. if n[0] == '0' and n[1] != '.':
  416. return False
  417. if n[0] == '+' or n[0] == '-':
  418. n = n[1:]
  419. if n[0] == '0' and n[1] != '.':
  420. return False
  421. if '__' in n:
  422. return False
  423. return True
def _get_split_on_quotes(line):
    """Split a line into pieces at its quote characters.

    Callers (``loads`` and ``_load_line``) walk the returned list while
    toggling a "quoted" flag, so that separators found inside quoted text
    can be told apart from real ones.

    Args:
        line: The text to split.

    Returns:
        A list of substrings of ``line`` with the quote characters used as
        split points removed.
    """
    doublequotesplits = line.split('"')
    quoted = False
    quotesplits = []
    if len(doublequotesplits) > 1 and "'" in doublequotesplits[0]:
        # A single quote appears before the first double quote, so the
        # leading part is split on single quotes first.
        singlequotesplits = doublequotesplits[0].split("'")
        doublequotesplits = doublequotesplits[1:]
        # An even number of pieces means a single-quoted section is still
        # open: the double quote that was split on belongs to it, so it is
        # put back together with the following piece.
        while len(singlequotesplits) % 2 == 0 and len(doublequotesplits):
            singlequotesplits[-1] += '"' + doublequotesplits[0]
            doublequotesplits = doublequotesplits[1:]
            if "'" in singlequotesplits[-1]:
                singlequotesplits = (singlequotesplits[:-1] +
                                     singlequotesplits[-1].split("'"))
        quotesplits += singlequotesplits
    for doublequotesplit in doublequotesplits:
        if quoted:
            quotesplits.append(doublequotesplit)
        else:
            quotesplits += doublequotesplit.split("'")
            # NOTE(review): the flag is only flipped on this branch, so
            # once set it stays True for all remaining pieces -- confirm
            # this is intended.
            quoted = not quoted
    return quotesplits
def _load_line(line, currentlevel, _dict, multikey, multibackslash):
    """Parse one ``key = value`` line and store the result in currentlevel.

    Args:
        line: The text of the line.
        currentlevel: Mapping that receives the parsed key.
        _dict: Mapping class passed on to ``_load_value``.
        multikey: Key of a pending multi-line string, or a falsy value.
        multibackslash: Backslash-continuation flag of the pending
            multi-line string.

    Returns:
        ``None`` after storing a value, or the tuple
        ``(multikey, multilinestr, multibackslash)`` when ``multikey`` is
        set, e.g. because the value opens a multi-line string that does
        not end on this line.

    Raises:
        ValueError: On a duplicate key, or when the value cannot be read
            as a date or number.
    """
    # Locate the first '=' that is outside quotes; i counts how many '='
    # characters must be skipped because they belong to a quoted key.
    i = 1
    quotesplits = _get_split_on_quotes(line)
    quoted = False
    for quotesplit in quotesplits:
        if not quoted and '=' in quotesplit:
            break
        i += quotesplit.count('=')
        quoted = not quoted
    pair = line.split('=', i)
    strictly_valid = _strictly_valid_num(pair[-1])
    if _number_with_underscores.match(pair[-1]):
        pair[-1] = pair[-1].replace('_', '')
    # While the text after the split does not look like a value (no leading
    # whitespace, quote or bracket, not a boolean, number or date), split at
    # one more '=' and try again.
    while len(pair[-1]) and (pair[-1][0] != ' ' and pair[-1][0] != '\t' and
                             pair[-1][0] != "'" and pair[-1][0] != '"' and
                             pair[-1][0] != '[' and pair[-1][0] != '{' and
                             pair[-1] != 'true' and pair[-1] != 'false'):
        try:
            float(pair[-1])
            break
        except ValueError:
            pass
        if _load_date(pair[-1]) is not None:
            break
        i += 1
        prev_val = pair[-1]
        pair = line.split('=', i)
        if prev_val == pair[-1]:
            # No further '=' to split on: the value is not parseable.
            raise ValueError("Invalid date or number")
        if strictly_valid:
            strictly_valid = _strictly_valid_num(pair[-1])
    # pair becomes [key, value], both stripped of surrounding whitespace.
    pair = ['='.join(pair[:-1]).strip(), pair[-1].strip()]
    if (pair[0][0] == '"' or pair[0][0] == "'") and \
            (pair[0][-1] == '"' or pair[0][-1] == "'"):
        # Quoted key: drop the surrounding quotes.
        pair[0] = pair[0][1:-1]
    # The value starts with three identical quote characters but does not
    # also end with three of them: a multi-line string begins here.
    if len(pair[1]) > 2 and ((pair[1][0] == '"' or pair[1][0] == "'") and
                             pair[1][1] == pair[1][0] and
                             pair[1][2] == pair[1][0] and
                             not (len(pair[1]) > 5 and
                                  pair[1][-1] == pair[1][0] and
                                  pair[1][-2] == pair[1][0] and
                                  pair[1][-3] == pair[1][0])):
        k = len(pair[1]) - 1
        while k > -1 and pair[1][k] == '\\':
            multibackslash = not multibackslash
            k -= 1
        if multibackslash:
            multilinestr = pair[1][:-1]
        else:
            multilinestr = pair[1] + "\n"
        multikey = pair[0]
    else:
        value, vtype = _load_value(pair[1], _dict, strictly_valid)
    try:
        # Looking the key up succeeds only when it already exists.
        currentlevel[pair[0]]
        raise ValueError("Duplicate keys!")
    except KeyError:
        if multikey:
            return multikey, multilinestr, multibackslash
        else:
            currentlevel[pair[0]] = value
  506. def _load_date(val):
  507. microsecond = 0
  508. tz = None
  509. try:
  510. if len(val) > 19:
  511. if val[19] == '.':
  512. if val[-1].upper() == 'Z':
  513. subsecondval = val[20:-1]
  514. tzval = "Z"
  515. else:
  516. subsecondvalandtz = val[20:]
  517. if '+' in subsecondvalandtz:
  518. splitpoint = subsecondvalandtz.index('+')
  519. subsecondval = subsecondvalandtz[:splitpoint]
  520. tzval = subsecondvalandtz[splitpoint:]
  521. elif '-' in subsecondvalandtz:
  522. splitpoint = subsecondvalandtz.index('-')
  523. subsecondval = subsecondvalandtz[:splitpoint]
  524. tzval = subsecondvalandtz[splitpoint:]
  525. tz = TomlTz(tzval)
  526. microsecond = int(int(subsecondval) *
  527. (10 ** (6 - len(subsecondval))))
  528. else:
  529. tz = TomlTz(val[19:])
  530. except ValueError:
  531. tz = None
  532. if "-" not in val[1:]:
  533. return None
  534. try:
  535. d = datetime.datetime(
  536. int(val[:4]), int(val[5:7]),
  537. int(val[8:10]), int(val[11:13]),
  538. int(val[14:16]), int(val[17:19]), microsecond, tz)
  539. except ValueError:
  540. return None
  541. return d
  542. def _load_unicode_escapes(v, hexbytes, prefix):
  543. skip = False
  544. i = len(v) - 1
  545. while i > -1 and v[i] == '\\':
  546. skip = not skip
  547. i -= 1
  548. for hx in hexbytes:
  549. if skip:
  550. skip = False
  551. i = len(hx) - 1
  552. while i > -1 and hx[i] == '\\':
  553. skip = not skip
  554. i -= 1
  555. v += prefix
  556. v += hx
  557. continue
  558. hxb = ""
  559. i = 0
  560. hxblen = 4
  561. if prefix == "\\U":
  562. hxblen = 8
  563. hxb = ''.join(hx[i:i + hxblen]).lower()
  564. if hxb.strip('0123456789abcdef'):
  565. raise ValueError("Invalid escape sequence: " + hxb)
  566. if hxb[0] == "d" and hxb[1].strip('01234567'):
  567. raise ValueError("Invalid escape sequence: " + hxb +
  568. ". Only scalar unicode points are allowed.")
  569. v += unichr(int(hxb, 16))
  570. v += unicode(hx[len(hxb):])
  571. return v
# Tables used to unescape TOML string values.
# Characters that may follow a backslash in a basic string.
_escapes = ['0', 'b', 'f', 'n', 'r', 't', '"']
# The character each of the above is replaced by (same order).
_escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"']
# Lookup from the character after the backslash to its replacement.
_escape_to_escapedchars = dict(zip(_escapes, _escapedchars))
  579. def _unescape(v):
  580. """Unescape characters in a TOML string."""
  581. i = 0
  582. backslash = False
  583. while i < len(v):
  584. if backslash:
  585. backslash = False
  586. if v[i] in _escapes:
  587. v = v[:i - 1] + _escape_to_escapedchars[v[i]] + v[i + 1:]
  588. elif v[i] == '\\':
  589. v = v[:i - 1] + v[i:]
  590. elif v[i] == 'u' or v[i] == 'U':
  591. i += 1
  592. else:
  593. raise ValueError("Reserved escape sequence used")
  594. continue
  595. elif v[i] == '\\':
  596. backslash = True
  597. i += 1
  598. return v
def _load_value(v, _dict, strictly_valid=True):
    """Convert the text of a single TOML value into a Python object.

    Args:
        v: The value text, without surrounding whitespace.
        _dict: Mapping class used for inline tables.
        strictly_valid: Result of ``_strictly_valid_num`` for this text;
            when False, anything that is not a date is rejected.

    Returns:
        A tuple ``(value, type_name)`` where ``type_name`` is one of
        ``"bool"``, ``"str"``, ``"array"``, ``"inline_object"``, ``"date"``,
        ``"int"`` or ``"float"``.

    Raises:
        ValueError: When the text is empty or malformed.
    """
    if not v:
        raise ValueError("Empty value is invalid")
    if v == 'true':
        return (True, "bool")
    elif v == 'false':
        return (False, "bool")
    elif v[0] == '"':
        # Basic string: first verify that nothing follows the closing
        # quote. The text after the opening quote is split on '"'; empty
        # pieces correspond to adjacent quote characters.
        testv = v[1:].split('"')
        triplequote = False
        triplequotecount = 0
        if len(testv) > 1 and testv[0] == '' and testv[1] == '':
            # The value starts with three double quotes.
            testv = testv[2:]
            triplequote = True
        closed = False
        for tv in testv:
            if tv == '':
                if triplequote:
                    triplequotecount += 1
                else:
                    closed = True
            else:
                # An odd number of trailing backslashes means the quote
                # that ended this piece was escaped.
                oddbackslash = False
                try:
                    i = -1
                    j = tv[i]
                    while j == '\\':
                        oddbackslash = not oddbackslash
                        i -= 1
                        j = tv[i]
                except IndexError:
                    pass
                if not oddbackslash:
                    if closed:
                        raise ValueError("Stuff after closed string. WTF?")
                    else:
                        if not triplequote or triplequotecount > 1:
                            closed = True
                        else:
                            triplequotecount = 0
        # Reject backslash sequences that are not known escapes.
        escapeseqs = v.split('\\')[1:]
        backslash = False
        for i in escapeseqs:
            if i == '':
                backslash = not backslash
            else:
                if i[0] not in _escapes and (i[0] != 'u' and i[0] != 'U' and
                                             not backslash):
                    raise ValueError("Reserved escape sequence used")
                if backslash:
                    backslash = False
        for prefix in ["\\u", "\\U"]:
            if prefix in v:
                hexbytes = v.split(prefix)
                v = _load_unicode_escapes(hexbytes[0], hexbytes[1:], prefix)
        v = _unescape(v)
        # Strip two extra quote characters per side for a triple-quoted
        # string; the return below strips the remaining pair.
        if len(v) > 1 and v[1] == '"' and (len(v) < 3 or v[1] == v[2]):
            v = v[2:-2]
        return (v[1:-1], "str")
    elif v[0] == "'":
        # Literal string: no escape processing, only the quotes are removed.
        if v[1] == "'" and (len(v) < 3 or v[1] == v[2]):
            v = v[2:-2]
        return (v[1:-1], "str")
    elif v[0] == '[':
        return (_load_array(v, _dict), "array")
    elif v[0] == '{':
        inline_object = _get_empty_inline_table(_dict)
        _load_inline_object(v, inline_object, _dict)
        return (inline_object, "inline_object")
    else:
        # Dates are tried first, then numbers.
        parsed_date = _load_date(v)
        if parsed_date is not None:
            return (parsed_date, "date")
        if not strictly_valid:
            raise ValueError("Weirdness with leading zeroes or "
                             "underscores in your number.")
        itype = "int"
        neg = False
        if v[0] == '-':
            neg = True
            v = v[1:]
        elif v[0] == '+':
            v = v[1:]
        v = v.replace('_', '')
        if '.' in v or 'e' in v or 'E' in v:
            if '.' in v and v.split('.', 1)[1] == '':
                raise ValueError("This float is missing digits after "
                                 "the point")
            if v[0] not in '0123456789':
                raise ValueError("This float doesn't have a leading digit")
            v = float(v)
            itype = "float"
        else:
            v = int(v)
        if neg:
            return (0 - v, itype)
        return (v, itype)
  696. def _bounded_string(s):
  697. if len(s) == 0:
  698. return True
  699. if s[-1] != s[0]:
  700. return False
  701. i = -2
  702. backslash = False
  703. while len(s) + i > 0:
  704. if s[i] == "\\":
  705. backslash = not backslash
  706. i -= 1
  707. else:
  708. break
  709. return not backslash
def _load_array(a, _dict):
    """Parse the text of a TOML array into a list of Python values.

    Args:
        a: The array text, including the surrounding brackets.
        _dict: Mapping class passed on to ``_load_value``.

    Returns:
        A list with the parsed elements.

    Raises:
        ValueError: When the elements do not all have the same type, or an
            element cannot be parsed.
    """
    atype = None
    retval = []
    a = a.strip()
    # First branch: the array does not start with a nested array.
    if '[' not in a[1:-1] or "" != a[1:-1].split('[')[0].strip():
        strarray = False
        tmpa = a[1:-1].strip()
        if tmpa != '' and (tmpa[0] == '"' or tmpa[0] == "'"):
            strarray = True
        if not a[1:-1].strip().startswith('{'):
            a = a[1:-1].split(',')
        else:
            # a is an array of inline objects: find the closing brace of
            # each object (ignoring braces inside strings) to define the
            # groups.
            new_a = []
            start_group_index = 1
            end_group_index = 2
            in_str = False
            while end_group_index < len(a[1:]):
                if a[end_group_index] == '"' or a[end_group_index] == "'":
                    if in_str:
                        # Flip once per preceding backslash so that an
                        # escaped quote does not end the string.
                        backslash_index = end_group_index - 1
                        while (backslash_index > -1 and
                               a[backslash_index] == '\\'):
                            in_str = not in_str
                            backslash_index -= 1
                    in_str = not in_str
                if in_str or a[end_group_index] != '}':
                    end_group_index += 1
                    continue

                # Increase end_group_index by 1 to get the closing bracket
                end_group_index += 1
                new_a.append(a[start_group_index:end_group_index])

                # The next start index is at least after the closing bracket, a
                # closing bracket can be followed by a comma since we are in
                # an array.
                start_group_index = end_group_index + 1
                while (start_group_index < len(a[1:]) and
                       a[start_group_index] != '{'):
                    start_group_index += 1
                end_group_index = start_group_index + 1
            a = new_a
        b = 0
        if strarray:
            # Strings may contain commas: re-join pieces until each one is
            # a complete (bounded) string again.
            while b < len(a) - 1:
                ab = a[b].strip()
                while (not _bounded_string(ab) or
                       (len(ab) > 2 and
                        ab[0] == ab[1] == ab[2] and
                        ab[-2] != ab[0] and
                        ab[-3] != ab[0])):
                    a[b] = a[b] + ',' + a[b + 1]
                    ab = a[b].strip()
                    if b < len(a) - 2:
                        a = a[:b + 1] + a[b + 2:]
                    else:
                        a = a[:b + 1]
                b += 1
    else:
        # Array of arrays: split on the commas at nesting depth zero.
        al = list(a[1:-1])
        a = []
        openarr = 0
        j = 0
        for i in _range(len(al)):
            if al[i] == '[':
                openarr += 1
            elif al[i] == ']':
                openarr -= 1
            elif al[i] == ',' and not openarr:
                a.append(''.join(al[j:i]))
                j = i + 1
        a.append(''.join(al[j:]))
    # Parse each element; empty pieces (e.g. after a trailing comma) are
    # skipped and all elements must share one type.
    for i in _range(len(a)):
        a[i] = a[i].strip()
        if a[i] != '':
            nval, ntype = _load_value(a[i], _dict)
            if atype:
                if ntype != atype:
                    raise ValueError("Not a homogeneous array")
            else:
                atype = ntype
            retval.append(nval)
    return retval
  793. def dump(o, f):
  794. """Writes out dict as toml to a file
  795. Args:
  796. o: Object to dump into toml
  797. f: File descriptor where the toml should be stored
  798. Returns:
  799. String containing the toml corresponding to dictionary
  800. Raises:
  801. TypeError: When anything other than file descriptor is passed
  802. """
  803. if not f.write:
  804. raise TypeError("You can only dump an object to a file descriptor")
  805. d = dumps(o)
  806. f.write(d)
  807. return d
  808. def dumps(o, preserve=False):
  809. """Stringifies input dict as toml
  810. Args:
  811. o: Object to dump into toml
  812. preserve: Boolean parameter. If true, preserve inline tables.
  813. Returns:
  814. String containing the toml corresponding to dict
  815. """
  816. retval = ""
  817. addtoretval, sections = _dump_sections(o, "")
  818. retval += addtoretval
  819. while sections != {}:
  820. newsections = {}
  821. for section in sections:
  822. addtoretval, addtosections = _dump_sections(sections[section],
  823. section, preserve)
  824. if addtoretval or (not addtoretval and not addtosections):
  825. if retval and retval[-2:] != "\n\n":
  826. retval += "\n"
  827. retval += "[" + section + "]\n"
  828. if addtoretval:
  829. retval += addtoretval
  830. for s in addtosections:
  831. newsections[section + "." + s] = addtosections[s]
  832. sections = newsections
  833. return retval
def _dump_sections(o, sup, preserve=False):
    """Serialise the direct contents of one table.

    Args:
        o: The mapping to serialise.
        sup: Dotted name of the enclosing table (``""`` at the top level);
            used as prefix for array-of-tables headers.
        preserve: When true, values that are ``InlineTableDict`` instances
            are written inline instead of being returned as sub-sections.

    Returns:
        A tuple ``(text, subsections)``: the TOML text for the plain values
        and arrays of tables of ``o``, and a mapping (of ``o``'s own class)
        from quoted key to the nested tables still to be written.
    """
    retstr = ""
    if sup != "" and sup[-1] != ".":
        sup += '.'
    retdict = o.__class__()
    arraystr = ""
    for section in o:
        # NOTE(review): values are looked up below via the unicode() form
        # of the key, so non-string keys would presumably raise KeyError --
        # confirm.
        section = unicode(section)
        qsection = section
        # Quote keys that are not bare keys; single quotes are used when
        # the key itself contains a double quote.
        if not re.match(r'^[A-Za-z0-9_-]+$', section):
            if '"' in section:
                qsection = "'" + section + "'"
            else:
                qsection = '"' + section + '"'
        if not isinstance(o[section], dict):
            # A list containing at least one dict is written as an array of
            # tables.
            arrayoftables = False
            if isinstance(o[section], list):
                for a in o[section]:
                    if isinstance(a, dict):
                        arrayoftables = True
            if arrayoftables:
                for a in o[section]:
                    arraytabstr = "\n"
                    arraystr += "[[" + sup + qsection + "]]\n"
                    s, d = _dump_sections(a, sup + qsection)
                    if s:
                        if s[0] == "[":
                            arraytabstr += s
                        else:
                            arraystr += s
                    # Write the nested tables of this element level by
                    # level, with fully qualified headers.
                    while d != {}:
                        newd = {}
                        for dsec in d:
                            s1, d1 = _dump_sections(d[dsec], sup + qsection +
                                                    "." + dsec)
                            if s1:
                                arraytabstr += ("[" + sup + qsection + "." +
                                                dsec + "]\n")
                                arraytabstr += s1
                            for s1 in d1:
                                newd[dsec + "." + s1] = d1[s1]
                        d = newd
                    arraystr += arraytabstr
            else:
                # Plain value; None values are skipped.
                if o[section] is not None:
                    retstr += (qsection + " = " +
                               unicode(_dump_value(o[section])) + '\n')
        elif preserve and isinstance(o[section], InlineTableDict):
            retstr += (qsection + " = " + _dump_inline_table(o[section]))
        else:
            # Ordinary sub-table: handed back to the caller.
            retdict[qsection] = o[section]
    # Arrays of tables are placed after the plain key/value lines.
    retstr += arraystr
    return (retstr, retdict)
  887. def _dump_inline_table(section):
  888. """Preserve inline table in its compact syntax instead of expanding
  889. into subsection.
  890. https://github.com/toml-lang/toml#user-content-inline-table
  891. """
  892. retval = ""
  893. if isinstance(section, dict):
  894. val_list = []
  895. for k, v in section.items():
  896. val = _dump_inline_table(v)
  897. val_list.append(k + " = " + val)
  898. retval += "{ " + ", ".join(val_list) + " }\n"
  899. return retval
  900. else:
  901. return unicode(_dump_value(section))
  902. def _dump_value(v):
  903. dump_funcs = {
  904. str: _dump_str,
  905. unicode: _dump_str,
  906. list: _dump_list,
  907. int: lambda v: v,
  908. bool: lambda v: unicode(v).lower(),
  909. float: _dump_float,
  910. datetime.datetime: lambda v: v.isoformat().replace('+00:00', 'Z'),
  911. }
  912. # Lookup function corresponding to v's type
  913. dump_fn = dump_funcs.get(type(v))
  914. if dump_fn is None and hasattr(v, '__iter__'):
  915. dump_fn = dump_funcs[list]
  916. # Evaluate function (if it exists) else return v
  917. return dump_fn(v) if dump_fn is not None else dump_funcs[str](v)
def _dump_str(v):
    """Return ``v`` as a double-quoted TOML string.

    The text is derived from the Python ``repr`` of the value, which is
    then adjusted: the surrounding quotes are replaced by double quotes and
    ``\\x`` escapes are rewritten.

    Args:
        v: The value to write.

    Returns:
        The quoted text as a ``unicode`` object.
    """
    # On Python 2, byte strings are decoded as UTF-8 first.
    if sys.version_info < (3,) and hasattr(v, 'decode') and isinstance(v, str):
        v = v.decode('utf-8')
    v = "%r" % v
    # Drop the "u" prefix of a Python 2 unicode repr.
    # NOTE(review): this also fires for a non-string value whose repr
    # happens to start with "u" -- confirm that this cannot occur.
    if v[0] == 'u':
        v = v[1:]
    singlequote = v.startswith("'")
    if singlequote or v.startswith('"'):
        v = v[1:-1]
    if singlequote:
        # repr used single quotes: undo its escaping of ' and escape "
        # instead, since the result is wrapped in double quotes.
        v = v.replace("\\'", "'")
        v = v.replace('"', '\\"')
    # Re-join the pieces around every "\x": with "x" when the backslash was
    # itself escaped (a literal "\x"), with "u00" otherwise, turning the
    # "\xNN" escape into "\u00NN".
    v = v.split("\\x")
    while len(v) > 1:
        i = -1
        if not v[0]:
            v = v[1:]
        v[0] = v[0].replace("\\\\", "\\")
        # No, I don't know why != works and == breaks
        joinx = v[0][i] != "\\"
        while v[0][:i] and v[0][i] == "\\":
            joinx = not joinx
            i -= 1
        if joinx:
            joiner = "x"
        else:
            joiner = "u00"
        v = [v[0] + joiner + v[1]] + v[2:]
    return unicode('"' + v[0] + '"')
  947. def _dump_list(v):
  948. retval = "["
  949. for u in v:
  950. retval += " " + unicode(_dump_value(u)) + ","
  951. retval += "]"
  952. return retval
  953. def _dump_float(v):
  954. return "{0:.16}".format(v).replace("e+0", "e+").replace("e-0", "e-")