12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945 |
- import datetime
- import io
- from os import linesep
- import re
- import sys
- from pip._vendor.toml.tz import TomlTz
- if sys.version_info < (3,):
- _range = xrange # noqa: F821
- else:
- unicode = str
- _range = range
- basestring = str
- unichr = chr
- def _detect_pathlib_path(p):
- if (3, 4) <= sys.version_info:
- import pathlib
- if isinstance(p, pathlib.PurePath):
- return True
- return False
def _ispath(p):
    """Return True when *p* is a string or a pathlib path object."""
    return isinstance(p, basestring) or _detect_pathlib_path(p)
- def _getpath(p):
- if (3, 6) <= sys.version_info:
- import os
- return os.fspath(p)
- if _detect_pathlib_path(p):
- return str(p)
- return p
# Exception raised by load() when none of the listed files exist:
# FileNotFoundError where the interpreter defines it, IOError otherwise.
try:
    FileNotFoundError
except NameError:
    FNFError = IOError
else:
    FNFError = FileNotFoundError
# Matches a local time "HH:MM:SS" with an optional fraction of 3 to 6 digits.
# Groups: 1=hour, 2=minute, 3=second, 4=".fraction" (optional), 5=fraction
# digits (optional). Written as a raw string so that "\." is a regex escape
# rather than an invalid Python string escape.
TIME_RE = re.compile(r"([0-9]{2}):([0-9]{2}):([0-9]{2})(\.([0-9]{3,6}))?")
class TomlDecodeError(ValueError):
    """Base toml Exception / Error.

    Carries the original message, the document being parsed and the
    character offset of the failure, plus the 1-based line and column
    derived from that offset.
    """

    def __init__(self, msg, doc, pos):
        # Line number = newlines before pos, plus one.
        preceding_newlines = doc.count('\n', 0, pos)
        # rfind gives -1 when pos is on the first line, making colno pos + 1.
        last_newline = doc.rfind('\n', 0, pos)
        lineno = preceding_newlines + 1
        colno = pos - last_newline
        emsg = '{} (line {} column {} char {})'.format(msg, lineno, colno, pos)
        super(TomlDecodeError, self).__init__(emsg)
        self.msg, self.doc, self.pos = msg, doc, pos
        self.lineno, self.colno = lineno, colno
# Matches the start of a TOML number that may use underscores between digits
# for readability: one digit followed by zero or more "_<digit>" groups.
# The pattern is unanchored at the end, so with .match() it succeeds for any
# text that begins with a digit.
_number_with_underscores = re.compile('([0-9])(_([0-9]))*')
- def _strictly_valid_num(n):
- n = n.strip()
- if not n:
- return False
- if n[0] == '_':
- return False
- if n[-1] == '_':
- return False
- if "_." in n or "._" in n:
- return False
- if len(n) == 1:
- return True
- if n[0] == '0' and n[1] not in ['.', 'o', 'b', 'x']:
- return False
- if n[0] == '+' or n[0] == '-':
- n = n[1:]
- if len(n) > 1 and n[0] == '0' and n[1] != '.':
- return False
- if '__' in n:
- return False
- return True
def load(f, _dict=dict, decoder=None):
    """Parses named file or files as toml and returns a dictionary

    Args:
        f: Path to the file to open, array of files to read into single dict
           or a file descriptor
        _dict: (optional) Specifies the class of the returned toml dictionary
        decoder: (optional) Decoder instance to use; when omitted a
                 TomlDecoder built around _dict is created

    Returns:
        Parsed toml file represented as a dictionary

    Raises:
        TypeError -- When f is invalid type
        TomlDecodeError: Error while decoding toml
        IOError / FileNotFoundError -- When an array with no valid (existing)
        (Python 2 / Python 3)          file paths is passed
    """

    if _ispath(f):
        with io.open(_getpath(f), encoding='utf-8') as ffile:
            return loads(ffile.read(), _dict, decoder)
    elif isinstance(f, list):
        from os import path as op
        from warnings import warn
        if not any(op.exists(path) for path in f):
            error_msg = "Load expects a list to contain filenames only."
            error_msg += linesep
            error_msg += ("The list needs to contain the path of at least one "
                          "existing file.")
            raise FNFError(error_msg)
        if decoder is None:
            # Honour the caller's _dict so the merged table has the same
            # class as the tables parsed from each file.
            decoder = TomlDecoder(_dict)
        d = decoder.get_empty_table()
        for filename in f:
            if op.exists(filename):
                d.update(load(filename, _dict, decoder))
            else:
                warn("Non-existent filename in list with at least one valid "
                     "filename")
        return d
    else:
        # Only a missing .read attribute means "wrong argument type"; keep
        # the parse itself outside the try so its errors are not relabelled.
        try:
            contents = f.read()
        except AttributeError:
            raise TypeError("You can only load a file descriptor, filename or "
                            "list")
        return loads(contents, _dict, decoder)
# Characters allowed in an unquoted table (group) name: ASCII letters,
# digits, underscore and hyphen. Used by loads() to reject bare group names
# that would need quoting.
_groupname_re = re.compile(r'^[A-Za-z0-9_-]+$')
def loads(s, _dict=dict, decoder=None):
    """Parses string as toml

    The text is processed in two passes. The first walks it character by
    character to blank out comments, normalise CRLF, fold newlines inside
    arrays into spaces and validate key names. The second walks the
    resulting lines and fills the result table.

    Args:
        s: String to be parsed
        _dict: (optional) Specifies the class of the returned toml dictionary
        decoder: (optional) Decoder instance to use; when omitted a
                 TomlDecoder built around _dict is created

    Returns:
        Parsed toml file represented as a dictionary

    Raises:
        TypeError: When a non-string is passed
        TomlDecodeError: Error while decoding toml
    """

    implicitgroups = []
    if decoder is None:
        decoder = TomlDecoder(_dict)
    retval = decoder.get_empty_table()
    currentlevel = retval
    if not isinstance(s, basestring):
        raise TypeError("Expecting something like a string")

    if not isinstance(s, unicode):
        s = s.decode('utf8')

    original = s
    sl = list(s)
    openarr = 0
    openstring = False
    openstrchar = ""
    multilinestr = False
    arrayoftables = False
    beginline = True
    keygroup = False
    dottedkey = False
    # keyname: 0 = not reading a key, 1 = inside a key, 2 = key finished,
    # waiting for '=' (or for the next part of a dotted key).
    keyname = 0

    # ---- Pass 1: character-level normalisation and key validation ----
    for i, item in enumerate(sl):
        # Turn the CR of a CRLF pair into a space. The length check keeps a
        # lone '\r' at the very end of the input from indexing past the list.
        if item == '\r' and len(sl) > i + 1 and sl[i + 1] == '\n':
            sl[i] = ' '
            continue
        if keyname:
            if item == '\n':
                raise TomlDecodeError("Key name found without value."
                                      " Reached end of line.", original, i)
            if openstring:
                # Inside a quoted key part: skip everything up to the
                # closing quote.
                if item == openstrchar:
                    keyname = 2
                    openstring = False
                    openstrchar = ""
                continue
            elif keyname == 1:
                if item.isspace():
                    keyname = 2
                    continue
                elif item == '.':
                    dottedkey = True
                    continue
                elif item.isalnum() or item == '_' or item == '-':
                    continue
                elif (dottedkey and sl[i - 1] == '.' and
                      (item == '"' or item == "'")):
                    openstring = True
                    openstrchar = item
                    continue
            elif keyname == 2:
                if item.isspace():
                    if dottedkey:
                        nextitem = sl[i + 1]
                        if not nextitem.isspace() and nextitem != '.':
                            keyname = 1
                    continue
                if item == '.':
                    dottedkey = True
                    nextitem = sl[i + 1]
                    if not nextitem.isspace() and nextitem != '.':
                        keyname = 1
                    continue
            if item == '=':
                keyname = 0
                dottedkey = False
            else:
                raise TomlDecodeError("Found invalid character in key name: '" +
                                      item + "'. Try quoting the key name.",
                                      original, i)
        if item == "'" and openstrchar != '"':
            # Count immediately preceding single quotes to detect ''' .
            k = 1
            try:
                while sl[i - k] == "'":
                    k += 1
                    if k == 3:
                        break
            except IndexError:
                pass
            if k == 3:
                multilinestr = not multilinestr
                openstring = multilinestr
            else:
                openstring = not openstring
            if openstring:
                openstrchar = "'"
            else:
                openstrchar = ""
        if item == '"' and openstrchar != "'":
            oddbackslash = False
            k = 1
            tripquote = False
            try:
                while sl[i - k] == '"':
                    k += 1
                    if k == 3:
                        tripquote = True
                        break
                if k == 1 or (k == 3 and tripquote):
                    # An odd number of preceding backslashes means this
                    # quote is escaped and does not open/close a string.
                    while sl[i - k] == '\\':
                        oddbackslash = not oddbackslash
                        k += 1
            except IndexError:
                pass
            if not oddbackslash:
                if tripquote:
                    multilinestr = not multilinestr
                    openstring = multilinestr
                else:
                    openstring = not openstring
            if openstring:
                openstrchar = '"'
            else:
                openstrchar = ""
        if item == '#' and (not openstring and not keygroup and
                            not arrayoftables):
            # Blank out the comment up to (not including) the newline.
            j = i
            try:
                while sl[j] != '\n':
                    sl[j] = ' '
                    j += 1
            except IndexError:
                # Comment runs to the end of the input: nothing left to scan.
                break
        if item == '[' and (not openstring and not keygroup and
                            not arrayoftables):
            if beginline:
                if len(sl) > i + 1 and sl[i + 1] == '[':
                    arrayoftables = True
                else:
                    keygroup = True
            else:
                openarr += 1
        if item == ']' and not openstring:
            if keygroup:
                keygroup = False
            elif arrayoftables:
                if sl[i - 1] == ']':
                    arrayoftables = False
            else:
                openarr -= 1
        if item == '\n':
            if openstring or multilinestr:
                if not multilinestr:
                    raise TomlDecodeError("Unbalanced quotes", original, i)
                if ((sl[i - 1] == "'" or sl[i - 1] == '"') and (
                        sl[i - 2] == sl[i - 1])):
                    sl[i] = sl[i - 1]
                    if sl[i - 3] == sl[i - 1]:
                        sl[i - 3] = ' '
            elif openarr:
                # Newlines inside an array become spaces so the array ends
                # up on a single line for pass 2.
                sl[i] = ' '
            else:
                beginline = True
        elif beginline and sl[i] != ' ' and sl[i] != '\t':
            beginline = False
            if not keygroup and not arrayoftables:
                if sl[i] == '=':
                    raise TomlDecodeError("Found empty keyname. ", original, i)
                keyname = 1

    # ---- Pass 2: line-level parsing into tables ----
    s = ''.join(sl)
    s = s.split('\n')
    multikey = None
    multilinestr = ""
    multibackslash = False
    pos = 0
    for idx, line in enumerate(s):
        if idx > 0:
            # Offset of this line in the normalised text, for error reports.
            pos += len(s[idx - 1]) + 1
        if not multilinestr or multibackslash or '\n' not in multilinestr:
            line = line.strip()
        if line == "" and (not multikey or multibackslash):
            continue
        if multikey:
            # Continuation of a multi-line string started on an earlier line.
            if multibackslash:
                multilinestr += line
            else:
                multilinestr += line
            multibackslash = False
            if len(line) > 2 and (line[-1] == multilinestr[0] and
                                  line[-2] == multilinestr[0] and
                                  line[-3] == multilinestr[0]):
                try:
                    value, vtype = decoder.load_value(multilinestr)
                except ValueError as err:
                    raise TomlDecodeError(str(err), original, pos)
                currentlevel[multikey] = value
                multikey = None
                multilinestr = ""
            else:
                k = len(multilinestr) - 1
                while k > -1 and multilinestr[k] == '\\':
                    multibackslash = not multibackslash
                    k -= 1
                if multibackslash:
                    multilinestr = multilinestr[:-1]
                else:
                    multilinestr += "\n"
            continue
        if line[0] == '[':
            # Table header: [a.b] or array-of-tables header: [[a.b]].
            arrayoftables = False
            if len(line) == 1:
                raise TomlDecodeError("Opening key group bracket on line by "
                                      "itself.", original, pos)
            if line[1] == '[':
                arrayoftables = True
                line = line[2:]
                splitstr = ']]'
            else:
                line = line[1:]
                splitstr = ']'
            i = 1
            quotesplits = decoder._get_split_on_quotes(line)
            quoted = False
            for quotesplit in quotesplits:
                if not quoted and splitstr in quotesplit:
                    break
                i += quotesplit.count(splitstr)
                quoted = not quoted
            line = line.split(splitstr, i)
            if len(line) < i + 1 or line[-1].strip() != "":
                raise TomlDecodeError("Key group not on a line by itself.",
                                      original, pos)
            groups = splitstr.join(line[:-1]).split('.')
            i = 0
            while i < len(groups):
                groups[i] = groups[i].strip()
                if len(groups[i]) > 0 and (groups[i][0] == '"' or
                                           groups[i][0] == "'"):
                    # Quoted name: re-join pieces that were split on a '.'
                    # inside the quotes, then drop the quotes.
                    groupstr = groups[i]
                    j = i + 1
                    while not groupstr[0] == groupstr[-1]:
                        j += 1
                        if j > len(groups) + 2:
                            raise TomlDecodeError("Invalid group name '" +
                                                  groupstr + "' Something " +
                                                  "went wrong.", original, pos)
                        groupstr = '.'.join(groups[i:j]).strip()
                    groups[i] = groupstr[1:-1]
                    groups[i + 1:j] = []
                else:
                    if not _groupname_re.match(groups[i]):
                        raise TomlDecodeError("Invalid group name '" +
                                              groups[i] + "'. Try quoting it.",
                                              original, pos)
                i += 1
            currentlevel = retval
            for i in _range(len(groups)):
                group = groups[i]
                if group == "":
                    raise TomlDecodeError("Can't have a keygroup with an empty "
                                          "name", original, pos)
                try:
                    # Lookup only for its exceptions: KeyError = new group,
                    # TypeError = currentlevel is a list (array of tables).
                    currentlevel[group]
                    if i == len(groups) - 1:
                        if group in implicitgroups:
                            implicitgroups.remove(group)
                            if arrayoftables:
                                raise TomlDecodeError("An implicitly defined "
                                                      "table can't be an array",
                                                      original, pos)
                        elif arrayoftables:
                            currentlevel[group].append(decoder.get_empty_table()
                                                       )
                        else:
                            raise TomlDecodeError("What? " + group +
                                                  " already exists?" +
                                                  str(currentlevel),
                                                  original, pos)
                except TypeError:
                    currentlevel = currentlevel[-1]
                    if group not in currentlevel:
                        currentlevel[group] = decoder.get_empty_table()
                        if i == len(groups) - 1 and arrayoftables:
                            currentlevel[group] = [decoder.get_empty_table()]
                except KeyError:
                    if i != len(groups) - 1:
                        implicitgroups.append(group)
                    currentlevel[group] = decoder.get_empty_table()
                    if i == len(groups) - 1 and arrayoftables:
                        currentlevel[group] = [decoder.get_empty_table()]
                currentlevel = currentlevel[group]
                if arrayoftables:
                    try:
                        currentlevel = currentlevel[-1]
                    except KeyError:
                        pass
        elif line[0] == "{":
            if line[-1] != "}":
                raise TomlDecodeError("Line breaks are not allowed in inline"
                                      "objects", original, pos)
            try:
                decoder.load_inline_object(line, currentlevel, multikey,
                                           multibackslash)
            except ValueError as err:
                raise TomlDecodeError(str(err), original, pos)
        elif "=" in line:
            try:
                ret = decoder.load_line(line, currentlevel, multikey,
                                        multibackslash)
            except ValueError as err:
                raise TomlDecodeError(str(err), original, pos)
            if ret is not None:
                # load_line reports the start of a multi-line string.
                multikey, multilinestr, multibackslash = ret
    return retval
- def _load_date(val):
- microsecond = 0
- tz = None
- try:
- if len(val) > 19:
- if val[19] == '.':
- if val[-1].upper() == 'Z':
- subsecondval = val[20:-1]
- tzval = "Z"
- else:
- subsecondvalandtz = val[20:]
- if '+' in subsecondvalandtz:
- splitpoint = subsecondvalandtz.index('+')
- subsecondval = subsecondvalandtz[:splitpoint]
- tzval = subsecondvalandtz[splitpoint:]
- elif '-' in subsecondvalandtz:
- splitpoint = subsecondvalandtz.index('-')
- subsecondval = subsecondvalandtz[:splitpoint]
- tzval = subsecondvalandtz[splitpoint:]
- else:
- tzval = None
- subsecondval = subsecondvalandtz
- if tzval is not None:
- tz = TomlTz(tzval)
- microsecond = int(int(subsecondval) *
- (10 ** (6 - len(subsecondval))))
- else:
- tz = TomlTz(val[19:])
- except ValueError:
- tz = None
- if "-" not in val[1:]:
- return None
- try:
- if len(val) == 10:
- d = datetime.date(
- int(val[:4]), int(val[5:7]),
- int(val[8:10]))
- else:
- d = datetime.datetime(
- int(val[:4]), int(val[5:7]),
- int(val[8:10]), int(val[11:13]),
- int(val[14:16]), int(val[17:19]), microsecond, tz)
- except ValueError:
- return None
- return d
- def _load_unicode_escapes(v, hexbytes, prefix):
- skip = False
- i = len(v) - 1
- while i > -1 and v[i] == '\\':
- skip = not skip
- i -= 1
- for hx in hexbytes:
- if skip:
- skip = False
- i = len(hx) - 1
- while i > -1 and hx[i] == '\\':
- skip = not skip
- i -= 1
- v += prefix
- v += hx
- continue
- hxb = ""
- i = 0
- hxblen = 4
- if prefix == "\\U":
- hxblen = 8
- hxb = ''.join(hx[i:i + hxblen]).lower()
- if hxb.strip('0123456789abcdef'):
- raise ValueError("Invalid escape sequence: " + hxb)
- if hxb[0] == "d" and hxb[1].strip('01234567'):
- raise ValueError("Invalid escape sequence: " + hxb +
- ". Only scalar unicode points are allowed.")
- v += unichr(int(hxb, 16))
- v += unicode(hx[len(hxb):])
- return v
- # Unescape TOML string values.
- # content after the \
- _escapes = ['0', 'b', 'f', 'n', 'r', 't', '"']
- # What it should be replaced by
- _escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"']
- # Used for substitution
- _escape_to_escapedchars = dict(zip(_escapes, _escapedchars))
- def _unescape(v):
- """Unescape characters in a TOML string."""
- i = 0
- backslash = False
- while i < len(v):
- if backslash:
- backslash = False
- if v[i] in _escapes:
- v = v[:i - 1] + _escape_to_escapedchars[v[i]] + v[i + 1:]
- elif v[i] == '\\':
- v = v[:i - 1] + v[i:]
- elif v[i] == 'u' or v[i] == 'U':
- i += 1
- else:
- raise ValueError("Reserved escape sequence used")
- continue
- elif v[i] == '\\':
- backslash = True
- i += 1
- return v
class InlineTableDict(object):
    """Sentinel base class used to mark tables that came from inline syntax.

    It has no behaviour of its own and is not itself a dict; the decoder
    combines it with the configured dict class to build the concrete type.
    """
class TomlDecoder(object):
    """Turns TOML value text into Python objects for load()/loads().

    The decoder is parameterised by the mapping class used for tables.
    """

    def __init__(self, _dict=dict):
        # Class instantiated for every table the decoder creates.
        self._dict = _dict

    def get_empty_table(self):
        """Return a new, empty table of the configured mapping class."""
        return self._dict()

    def get_empty_inline_table(self):
        """Return a new, empty table that is also an InlineTableDict."""
        class DynamicInlineTableDict(self._dict, InlineTableDict):
            """Concrete sentinel subclass for inline tables.
            It is a subclass of _dict which is passed in dynamically at load
            time
            It is also a subclass of InlineTableDict
            """

        return DynamicInlineTableDict()

    def load_inline_object(self, line, currentlevel, multikey=False,
                           multibackslash=False):
        """Parse an inline table "{k = v, ...}" into currentlevel.

        The text between the braces is split on commas; pieces whose value
        does not yet look complete are glued back onto the next piece, so
        commas inside strings, arrays and nested tables survive.

        Raises:
            ValueError: If a piece has no '=', has an empty value, or the
                last piece never forms a complete value.
        """
        candidate_groups = line[1:-1].split(",")
        groups = []
        if len(candidate_groups) == 1 and not candidate_groups[0].strip():
            candidate_groups.pop()
        while len(candidate_groups) > 0:
            candidate_group = candidate_groups.pop(0)
            try:
                _, value = candidate_group.split('=', 1)
            except ValueError:
                raise ValueError("Invalid inline table encountered")
            value = value.strip()
            if not value:
                # "key =" with nothing after it; indexing value[0] below
                # would otherwise fail with IndexError.
                raise ValueError("Invalid inline table value encountered")
            if ((value[0] == value[-1] and value[0] in ('"', "'")) or (
                    value[0] in '-0123456789' or
                    value in ('true', 'false') or
                    (value[0] == "[" and value[-1] == "]") or
                    (value[0] == '{' and value[-1] == '}'))):
                groups.append(candidate_group)
            elif len(candidate_groups) > 0:
                candidate_groups[0] = (candidate_group + "," +
                                       candidate_groups[0])
            else:
                raise ValueError("Invalid inline table value encountered")
        for group in groups:
            status = self.load_line(group, currentlevel, multikey,
                                    multibackslash)
            if status is not None:
                break

    def _get_split_on_quotes(self, line):
        """Split line on quote characters and return the list of pieces."""
        doublequotesplits = line.split('"')
        quoted = False
        quotesplits = []
        if len(doublequotesplits) > 1 and "'" in doublequotesplits[0]:
            singlequotesplits = doublequotesplits[0].split("'")
            doublequotesplits = doublequotesplits[1:]
            # An even piece count means a single-quoted section is still
            # open; pull in following text until it is closed.
            while len(singlequotesplits) % 2 == 0 and len(doublequotesplits):
                singlequotesplits[-1] += '"' + doublequotesplits[0]
                doublequotesplits = doublequotesplits[1:]
                if "'" in singlequotesplits[-1]:
                    singlequotesplits = (singlequotesplits[:-1] +
                                         singlequotesplits[-1].split("'"))
            quotesplits += singlequotesplits
        for doublequotesplit in doublequotesplits:
            if quoted:
                quotesplits.append(doublequotesplit)
            else:
                quotesplits += doublequotesplit.split("'")
                quoted = not quoted
        return quotesplits

    def load_line(self, line, currentlevel, multikey, multibackslash):
        """Parse one "key = value" line and store it in currentlevel.

        Returns:
            None after storing a complete value, or a
            (multikey, multilinestr, multibackslash) tuple when the value
            opens a multi-line string that continues on later lines.

        Raises:
            ValueError: On an invalid value or a duplicate key.
        """
        # i = which '=' separates key from value, skipping any inside quotes.
        i = 1
        quotesplits = self._get_split_on_quotes(line)
        quoted = False
        for quotesplit in quotesplits:
            if not quoted and '=' in quotesplit:
                break
            i += quotesplit.count('=')
            quoted = not quoted
        pair = line.split('=', i)
        strictly_valid = _strictly_valid_num(pair[-1])
        if _number_with_underscores.match(pair[-1]):
            pair[-1] = pair[-1].replace('_', '')
        while len(pair[-1]) and (pair[-1][0] != ' ' and pair[-1][0] != '\t' and
                                 pair[-1][0] != "'" and pair[-1][0] != '"' and
                                 pair[-1][0] != '[' and pair[-1][0] != '{' and
                                 pair[-1] != 'true' and pair[-1] != 'false'):
            try:
                float(pair[-1])
                break
            except ValueError:
                pass
            if _load_date(pair[-1]) is not None:
                break
            i += 1
            prev_val = pair[-1]
            pair = line.split('=', i)
            if prev_val == pair[-1]:
                raise ValueError("Invalid date or number")
            if strictly_valid:
                strictly_valid = _strictly_valid_num(pair[-1])
        pair = ['='.join(pair[:-1]).strip(), pair[-1].strip()]
        if '.' in pair[0]:
            # Dotted key: walk/create the intermediate tables.
            if '"' in pair[0] or "'" in pair[0]:
                quotesplits = self._get_split_on_quotes(pair[0])
                quoted = False
                levels = []
                for quotesplit in quotesplits:
                    if quoted:
                        levels.append(quotesplit)
                    else:
                        levels += [level.strip() for level in
                                   quotesplit.split('.')]
                    quoted = not quoted
            else:
                levels = pair[0].split('.')
            while levels[-1] == "":
                levels = levels[:-1]
            for level in levels[:-1]:
                if level == "":
                    continue
                if level not in currentlevel:
                    currentlevel[level] = self.get_empty_table()
                currentlevel = currentlevel[level]
            pair[0] = levels[-1].strip()
        elif (pair[0][0] == '"' or pair[0][0] == "'") and \
                (pair[0][-1] == pair[0][0]):
            pair[0] = pair[0][1:-1]
        # Value opens with three quotes but does not also close with three:
        # it is the first line of a multi-line string.
        if len(pair[1]) > 2 and ((pair[1][0] == '"' or pair[1][0] == "'") and
                                 pair[1][1] == pair[1][0] and
                                 pair[1][2] == pair[1][0] and
                                 not (len(pair[1]) > 5 and
                                      pair[1][-1] == pair[1][0] and
                                      pair[1][-2] == pair[1][0] and
                                      pair[1][-3] == pair[1][0])):
            k = len(pair[1]) - 1
            while k > -1 and pair[1][k] == '\\':
                multibackslash = not multibackslash
                k -= 1
            if multibackslash:
                multilinestr = pair[1][:-1]
            else:
                multilinestr = pair[1] + "\n"
            multikey = pair[0]
        else:
            value, vtype = self.load_value(pair[1], strictly_valid)
        try:
            # Lookup only for its exceptions: success means the key exists.
            currentlevel[pair[0]]
            raise ValueError("Duplicate keys!")
        except TypeError:
            raise ValueError("Duplicate keys!")
        except KeyError:
            if multikey:
                return multikey, multilinestr, multibackslash
            else:
                currentlevel[pair[0]] = value

    def load_value(self, v, strictly_valid=True):
        """Convert the text of one TOML value into a Python object.

        Returns:
            A (value, type_name) tuple; type_name is one of "bool", "str",
            "array", "inline_object", "time", "date", "int" or "float".

        Raises:
            ValueError: If v is empty or is not a valid value.
        """
        if not v:
            raise ValueError("Empty value is invalid")
        if v == 'true':
            return (True, "bool")
        elif v == 'false':
            return (False, "bool")
        elif v[0] == '"' or v[0] == "'":
            quotechar = v[0]
            testv = v[1:].split(quotechar)
            triplequote = False
            triplequotecount = 0
            if len(testv) > 1 and testv[0] == '' and testv[1] == '':
                testv = testv[2:]
                triplequote = True
            closed = False
            for tv in testv:
                if tv == '':
                    if triplequote:
                        triplequotecount += 1
                    else:
                        closed = True
                else:
                    oddbackslash = False
                    try:
                        i = -1
                        j = tv[i]
                        while j == '\\':
                            oddbackslash = not oddbackslash
                            i -= 1
                            j = tv[i]
                    except IndexError:
                        pass
                    if not oddbackslash:
                        if closed:
                            raise ValueError("Stuff after closed string. WTF?")
                        else:
                            if not triplequote or triplequotecount > 1:
                                closed = True
                            else:
                                triplequotecount = 0
            if quotechar == '"':
                # Only double-quoted strings process escape sequences.
                escapeseqs = v.split('\\')[1:]
                backslash = False
                for i in escapeseqs:
                    if i == '':
                        backslash = not backslash
                    else:
                        if i[0] not in _escapes and (i[0] != 'u' and
                                                     i[0] != 'U' and
                                                     not backslash):
                            raise ValueError("Reserved escape sequence used")
                        if backslash:
                            backslash = False
                for prefix in ["\\u", "\\U"]:
                    if prefix in v:
                        hexbytes = v.split(prefix)
                        v = _load_unicode_escapes(hexbytes[0], hexbytes[1:],
                                                  prefix)
                v = _unescape(v)
            if len(v) > 1 and v[1] == quotechar and (len(v) < 3 or
                                                     v[1] == v[2]):
                # Triple-quoted (or empty) string: drop two extra quotes
                # from each side before the final strip below.
                v = v[2:-2]
            return (v[1:-1], "str")
        elif v[0] == '[':
            return (self.load_array(v), "array")
        elif v[0] == '{':
            inline_object = self.get_empty_inline_table()
            self.load_inline_object(v, inline_object)
            return (inline_object, "inline_object")
        elif TIME_RE.match(v):
            h, m, s, _, ms = TIME_RE.match(v).groups()
            # The captured digits are a decimal fraction of a second
            # (3 to 6 digits); right-pad to 6 digits to get microseconds,
            # so ".123" becomes 123000 rather than 123.
            microsecond = int(ms.ljust(6, '0')) if ms else 0
            time = datetime.time(int(h), int(m), int(s), microsecond)
            return (time, "time")
        else:
            parsed_date = _load_date(v)
            if parsed_date is not None:
                return (parsed_date, "date")
            if not strictly_valid:
                raise ValueError("Weirdness with leading zeroes or "
                                 "underscores in your number.")
            itype = "int"
            neg = False
            if v[0] == '-':
                neg = True
                v = v[1:]
            elif v[0] == '+':
                v = v[1:]
            v = v.replace('_', '')
            lowerv = v.lower()
            if '.' in v or ('x' not in v and ('e' in v or 'E' in v)):
                if '.' in v and v.split('.', 1)[1] == '':
                    raise ValueError("This float is missing digits after "
                                     "the point")
                if v[0] not in '0123456789':
                    raise ValueError("This float doesn't have a leading "
                                     "digit")
                v = float(v)
                itype = "float"
            elif len(lowerv) == 3 and (lowerv == 'inf' or lowerv == 'nan'):
                v = float(v)
                itype = "float"
            if itype == "int":
                # Base 0 lets int() honour 0x / 0o / 0b prefixes.
                v = int(v, 0)
            if neg:
                return (0 - v, itype)
            return (v, itype)

    def bounded_string(self, s):
        """Return True if s is empty or ends with an unescaped copy of its
        first character."""
        if len(s) == 0:
            return True
        if s[-1] != s[0]:
            return False
        i = -2
        backslash = False
        # Count the backslashes directly before the final character.
        while len(s) + i > 0:
            if s[i] == "\\":
                backslash = not backslash
                i -= 1
            else:
                break
        return not backslash

    def load_array(self, a):
        """Parse the text of a TOML array into a list.

        Raises:
            ValueError: If the elements are not all of the same type, or an
                element is not a valid value.
        """
        atype = None
        retval = []
        a = a.strip()
        if '[' not in a[1:-1] or "" != a[1:-1].split('[')[0].strip():
            # Flat array (its first element is not itself an array).
            strarray = False
            tmpa = a[1:-1].strip()
            if tmpa != '' and (tmpa[0] == '"' or tmpa[0] == "'"):
                strarray = True
            if not a[1:-1].strip().startswith('{'):
                a = a[1:-1].split(',')
            else:
                # a is an inline object, we must find the matching parenthesis
                # to define groups
                new_a = []
                start_group_index = 1
                end_group_index = 2
                in_str = False
                while end_group_index < len(a[1:]):
                    if a[end_group_index] == '"' or a[end_group_index] == "'":
                        if in_str:
                            backslash_index = end_group_index - 1
                            while (backslash_index > -1 and
                                   a[backslash_index] == '\\'):
                                in_str = not in_str
                                backslash_index -= 1
                        in_str = not in_str
                    if in_str or a[end_group_index] != '}':
                        end_group_index += 1
                        continue

                    # Increase end_group_index by 1 to get the closing bracket
                    end_group_index += 1

                    new_a.append(a[start_group_index:end_group_index])

                    # The next start index is at least after the closing
                    # bracket, a closing bracket can be followed by a comma
                    # since we are in an array.
                    start_group_index = end_group_index + 1
                    while (start_group_index < len(a[1:]) and
                           a[start_group_index] != '{'):
                        start_group_index += 1
                    end_group_index = start_group_index + 1
                a = new_a
            b = 0
            if strarray:
                # Re-join pieces that were split on a comma inside a string.
                while b < len(a) - 1:
                    ab = a[b].strip()
                    while (not self.bounded_string(ab) or
                           (len(ab) > 2 and
                            ab[0] == ab[1] == ab[2] and
                            ab[-2] != ab[0] and
                            ab[-3] != ab[0])):
                        a[b] = a[b] + ',' + a[b + 1]
                        ab = a[b].strip()
                        if b < len(a) - 2:
                            a = a[:b + 1] + a[b + 2:]
                        else:
                            a = a[:b + 1]
                    b += 1
        else:
            # Array of arrays: split only on commas at bracket depth zero.
            al = list(a[1:-1])
            a = []
            openarr = 0
            j = 0
            for i in _range(len(al)):
                if al[i] == '[':
                    openarr += 1
                elif al[i] == ']':
                    openarr -= 1
                elif al[i] == ',' and not openarr:
                    a.append(''.join(al[j:i]))
                    j = i + 1
            a.append(''.join(al[j:]))
        for i in _range(len(a)):
            a[i] = a[i].strip()
            if a[i] != '':
                nval, ntype = self.load_value(a[i])
                if atype:
                    if ntype != atype:
                        raise ValueError("Not a homogeneous array")
                else:
                    atype = ntype
                retval.append(nval)
        return retval
|