1
0

etree.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. from __future__ import absolute_import, division, unicode_literals
  2. # pylint:disable=protected-access
  3. from pip._vendor.six import text_type
  4. import re
  5. from . import base
  6. from .. import _ihatexml
  7. from .. import constants
  8. from ..constants import namespaces
  9. from .._utils import moduleFactoryFactory
  # Matches tags written as "{namespace}localname"; group 1 is the namespace
  # and group 2 is the local name.  Tags without a leading "{...}" do not match.
  tag_regexp = re.compile(r"{([^}]*)}(.*)")
  def getETreeBuilder(ElementTreeImplementation, fullTree=False):
      """Create the tree-builder classes for one ElementTree implementation.

      :arg ElementTreeImplementation: object exposing ``Element``, ``Comment``
          and ``ElementTree`` (all three are used below).
      :arg fullTree: when true, ``TreeBuilder.getDocument`` returns the
          synthetic ``DOCUMENT_ROOT`` element; otherwise it returns that
          element's ``html`` child.

      :returns: ``locals()`` -- a dict holding every name defined at this
          function's top level (the classes, the two serializers and the
          arguments).  Adding or renaming a function-level local therefore
          changes the returned mapping.
      """
      ElementTree = ElementTreeImplementation
      # Comment nodes are recognised by the tag the implementation assigns to
      # them, so create a throwaway comment to learn what that tag is.
      ElementTreeCommentType = ElementTree.Comment("asd").tag

      class Element(base.Node):
          """Node that wraps an ElementTree element and mirrors its children."""

          def __init__(self, name, namespace=None):
              self._name = name
              self._namespace = namespace
              self._element = ElementTree.Element(
                  self._getETreeTag(name, namespace))
              # nameTuple always carries a namespace: the HTML one by default.
              if namespace is None:
                  self.nameTuple = namespaces["html"], self._name
              else:
                  self.nameTuple = self._namespace, self._name
              self.parent = None
              self._childNodes = []
              self._flags = []

          def _getETreeTag(self, name, namespace):
              """Return ``name``, qualified as ``{namespace}name`` if needed."""
              if namespace is None:
                  return name
              return "{%s}%s" % (namespace, name)

          def _setName(self, name):
              self._name = name
              self._element.tag = self._getETreeTag(self._name, self._namespace)

          def _getName(self):
              return self._name

          name = property(_getName, _setName)

          def _setNamespace(self, namespace):
              self._namespace = namespace
              self._element.tag = self._getETreeTag(self._name, self._namespace)

          def _getNamespace(self):
              return self._namespace

          namespace = property(_getNamespace, _setNamespace)

          def _getAttributes(self):
              return self._element.attrib

          def _setAttributes(self, attributes):
              """Replace all attributes of the wrapped element.

              Tuple keys are turned into ``{key[2]}key[1]`` names; any other
              key is used as the attribute name unchanged.
              """
              self._element.attrib.clear()
              for key, value in attributes.items():
                  if isinstance(key, tuple):
                      attr_name = "{%s}%s" % (key[2], key[1])
                  else:
                      attr_name = key
                  self._element.set(attr_name, value)

          attributes = property(_getAttributes, _setAttributes)

          def _getChildNodes(self):
              return self._childNodes

          def _setChildNodes(self, value):
              """Drop every current child, then append each node in ``value``."""
              del self._element[:]
              self._childNodes = []
              for node in value:
                  # The original called self.insertChild(), which is not
                  # defined anywhere in this class; appendChild() is the
                  # method that keeps _childNodes and _element in step.
                  self.appendChild(node)

          childNodes = property(_getChildNodes, _setChildNodes)

          def hasContent(self):
              """Return true if the node has children or text"""
              return bool(self._element.text or len(self._element))

          def appendChild(self, node):
              self._childNodes.append(node)
              self._element.append(node._element)
              node.parent = self

          def insertBefore(self, node, refNode):
              """Insert ``node`` immediately before ``refNode``.

              Raises ValueError if ``refNode`` is not a child of this node.
              """
              index = list(self._element).index(refNode._element)
              self._element.insert(index, node._element)
              # Keep the wrapper list parallel to the element's children, as
              # appendChild/removeChild do; otherwise a later removeChild(node)
              # fails and reparentChildren would skip this node.
              self._childNodes.insert(index, node)
              node.parent = self

          def removeChild(self, node):
              self._childNodes.remove(node)
              self._element.remove(node._element)
              node.parent = None

          def insertText(self, data, insertBefore=None):
              """Add ``data`` as text, optionally before the node ``insertBefore``.

              ElementTree keeps text either on the parent (``text``) or after
              a child (``tail``), so the target depends on the position.
              """
              element = self._element
              if not len(element):
                  # No children: everything is the element's own text.
                  element.text = (element.text or "") + data
              elif insertBefore is None:
                  # Insert the text as the tail of the last child element
                  last = element[-1]
                  last.tail = (last.tail or "") + data
              else:
                  # Insert the text before the specified node
                  index = list(element).index(insertBefore._element)
                  if index > 0:
                      previous = element[index - 1]
                      previous.tail = (previous.tail or "") + data
                  else:
                      element.text = (element.text or "") + data

          def cloneNode(self):
              """Return a childless copy with the same name, namespace and attributes."""
              clone = type(self)(self.name, self.namespace)
              for attr_name, value in self.attributes.items():
                  clone.attributes[attr_name] = value
              return clone

          def reparentChildren(self, newParent):
              """Move this node's text and children to the end of ``newParent``."""
              text = self._element.text
              if newParent.childNodes:
                  # The text goes after newParent's last child.  Either the
                  # tail or the text may be None, so do not use a bare "+=".
                  if text:
                      last = newParent.childNodes[-1]._element
                      last.tail = (last.tail or "") + text
              else:
                  if not newParent._element.text:
                      newParent._element.text = ""
                  if text is not None:
                      newParent._element.text += text
              self._element.text = ""
              base.Node.reparentChildren(self, newParent)

      class Comment(Element):
          """Node wrapping an ElementTree comment."""

          def __init__(self, data):
              # Element.__init__ is not called here: the wrapped element is
              # created with ElementTree.Comment, so only the bookkeeping
              # attributes are set.  _name, _namespace and nameTuple are
              # therefore not defined on Comment instances.
              self._element = ElementTree.Comment(data)
              self.parent = None
              self._childNodes = []
              self._flags = []

          def _getData(self):
              return self._element.text

          def _setData(self, value):
              self._element.text = value

          data = property(_getData, _setData)

      class DocumentType(Element):
          """Doctype stored as a ``<!DOCTYPE>`` element.

          The doctype name is kept in the element's text; the public and
          system identifiers are kept as attributes.
          """

          def __init__(self, name, publicId, systemId):
              Element.__init__(self, "<!DOCTYPE>")
              self._element.text = name
              self.publicId = publicId
              self.systemId = systemId

          def _getPublicId(self):
              return self._element.get("publicId", "")

          def _setPublicId(self, value):
              # None leaves the attribute unset; the getter then yields "".
              if value is not None:
                  self._element.set("publicId", value)

          publicId = property(_getPublicId, _setPublicId)

          def _getSystemId(self):
              return self._element.get("systemId", "")

          def _setSystemId(self, value):
              # None leaves the attribute unset; the getter then yields "".
              if value is not None:
                  self._element.set("systemId", value)

          systemId = property(_getSystemId, _setSystemId)

      class Document(Element):
          """Document node, stored as a ``DOCUMENT_ROOT`` element."""

          def __init__(self):
              Element.__init__(self, "DOCUMENT_ROOT")

      class DocumentFragment(Element):
          """Fragment node, stored as a ``DOCUMENT_FRAGMENT`` element."""

          def __init__(self):
              Element.__init__(self, "DOCUMENT_FRAGMENT")

      def testSerializer(element):
          """Return an indented, one-node-per-line dump of ``element``.

          Raises TypeError if a ``DOCUMENT_ROOT`` element has a tail or
          attributes, and KeyError if a namespace is missing from
          ``constants.prefixes``.
          """
          rv = []

          def serializeElement(element, indent=0):
              # Objects without a tag are treated as tree wrappers.
              if not(hasattr(element, "tag")):
                  element = element.getroot()
              if element.tag == "<!DOCTYPE>":
                  if element.get("publicId") or element.get("systemId"):
                      publicId = element.get("publicId") or ""
                      systemId = element.get("systemId") or ""
                      rv.append("""<!DOCTYPE %s "%s" "%s">""" %
                                (element.text, publicId, systemId))
                  else:
                      rv.append("<!DOCTYPE %s>" % (element.text,))
              elif element.tag == "DOCUMENT_ROOT":
                  rv.append("#document")
                  if element.text is not None:
                      rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
                  if element.tail is not None:
                      raise TypeError("Document node cannot have tail")
                  if hasattr(element, "attrib") and len(element.attrib):
                      raise TypeError("Document node cannot have attributes")
              elif element.tag == ElementTreeCommentType:
                  rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
              else:
                  assert isinstance(element.tag, text_type), \
                      "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
                  nsmatch = tag_regexp.match(element.tag)
                  if nsmatch is None:
                      name = element.tag
                  else:
                      # Namespaced tag: print "<prefix> <localname>".
                      ns, name = nsmatch.groups()
                      prefix = constants.prefixes[ns]
                      name = "%s %s" % (prefix, name)
                  rv.append("|%s<%s>" % (' ' * indent, name))

                  if hasattr(element, "attrib"):
                      attributes = []
                      for name, value in element.attrib.items():
                          nsmatch = tag_regexp.match(name)
                          if nsmatch is not None:
                              ns, name = nsmatch.groups()
                              prefix = constants.prefixes[ns]
                              attr_string = "%s %s" % (prefix, name)
                          else:
                              attr_string = name
                          attributes.append((attr_string, value))

                      # Sorted so the dump does not depend on attribute order.
                      for name, value in sorted(attributes):
                          rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
                  if element.text:
                      rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
              indent += 2
              for child in element:
                  serializeElement(child, indent)
              # The tail follows the element, so it is printed at the
              # element's own level rather than at its children's.
              if element.tail:
                  rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))

          serializeElement(element, 0)
          return "\n".join(rv)

      def tostring(element):  # pylint:disable=unused-variable
          """Serialize an element and its child nodes to a string"""
          rv = []
          # Not called "filter", to avoid shadowing the builtin.
          infoset_filter = _ihatexml.InfosetFilter()

          def serializeElement(element):
              if isinstance(element, ElementTree.ElementTree):
                  element = element.getroot()

              if element.tag == "<!DOCTYPE>":
                  if element.get("publicId") or element.get("systemId"):
                      publicId = element.get("publicId") or ""
                      systemId = element.get("systemId") or ""
                      rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                                (element.text, publicId, systemId))
                  else:
                      rv.append("<!DOCTYPE %s>" % (element.text,))
              elif element.tag == "DOCUMENT_ROOT":
                  if element.text is not None:
                      rv.append(element.text)
                  if element.tail is not None:
                      raise TypeError("Document node cannot have tail")
                  if hasattr(element, "attrib") and len(element.attrib):
                      raise TypeError("Document node cannot have attributes")

                  for child in element:
                      serializeElement(child)
              elif element.tag == ElementTreeCommentType:
                  rv.append("<!--%s-->" % (element.text,))
              else:
                  # This is assumed to be an ordinary element.  Convert the
                  # tag once and use it for the opening and closing tags, so
                  # both agree with each other and with the attribute names
                  # (the original converted only the attribute-less opening tag).
                  tag_name = infoset_filter.fromXmlName(element.tag)
                  if not element.attrib:
                      rv.append("<%s>" % (tag_name,))
                  else:
                      attr = " ".join(["%s=\"%s\"" % (
                          infoset_filter.fromXmlName(name), value)
                          for name, value in element.attrib.items()])
                      rv.append("<%s %s>" % (tag_name, attr))
                  if element.text:
                      rv.append(element.text)

                  for child in element:
                      serializeElement(child)

                  rv.append("</%s>" % (tag_name,))

              if element.tail:
                  rv.append(element.tail)

          serializeElement(element)
          return "".join(rv)

      class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
          """Tree builder wired to the node classes defined above."""

          documentClass = Document
          doctypeClass = DocumentType
          elementClass = Element
          commentClass = Comment
          fragmentClass = DocumentFragment
          implementation = ElementTreeImplementation

          def testSerializer(self, element):
              return testSerializer(element)

          def getDocument(self):
              """Return the built document as a raw ElementTree element.

              With ``fullTree`` the ``DOCUMENT_ROOT`` element is returned;
              otherwise its ``html`` child is returned, looked up in
              ``defaultNamespace`` when one is set.
              """
              root = self.document._element
              if fullTree:
                  return root
              if self.defaultNamespace is not None:
                  return root.find("{%s}html" % self.defaultNamespace)
              return root.find("html")

          def getFragment(self):
              """Return the raw element wrapped by the base builder's fragment."""
              return base.TreeBuilder.getFragment(self)._element

      return locals()
  # Module-level entry point: getETreeBuilder wrapped by moduleFactoryFactory,
  # which is imported from .._utils above (its behaviour is not shown here).
  getETreeModule = moduleFactoryFactory(getETreeBuilder)