dom.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. from __future__ import absolute_import, division, unicode_literals
  2. try:
  3. from collections.abc import MutableMapping
  4. except ImportError: # Python 2.7
  5. from collections import MutableMapping
  6. from xml.dom import minidom, Node
  7. import weakref
  8. from . import base
  9. from .. import constants
  10. from ..constants import namespaces
  11. from .._utils import moduleFactoryFactory
  12. def getDomBuilder(DomImplementation):
  13. Dom = DomImplementation
  14. class AttrList(MutableMapping):
  15. def __init__(self, element):
  16. self.element = element
  17. def __iter__(self):
  18. return iter(self.element.attributes.keys())
  19. def __setitem__(self, name, value):
  20. if isinstance(name, tuple):
  21. raise NotImplementedError
  22. else:
  23. attr = self.element.ownerDocument.createAttribute(name)
  24. attr.value = value
  25. self.element.attributes[name] = attr
  26. def __len__(self):
  27. return len(self.element.attributes)
  28. def items(self):
  29. return list(self.element.attributes.items())
  30. def values(self):
  31. return list(self.element.attributes.values())
  32. def __getitem__(self, name):
  33. if isinstance(name, tuple):
  34. raise NotImplementedError
  35. else:
  36. return self.element.attributes[name].value
  37. def __delitem__(self, name):
  38. if isinstance(name, tuple):
  39. raise NotImplementedError
  40. else:
  41. del self.element.attributes[name]
  42. class NodeBuilder(base.Node):
  43. def __init__(self, element):
  44. base.Node.__init__(self, element.nodeName)
  45. self.element = element
  46. namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
  47. self.element.namespaceURI or None)
  48. def appendChild(self, node):
  49. node.parent = self
  50. self.element.appendChild(node.element)
  51. def insertText(self, data, insertBefore=None):
  52. text = self.element.ownerDocument.createTextNode(data)
  53. if insertBefore:
  54. self.element.insertBefore(text, insertBefore.element)
  55. else:
  56. self.element.appendChild(text)
  57. def insertBefore(self, node, refNode):
  58. self.element.insertBefore(node.element, refNode.element)
  59. node.parent = self
  60. def removeChild(self, node):
  61. if node.element.parentNode == self.element:
  62. self.element.removeChild(node.element)
  63. node.parent = None
  64. def reparentChildren(self, newParent):
  65. while self.element.hasChildNodes():
  66. child = self.element.firstChild
  67. self.element.removeChild(child)
  68. newParent.element.appendChild(child)
  69. self.childNodes = []
  70. def getAttributes(self):
  71. return AttrList(self.element)
  72. def setAttributes(self, attributes):
  73. if attributes:
  74. for name, value in list(attributes.items()):
  75. if isinstance(name, tuple):
  76. if name[0] is not None:
  77. qualifiedName = (name[0] + ":" + name[1])
  78. else:
  79. qualifiedName = name[1]
  80. self.element.setAttributeNS(name[2], qualifiedName,
  81. value)
  82. else:
  83. self.element.setAttribute(
  84. name, value)
  85. attributes = property(getAttributes, setAttributes)
  86. def cloneNode(self):
  87. return NodeBuilder(self.element.cloneNode(False))
  88. def hasContent(self):
  89. return self.element.hasChildNodes()
  90. def getNameTuple(self):
  91. if self.namespace is None:
  92. return namespaces["html"], self.name
  93. else:
  94. return self.namespace, self.name
  95. nameTuple = property(getNameTuple)
  96. class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable
  97. def documentClass(self):
  98. self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
  99. return weakref.proxy(self)
  100. def insertDoctype(self, token):
  101. name = token["name"]
  102. publicId = token["publicId"]
  103. systemId = token["systemId"]
  104. domimpl = Dom.getDOMImplementation()
  105. doctype = domimpl.createDocumentType(name, publicId, systemId)
  106. self.document.appendChild(NodeBuilder(doctype))
  107. if Dom == minidom:
  108. doctype.ownerDocument = self.dom
  109. def elementClass(self, name, namespace=None):
  110. if namespace is None and self.defaultNamespace is None:
  111. node = self.dom.createElement(name)
  112. else:
  113. node = self.dom.createElementNS(namespace, name)
  114. return NodeBuilder(node)
  115. def commentClass(self, data):
  116. return NodeBuilder(self.dom.createComment(data))
  117. def fragmentClass(self):
  118. return NodeBuilder(self.dom.createDocumentFragment())
  119. def appendChild(self, node):
  120. self.dom.appendChild(node.element)
  121. def testSerializer(self, element):
  122. return testSerializer(element)
  123. def getDocument(self):
  124. return self.dom
  125. def getFragment(self):
  126. return base.TreeBuilder.getFragment(self).element
  127. def insertText(self, data, parent=None):
  128. data = data
  129. if parent != self:
  130. base.TreeBuilder.insertText(self, data, parent)
  131. else:
  132. # HACK: allow text nodes as children of the document node
  133. if hasattr(self.dom, '_child_node_types'):
  134. # pylint:disable=protected-access
  135. if Node.TEXT_NODE not in self.dom._child_node_types:
  136. self.dom._child_node_types = list(self.dom._child_node_types)
  137. self.dom._child_node_types.append(Node.TEXT_NODE)
  138. self.dom.appendChild(self.dom.createTextNode(data))
  139. implementation = DomImplementation
  140. name = None
  141. def testSerializer(element):
  142. element.normalize()
  143. rv = []
  144. def serializeElement(element, indent=0):
  145. if element.nodeType == Node.DOCUMENT_TYPE_NODE:
  146. if element.name:
  147. if element.publicId or element.systemId:
  148. publicId = element.publicId or ""
  149. systemId = element.systemId or ""
  150. rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
  151. (' ' * indent, element.name, publicId, systemId))
  152. else:
  153. rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name))
  154. else:
  155. rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
  156. elif element.nodeType == Node.DOCUMENT_NODE:
  157. rv.append("#document")
  158. elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
  159. rv.append("#document-fragment")
  160. elif element.nodeType == Node.COMMENT_NODE:
  161. rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
  162. elif element.nodeType == Node.TEXT_NODE:
  163. rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
  164. else:
  165. if (hasattr(element, "namespaceURI") and
  166. element.namespaceURI is not None):
  167. name = "%s %s" % (constants.prefixes[element.namespaceURI],
  168. element.nodeName)
  169. else:
  170. name = element.nodeName
  171. rv.append("|%s<%s>" % (' ' * indent, name))
  172. if element.hasAttributes():
  173. attributes = []
  174. for i in range(len(element.attributes)):
  175. attr = element.attributes.item(i)
  176. name = attr.nodeName
  177. value = attr.value
  178. ns = attr.namespaceURI
  179. if ns:
  180. name = "%s %s" % (constants.prefixes[ns], attr.localName)
  181. else:
  182. name = attr.nodeName
  183. attributes.append((name, value))
  184. for name, value in sorted(attributes):
  185. rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
  186. indent += 2
  187. for child in element.childNodes:
  188. serializeElement(child, indent)
  189. serializeElement(element, 0)
  190. return "\n".join(rv)
  191. return locals()
  192. # The actual means to get a module!
  193. getDomModule = moduleFactoryFactory(getDomBuilder)