etree.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. from __future__ import absolute_import, division, unicode_literals
  2. from collections import OrderedDict
  3. import re
  4. from pip._vendor.six import string_types
  5. from . import base
  6. from .._utils import moduleFactoryFactory
# Matches names written as "{namespace}local"; group 1 is the text inside the
# braces and group 2 is everything after the closing brace.
tag_regexp = re.compile("{([^}]*)}(.*)")
  8. def getETreeBuilder(ElementTreeImplementation):
  9. ElementTree = ElementTreeImplementation
  10. ElementTreeCommentType = ElementTree.Comment("asd").tag
  11. class TreeWalker(base.NonRecursiveTreeWalker): # pylint:disable=unused-variable
  12. """Given the particular ElementTree representation, this implementation,
  13. to avoid using recursion, returns "nodes" as tuples with the following
  14. content:
  15. 1. The current element
  16. 2. The index of the element relative to its parent
  17. 3. A stack of ancestor elements
  18. 4. A flag "text", "tail" or None to indicate if the current node is a
  19. text node; either the text or tail of the current element (1)
  20. """
  21. def getNodeDetails(self, node):
  22. if isinstance(node, tuple): # It might be the root Element
  23. elt, _, _, flag = node
  24. if flag in ("text", "tail"):
  25. return base.TEXT, getattr(elt, flag)
  26. else:
  27. node = elt
  28. if not(hasattr(node, "tag")):
  29. node = node.getroot()
  30. if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
  31. return (base.DOCUMENT,)
  32. elif node.tag == "<!DOCTYPE>":
  33. return (base.DOCTYPE, node.text,
  34. node.get("publicId"), node.get("systemId"))
  35. elif node.tag == ElementTreeCommentType:
  36. return base.COMMENT, node.text
  37. else:
  38. assert isinstance(node.tag, string_types), type(node.tag)
  39. # This is assumed to be an ordinary element
  40. match = tag_regexp.match(node.tag)
  41. if match:
  42. namespace, tag = match.groups()
  43. else:
  44. namespace = None
  45. tag = node.tag
  46. attrs = OrderedDict()
  47. for name, value in list(node.attrib.items()):
  48. match = tag_regexp.match(name)
  49. if match:
  50. attrs[(match.group(1), match.group(2))] = value
  51. else:
  52. attrs[(None, name)] = value
  53. return (base.ELEMENT, namespace, tag,
  54. attrs, len(node) or node.text)
  55. def getFirstChild(self, node):
  56. if isinstance(node, tuple):
  57. element, key, parents, flag = node
  58. else:
  59. element, key, parents, flag = node, None, [], None
  60. if flag in ("text", "tail"):
  61. return None
  62. else:
  63. if element.text:
  64. return element, key, parents, "text"
  65. elif len(element):
  66. parents.append(element)
  67. return element[0], 0, parents, None
  68. else:
  69. return None
  70. def getNextSibling(self, node):
  71. if isinstance(node, tuple):
  72. element, key, parents, flag = node
  73. else:
  74. return None
  75. if flag == "text":
  76. if len(element):
  77. parents.append(element)
  78. return element[0], 0, parents, None
  79. else:
  80. return None
  81. else:
  82. if element.tail and flag != "tail":
  83. return element, key, parents, "tail"
  84. elif key < len(parents[-1]) - 1:
  85. return parents[-1][key + 1], key + 1, parents, None
  86. else:
  87. return None
  88. def getParentNode(self, node):
  89. if isinstance(node, tuple):
  90. element, key, parents, flag = node
  91. else:
  92. return None
  93. if flag == "text":
  94. if not parents:
  95. return element
  96. else:
  97. return element, key, parents, None
  98. else:
  99. parent = parents.pop()
  100. if not parents:
  101. return parent
  102. else:
  103. assert list(parents[-1]).count(parent) == 1
  104. return parent, list(parents[-1]).index(parent), parents, None
  105. return locals()
# Public entry point: getETreeBuilder wrapped by moduleFactoryFactory.
# NOTE(review): presumably this turns the dict returned by getETreeBuilder
# into a module-like object per ElementTree implementation -- confirm against
# the definition of moduleFactoryFactory in _utils.
getETreeModule = moduleFactoryFactory(getETreeBuilder)