123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417 |
- from __future__ import absolute_import, division, unicode_literals
- from pip._vendor.six import text_type
- from ..constants import scopingElements, tableInsertModeElements, namespaces
# Sentinel stored in the list of active formatting elements as a scope
# marker.  Markers are inserted when entering object elements, marquees,
# table cells, and table captions, and are used to prevent formatting
# from "leaking" into tables, object elements, and marquees.
Marker = None

# Lookup table used by TreeBuilder.elementInScope, keyed by its ``variant``
# argument.  Each value is a pair ``(elements, invert)``:
#
# * ``elements`` is a frozenset of (namespace, name) tuples;
# * ``invert`` is a boolean that is XOR-ed with the membership test
#   ``node.nameTuple in elements``.  When the XOR result is true the scope
#   search stops and reports that the target is not in scope.
listElementsMap = {
    None: (
        frozenset(scopingElements),
        False,
    ),
    "button": (
        frozenset(scopingElements | {(namespaces["html"], "button")}),
        False,
    ),
    "list": (
        frozenset(scopingElements | {(namespaces["html"], "ol"),
                                     (namespaces["html"], "ul")}),
        False,
    ),
    "table": (
        frozenset([(namespaces["html"], "html"),
                   (namespaces["html"], "table")]),
        False,
    ),
    "select": (
        frozenset([(namespaces["html"], "optgroup"),
                   (namespaces["html"], "option")]),
        True,
    ),
}
class Node(object):
    """Represents an item in the tree

    This base class only stores the common attributes; the tree-mutating
    methods raise NotImplementedError and must be supplied by subclasses.
    """

    def __init__(self, name):
        """Creates a Node

        :arg name: The tag name associated with the node

        """
        # The tag name associated with the node
        self.name = name
        # The parent of the current node (or None for the document node)
        self.parent = None
        # The value of the current node (applies to text nodes and comments)
        self.value = None
        # A dict holding name -> value pairs for attributes of the node
        self.attributes = {}
        # A list of child nodes of the current node. This must include all
        # elements but not necessarily other node types.
        self.childNodes = []
        # A list of miscellaneous flags that can be set on the node.
        self._flags = []

    def __str__(self):
        """Return ``<name attr="value" ...>``, or ``<name>`` if the node has
        no attributes
        """
        attributesStr = " ".join(["%s=\"%s\"" % (name, value)
                                  for name, value in
                                  self.attributes.items()])
        if attributesStr:
            return "<%s %s>" % (self.name, attributesStr)
        else:
            return "<%s>" % (self.name)

    def __repr__(self):
        """Return ``<name>`` (attributes are never included)"""
        return "<%s>" % (self.name)

    def appendChild(self, node):
        """Insert node as a child of the current node

        :arg node: the node to insert

        """
        raise NotImplementedError

    def insertText(self, data, insertBefore=None):
        """Insert data as text in the current node, positioned before the
        start of node insertBefore or to the end of the node's text.

        :arg data: the data to insert

        :arg insertBefore: the child node the text should be placed in
            front of, or None to add the text at the end of this node's text

        """
        raise NotImplementedError

    def insertBefore(self, node, refNode):
        """Insert node as a child of the current node, before refNode in the
        list of child nodes. Raises ValueError if refNode is not a child of
        the current node

        :arg node: the node to insert

        :arg refNode: the child node to insert the node before

        """
        raise NotImplementedError

    def removeChild(self, node):
        """Remove node from the children of the current node

        :arg node: the child node to remove

        """
        raise NotImplementedError

    def reparentChildren(self, newParent):
        """Move all the children of the current node to newParent.
        This is needed so that trees that don't store text as nodes move the
        text in the correct way

        :arg newParent: the node to move all this node's children to

        """
        # XXX - should this method be made more general?
        # Walk a snapshot of the child list: an appendChild implementation
        # that detaches the child from its previous parent would otherwise
        # shrink the list being iterated and every other child would be
        # skipped (and then lost when the list is reset below).
        for child in list(self.childNodes):
            newParent.appendChild(child)
        self.childNodes = []

    def cloneNode(self):
        """Return a shallow copy of the current node i.e. a node with the same
        name and attributes but with no parent or child nodes
        """
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text, false otherwise
        """
        raise NotImplementedError
class ActiveFormattingElements(list):
    """List subclass whose append() caps the number of equal entries that
    may follow the most recent Marker.
    """

    def append(self, node):
        """Add node to the end of the list.

        Unless node is the Marker, the entries are first scanned from the
        end back to the nearest Marker; when a third entry equal to node
        (as judged by nodesEqual) is found, that entry is removed before
        node is added.

        :arg node: the element (or Marker) to append

        """
        if node != Marker:
            matches = 0
            for existing in reversed(self):
                if existing == Marker:
                    # Entries before the last marker are out of reach
                    break
                if not self.nodesEqual(existing, node):
                    continue
                matches += 1
                if matches == 3:
                    self.remove(existing)
                    break
        list.append(self, node)

    def nodesEqual(self, node1, node2):
        """Return True if both nodes have equal nameTuple and equal
        attributes, False otherwise

        :arg node1: first node to compare

        :arg node2: second node to compare

        """
        if node1.nameTuple == node2.nameTuple:
            if node1.attributes == node2.attributes:
                return True
        return False
class TreeBuilder(object):
    """Base treebuilder implementation

    * documentClass - the class to use for the bottommost node of a document
    * elementClass - the class to use for HTML Elements
    * commentClass - the class to use for comments
    * doctypeClass - the class to use for doctypes
    * fragmentClass - the class instantiated by getFragment()

    """
    # pylint:disable=not-callable

    # Document class
    documentClass = None

    # The class to use for creating a node
    elementClass = None

    # The class to use for creating comments
    commentClass = None

    # The class to use for creating doctypes
    doctypeClass = None

    # Fragment class
    fragmentClass = None

    # Tag names popped by generateImpliedEndTags(); built once here instead
    # of on every call.
    _impliedEndTagNames = frozenset(("dd", "dt", "li", "option", "optgroup",
                                     "p", "rp", "rt"))

    def __init__(self, namespaceHTMLElements):
        """Create a TreeBuilder

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        """
        if namespaceHTMLElements:
            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
        else:
            self.defaultNamespace = None
        self.reset()

    def reset(self):
        """Discard all parsing state and start a new, empty document"""
        self.openElements = []
        self.activeFormattingElements = ActiveFormattingElements()

        # XXX - rename these to headElement, formElement
        self.headPointer = None
        self.formPointer = None

        # Goes through the insertFromTable property, which also binds
        # self.insertElement to insertElementNormal.
        self.insertFromTable = False

        self.document = self.documentClass()

    def elementInScope(self, target, variant=None):
        """Return True if target is found on the stack of open elements
        before a scope boundary is hit, False otherwise

        :arg target: a node (anything with a ``nameTuple`` attribute), which
            is matched by equality; a text string, which is treated as a
            name in the HTML namespace; or a (namespace, name) tuple

        :arg variant: key into listElementsMap selecting the set of
            elements that delimit the scope

        """
        # If we pass a node in we match that. if we pass a string
        # match any node with that name
        exactNode = hasattr(target, "nameTuple")
        if not exactNode:
            if isinstance(target, text_type):
                target = (namespaces["html"], target)
            assert isinstance(target, tuple)

        listElements, invert = listElementsMap[variant]

        # Search from the most recently opened element outwards
        for node in reversed(self.openElements):
            if exactNode and node == target:
                return True
            elif not exactNode and node.nameTuple == target:
                return True
            elif (invert ^ (node.nameTuple in listElements)):
                return False

        assert False  # We should never reach this point

    def reconstructActiveFormattingElements(self):
        """Re-insert, via self.insertElement, clones of the trailing active
        formatting elements that are not on the stack of open elements,
        replacing each list entry with the newly inserted element
        """
        # Within this algorithm the order of steps described in the
        # specification is not quite the same as the order of steps in the
        # code. It should still do the same though.

        # Step 1: stop the algorithm when there's nothing to do.
        if not self.activeFormattingElements:
            return

        # Step 2 and step 3: we start with the last element. So i is -1.
        i = len(self.activeFormattingElements) - 1
        entry = self.activeFormattingElements[i]
        if entry == Marker or entry in self.openElements:
            return

        # Step 6
        while entry != Marker and entry not in self.openElements:
            if i == 0:
                # This will be reset to 0 below
                i = -1
                break
            i -= 1
            # Step 5: let entry be one earlier in the list.
            entry = self.activeFormattingElements[i]

        while True:
            # Step 7
            i += 1

            # Step 8
            entry = self.activeFormattingElements[i]
            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes

            # Step 9
            element = self.insertElement({"type": "StartTag",
                                          "name": clone.name,
                                          "namespace": clone.namespace,
                                          "data": clone.attributes})

            # Step 10
            self.activeFormattingElements[i] = element

            # Step 11
            if element == self.activeFormattingElements[-1]:
                break

    def clearActiveFormattingElements(self):
        """Pop entries off the list of active formatting elements until a
        Marker has been popped or the list is empty.

        Raises IndexError if the list is already empty when called.
        """
        entry = self.activeFormattingElements.pop()
        while self.activeFormattingElements and entry != Marker:
            entry = self.activeFormattingElements.pop()

    def elementInActiveFormattingElements(self, name):
        """Check if an element exists between the end of the active
        formatting elements and the last marker. If it does, return it, else
        return false"""

        for item in self.activeFormattingElements[::-1]:
            # Check for Marker first because if it's a Marker it doesn't have a
            # name attribute.
            if item == Marker:
                break
            elif item.name == name:
                return item
        return False

    def insertRoot(self, token):
        """Create the element for token, push it onto the stack of open
        elements and append it to the document
        """
        element = self.createElement(token)
        self.openElements.append(element)
        self.document.appendChild(element)

    def insertDoctype(self, token):
        """Append a doctype built from the token's "name", "publicId" and
        "systemId" entries to the document
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = self.doctypeClass(name, publicId, systemId)
        self.document.appendChild(doctype)

    def insertComment(self, token, parent=None):
        """Append a comment holding token["data"] to parent

        :arg parent: the node to append to; defaults to the last entry on
            the stack of open elements

        """
        if parent is None:
            parent = self.openElements[-1]
        parent.appendChild(self.commentClass(token["data"]))

    def createElement(self, token):
        """Create an element but don't insert it anywhere"""
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        return element

    def _getInsertFromTable(self):
        return self._insertFromTable

    def _setInsertFromTable(self, value):
        """Switch the function used to insert an element from the
        normal one to the misnested table one and back again"""
        self._insertFromTable = value
        if value:
            self.insertElement = self.insertElementTable
        else:
            self.insertElement = self.insertElementNormal

    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)

    def insertElementNormal(self, token):
        """Create an element for token, append it to the last open element
        and push it onto the stack of open elements

        :returns: the new element

        """
        name = token["name"]
        assert isinstance(name, text_type), "Element %s not unicode" % name
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        self.openElements[-1].appendChild(element)
        self.openElements.append(element)
        return element

    def insertElementTable(self, token):
        """Create an element and insert it into the tree

        If the last open element is not one of tableInsertModeElements the
        work is delegated to insertElementNormal; otherwise the element is
        placed at the position given by getTableMisnestedNodePosition().

        :returns: the new element

        """
        # Decide first, so that no throwaway element is built when the
        # normal insertion path (which creates its own element) is taken.
        if self.openElements[-1].name not in tableInsertModeElements:
            return self.insertElementNormal(token)

        element = self.createElement(token)
        # We should be in the InTable mode. This means we want to do
        # special magic element rearranging
        parent, insertBefore = self.getTableMisnestedNodePosition()
        if insertBefore is None:
            parent.appendChild(element)
        else:
            parent.insertBefore(element, insertBefore)
        self.openElements.append(element)
        return element

    def insertText(self, data, parent=None):
        """Insert text data.

        :arg data: the text to insert

        :arg parent: the node to insert into; defaults to the last open
            element. It is ignored when the text has to be moved out of a
            table, in which case the position returned by
            getTableMisnestedNodePosition() is used instead.

        """
        if parent is None:
            parent = self.openElements[-1]

        if (not self.insertFromTable or
                self.openElements[-1].name not in tableInsertModeElements):
            parent.insertText(data)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            parent.insertText(data, insertBefore)

    def getTableMisnestedNodePosition(self):
        """Get the foster parent element, and sibling to insert before
        (or None) when inserting a misnested table node"""
        # The foster parent element is the one which comes before the most
        # recently opened table element
        # XXX - this is really inelegant
        lastTable = None
        fosterParent = None
        insertBefore = None
        for elm in self.openElements[::-1]:
            if elm.name == "table":
                lastTable = elm
                break
        if lastTable:
            # XXX - we should really check that this parent is actually a
            # node here
            if lastTable.parent:
                fosterParent = lastTable.parent
                insertBefore = lastTable
            else:
                # No usable parent: fall back to the element just below
                # the table on the stack of open elements
                fosterParent = self.openElements[
                    self.openElements.index(lastTable) - 1]
        else:
            fosterParent = self.openElements[0]
        return fosterParent, insertBefore

    def generateImpliedEndTags(self, exclude=None):
        """Pop elements off the stack of open elements for as long as the
        last one is named dd, dt, li, option, optgroup, p, rp or rt

        :arg exclude: a tag name that stops the popping when it is reached

        """
        # XXX td, th and tr are not actually needed
        # XXX This is not entirely what the specification says. We should
        # investigate it more closely.
        impliedNames = self._impliedEndTagNames
        while True:
            name = self.openElements[-1].name
            if name not in impliedNames or name == exclude:
                break
            self.openElements.pop()

    def getDocument(self):
        """Return the final tree"""
        return self.document

    def getFragment(self):
        """Return the final fragment"""
        # assert self.innerHTML
        fragment = self.fragmentClass()
        self.openElements[0].reparentChildren(fragment)
        return fragment

    def testSerializer(self, node):
        """Serialize the subtree of node in the format required by unit tests

        :arg node: the node from which to start serializing

        """
        raise NotImplementedError
|