123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
- from __future__ import absolute_import, division, unicode_literals
- from genshi.core import QName
- from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
- from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
- from . import base
- from ..constants import voidElements, namespaces
- class TreeWalker(base.TreeWalker):
- def __iter__(self):
- # Buffer the events so we can pass in the following one
- previous = None
- for event in self.tree:
- if previous is not None:
- for token in self.tokens(previous, event):
- yield token
- previous = event
- # Don't forget the final event!
- if previous is not None:
- for token in self.tokens(previous, None):
- yield token
- def tokens(self, event, next):
- kind, data, _ = event
- if kind == START:
- tag, attribs = data
- name = tag.localname
- namespace = tag.namespace
- converted_attribs = {}
- for k, v in attribs:
- if isinstance(k, QName):
- converted_attribs[(k.namespace, k.localname)] = v
- else:
- converted_attribs[(None, k)] = v
- if namespace == namespaces["html"] and name in voidElements:
- for token in self.emptyTag(namespace, name, converted_attribs,
- not next or next[0] != END or
- next[1] != tag):
- yield token
- else:
- yield self.startTag(namespace, name, converted_attribs)
- elif kind == END:
- name = data.localname
- namespace = data.namespace
- if namespace != namespaces["html"] or name not in voidElements:
- yield self.endTag(namespace, name)
- elif kind == COMMENT:
- yield self.comment(data)
- elif kind == TEXT:
- for token in self.text(data):
- yield token
- elif kind == DOCTYPE:
- yield self.doctype(*data)
- elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
- START_CDATA, END_CDATA, PI):
- pass
- else:
- yield self.unknown(kind)
|