genshi.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. from __future__ import absolute_import, division, unicode_literals
  2. from genshi.core import QName
  3. from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
  4. from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
  5. from . import base
  6. from ..constants import voidElements, namespaces
  7. class TreeWalker(base.TreeWalker):
  8. def __iter__(self):
  9. # Buffer the events so we can pass in the following one
  10. previous = None
  11. for event in self.tree:
  12. if previous is not None:
  13. for token in self.tokens(previous, event):
  14. yield token
  15. previous = event
  16. # Don't forget the final event!
  17. if previous is not None:
  18. for token in self.tokens(previous, None):
  19. yield token
  20. def tokens(self, event, next):
  21. kind, data, _ = event
  22. if kind == START:
  23. tag, attribs = data
  24. name = tag.localname
  25. namespace = tag.namespace
  26. converted_attribs = {}
  27. for k, v in attribs:
  28. if isinstance(k, QName):
  29. converted_attribs[(k.namespace, k.localname)] = v
  30. else:
  31. converted_attribs[(None, k)] = v
  32. if namespace == namespaces["html"] and name in voidElements:
  33. for token in self.emptyTag(namespace, name, converted_attribs,
  34. not next or next[0] != END or
  35. next[1] != tag):
  36. yield token
  37. else:
  38. yield self.startTag(namespace, name, converted_attribs)
  39. elif kind == END:
  40. name = data.localname
  41. namespace = data.namespace
  42. if namespace != namespaces["html"] or name not in voidElements:
  43. yield self.endTag(namespace, name)
  44. elif kind == COMMENT:
  45. yield self.comment(data)
  46. elif kind == TEXT:
  47. for token in self.text(data):
  48. yield token
  49. elif kind == DOCTYPE:
  50. yield self.doctype(*data)
  51. elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
  52. START_CDATA, END_CDATA, PI):
  53. pass
  54. else:
  55. yield self.unknown(kind)