paulb@18 | 1 | #!/usr/bin/env python |
paulb@18 | 2 | |
paulb@18 | 3 | """ |
paulb@18 | 4 | DOM wrapper around libxml2. |
paulb@18 | 5 | """ |
paulb@18 | 6 | |
paulb@18 | 7 | import xml.dom |
paulb@18 | 8 | import libxml2 |
paulb@18 | 9 | import sys |
paulb@18 | 10 | |
paulb@18 | 11 | # NOTE: libxml2 seems to use UTF-8 throughout. |
paulb@18 | 12 | |
def from_unicode(s):

    """
    Return 's' encoded as a UTF-8 byte string if it is a Unicode string;
    return any other value (such as None or a plain string) unchanged.
    """

    # isinstance, unlike an exact type comparison, also accepts subclasses
    # of the Unicode string type.
    if isinstance(s, type(u"")):
        return s.encode("utf-8")
    return s
paulb@18 | 18 | |
def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a plain (byte) string; return any
    other value (such as None or an existing Unicode string) unchanged.
    """

    text_type = type(u"")

    # Only plain strings which are not already Unicode text need decoding.
    # isinstance also accepts subclasses of the plain string type.
    if isinstance(s, str) and not isinstance(s, text_type):
        return s.decode("utf-8")
    return s
paulb@18 | 24 | |
paulb@18 | 25 | # NOTE: Consider a generator instead. |
paulb@18 | 26 | |
class NamedNodeMap(object):

    """
    A dictionary-like view of the attributes of an element.

    The map holds no attribute data itself: every operation is forwarded to
    the element 'node' given on construction or computed from that element's
    native attribute list.
    """

    def __init__(self, node):
        self.node = node

    def getNamedItem(self, name):
        "Return the attribute node with the given 'name'."
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        "Return the attribute node with the given 'ns' and 'localName'."
        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):
        "Set an attribute on the element from the attribute 'node'."
        self.node.setAttributeNode(node.name, node)

    def setNamedItemNS(self, node):
        "Set a namespaced attribute on the element from the attribute 'node'."
        self.node.setAttributeNodeNS(node.namespaceURI, node.localName, node)

    def __getitem__(self, name):

        """
        Return the attribute node for 'name', or None if there is no such
        attribute. The 'name' may be a (namespaceURI, localName) tuple, as
        produced by the keys method, or a plain name, which is treated as
        (None, name).
        """

        if isinstance(name, tuple):
            wanted = name
        else:
            wanted = (None, name)

        # Search the attribute list directly: the element's own lookup
        # methods may index this map, so delegating to them from here could
        # never terminate.
        for key, attr in self.items():
            if key == wanted:
                return attr
        return None

    def __setitem__(self, name, node):

        """
        Set the attribute 'node' under 'name', raising KeyError if 'name' is
        not the node's own nodeName.
        """

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented. Deleting an item currently does nothing.
        pass

    def values(self):

        "Return a list of Node objects, one for each attribute of the element."

        attributes = []
        _attribute = self.node.as_native_node().properties
        while _attribute is not None:
            attributes.append(Node(_attribute, ownerElement=self.node))
            _attribute = _attribute.next
        return attributes

    def keys(self):
        "Return a list of (namespaceURI, localName) tuples, one per attribute."
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        "Return a list of ((namespaceURI, localName), attribute node) tuples."
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])
paulb@18 | 76 | |
paulb@18 | 77 | def _get_prefix_and_localName(name): |
paulb@18 | 78 | t = name.split(":") |
paulb@18 | 79 | if len(t) == 1: |
paulb@18 | 80 | return None, name |
paulb@18 | 81 | elif len(t) == 2: |
paulb@18 | 82 | return t |
paulb@18 | 83 | else: |
paulb@18 | 84 | # NOTE: Should raise an exception. |
paulb@18 | 85 | return None, None |
paulb@18 | 86 | |
class TemporaryNode(object):

    """
    A plain record of a namespace, a qualified name and a node type. The
    'prefix' and 'localName' attributes are derived from the name.
    """

    def __init__(self, ns, name, nodeType):
        self.ns, self.name, self.nodeType = ns, name, nodeType
        parts = _get_prefix_and_localName(name)
        self.prefix = parts[0]
        self.localName = parts[1]
paulb@18 | 93 | |
class Node(object):

    """
    DOM-style wrapper around a native libxml2 node.

    The native node is available through as_native_node. The usual DOM
    attributes (nodeName, nodeValue, childNodes and so on) are read-only
    properties computed from the native node on each access.
    """

    # Maps the type names reported by libxml2 to xml.dom node type constants.
    _nodeTypes = {
        "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
        "comment" : xml.dom.Node.COMMENT_NODE,
        "document_xml" : xml.dom.Node.DOCUMENT_NODE,
        "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
        "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
        "element" : xml.dom.Node.ELEMENT_NODE,
        "entity" : xml.dom.Node.ENTITY_NODE,
        "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
        "notation" : xml.dom.Node.NOTATION_NODE,
        "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
        "text" : xml.dom.Node.TEXT_NODE
        }

    def __init__(self, node, ownerElement=None, doctype=None):

        """
        Wrap the native 'node', recording the optional 'ownerElement' and
        'doctype' as plain attributes.
        """

        self._node = node
        self.ownerElement = ownerElement
        self.doctype = doctype

    def as_native_node(self):
        "Return the wrapped native node."
        return self._node

    # Property implementations.

    def _ownerDocument(self):
        return Node(self._node.doc)

    def _nodeType(self):
        return self._nodeTypes[self._node.type]

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        child_nodes = []
        _node = self._node.children
        while _node is not None:
            child_nodes.append(Node(_node))
            _node = _node.next
        return child_nodes

    def _attributes(self):
        return NamedNodeMap(self)

    def _getNs(self):

        "Internal namespace information retrieval."

        try:
            return self._node.ns()
        except libxml2.treeError:
            return None

    def _namespaceURI(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.content)
        else:
            return None

    def _nodeValue(self):
        return to_unicode(self._node.content)

    def _prefix(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.name)
        else:
            return None

    def _nodeName(self):
        prefix = self._prefix()
        if prefix is not None:
            return prefix + ":" + self._localName()
        else:
            return self._localName()

    def _tagName(self):
        if self._node.type == "element":
            return self._nodeName()
        else:
            return None

    def _localName(self):
        return to_unicode(self._node.name)

    def _parentNode(self):
        if self.nodeType == xml.dom.Node.DOCUMENT_NODE:
            return None

        # A detached node has no native parent: report None instead of a
        # wrapper around nothing.
        parent = self._node.parent
        if parent is None:
            return None
        return Node(parent)

    def _previousSibling(self):
        if self._node.prev is not None:
            return Node(self._node.prev)
        else:
            return None

    def _nextSibling(self):
        if self._node.next is not None:
            return Node(self._node.next)
        else:
            return None

    # Attribute access.

    def hasAttributeNS(self, ns, localName):
        "Return whether an attribute with 'ns' and 'localName' has a value."
        return self.getAttributeNS(ns, localName) is not None

    def hasAttribute(self, name):
        "Return whether an attribute with the given 'name' has a value."
        return self.getAttribute(name) is not None

    def getAttributeNS(self, ns, localName):

        "Return the value of the attribute with 'ns' and 'localName'."

        # Convert from Unicode, as the attribute setters do.
        ns, localName = map(from_unicode, [ns, localName])

        return to_unicode(self._node.nsProp(localName, ns))

    def getAttribute(self, name):

        "Return the value of the attribute with the given 'name'."

        # Convert from Unicode, as the attribute setters do.
        name = from_unicode(name)

        return to_unicode(self._node.prop(name))

    def getAttributeNodeNS(self, ns, localName):

        """
        Return the attribute node with the given 'ns' and 'localName', or
        None if this node has no such attribute.
        """

        # Scan the attribute list directly rather than indexing
        # self.attributes, since NamedNodeMap forwards its named lookups
        # back to this node.
        for attr in self.attributes.values():
            if attr.namespaceURI == ns and attr.localName == localName:
                return attr
        return None

    def getAttributeNode(self, localName):

        """
        Return the attribute node with the given 'localName' and no
        namespace, or None if this node has no such attribute.
        """

        # NOTE: Needs verifying.
        return self.getAttributeNodeNS(None, localName)

    def setAttributeNS(self, ns, name, value):

        """
        Set the attribute with the qualified 'name' in namespace 'ns' to
        'value'.
        """

        # NOTE: Need to convert from Unicode.
        ns, name, value = map(from_unicode, [ns, name, value])

        prefix, localName = _get_prefix_and_localName(name)
        if prefix is not None:
            self._node.setNsProp(self._node.newNs(ns, prefix), localName, value)
            return

        # Use _getNs so that a node without a namespace gives None instead
        # of raising libxml2.treeError.
        own_ns = self._getNs()
        if own_ns is not None and ns == own_ns.content:
            self._node.setNsProp(own_ns, localName, value)
        else:
            # NOTE: Needs verifying: what should happen to the namespace?
            self._node.setNsProp(None, localName, value)

    def setAttribute(self, name, value):

        "Set the attribute with the given 'name' to 'value'."

        # NOTE: Need to convert from Unicode.
        name, value = map(from_unicode, [name, value])

        self._node.setProp(name, value)

    def setAttributeNodeNS(self, ns, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttributeNS(ns, name, node.nodeValue)

    def setAttributeNode(self, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttribute(name, node.nodeValue)

    # Node creation.

    def createElementNS(self, ns, name):

        """
        Return a new, unattached element with the qualified 'name' in the
        namespace 'ns'.
        """

        # NOTE: Need to convert from Unicode.
        ns, name = map(from_unicode, [ns, name])

        prefix, localName = _get_prefix_and_localName(name)
        _node = libxml2.newNode(localName)
        _ns = _node.newNs(ns, prefix)
        _node.setNs(_ns)
        return Node(_node)

    def createElement(self, name):

        "Return a new, unattached element with the given 'name'."

        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        _node = libxml2.newNode(name)
        return Node(_node)

    def createAttributeNS(self, ns, name):

        """
        Return a TemporaryNode describing an attribute with the qualified
        'name' in the namespace 'ns'.
        """

        # NOTE: Need to convert from Unicode.
        ns, name = map(from_unicode, [ns, name])

        return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createAttribute(self, name):

        """
        Return a TemporaryNode describing an attribute with the given 'name'
        and no namespace.
        """

        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        return TemporaryNode(None, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createTextNode(self, value):

        "Return a new, unattached text node holding 'value'."

        # NOTE: Need to convert from Unicode. The conversion is currently
        # NOTE: disabled, so 'value' is passed to libxml2 as given.

        return Node(libxml2.newText(value))

    def _add_node(self, tmp):

        """
        Create a native attribute on this node from the temporary node 'tmp'
        and return it. For any other kind of temporary node, return None.
        """

        if tmp.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            if tmp.ns is not None:
                _child = self._node.newNsProp(None, tmp.localName, None)
                _ns = _child.newNs(tmp.ns, tmp.prefix)
                _child.setNs(_ns)
            else:
                _child = self._node.newProp(None, tmp.name, None)
        else:
            _child = None

        return _child

    def importNode(self, node, deep):

        """
        Return a copy of 'node' created through this node's owner document.
        Elements are copied with their attributes and, if 'deep' is true,
        with their descendants. Text and attribute nodes are also supported;
        any other node type raises ValueError.
        """

        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)
            for value in node.attributes.values():
                imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)

            if deep:
                for child in node.childNodes:
                    imported_child = self.importNode(child, deep)
                    if imported_child:
                        imported_element.appendChild(imported_child)

            return imported_element

        elif node.nodeType == xml.dom.Node.TEXT_NODE:
            return self.ownerDocument.createTextNode(node.nodeValue)

        elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            return self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)

        raise ValueError(node.nodeType)

    # Tree manipulation.

    def insertBefore(self, tmp, oldNode):

        "Insert 'tmp' before 'oldNode', returning the inserted node."

        if tmp.nodeType == xml.dom.Node.TEXT_NODE:
            _child = tmp._node
        elif tmp.nodeType == xml.dom.Node.ELEMENT_NODE:
            _child = tmp._node
        else:
            _child = self._add_node(tmp)
        _child.unlinkNode()
        return Node(oldNode._node.addPrevSibling(_child))

    def replaceChild(self, tmp, oldNode):

        """
        Replace 'oldNode' with 'tmp', returning a wrapper around the result
        of the native replaceNode call.
        """

        if tmp.nodeType == xml.dom.Node.TEXT_NODE:
            _child = tmp._node
        elif tmp.nodeType == xml.dom.Node.ELEMENT_NODE:
            _child = tmp._node
        else:
            _child = self._add_node(tmp)
        _child.unlinkNode()
        return Node(oldNode._node.replaceNode(_child))

    def appendChild(self, tmp):

        "Add 'tmp' to this node, returning the added node."

        if tmp.nodeType == xml.dom.Node.TEXT_NODE:
            _child = self._node.addChild(tmp._node)
        elif tmp.nodeType == xml.dom.Node.ELEMENT_NODE:
            _child = self._node.addChild(tmp._node)
        else:
            _child = self._add_node(tmp)
        return Node(_child)

    def removeChild(self, tmp):

        "Unlink 'tmp' from the tree and return it."

        tmp._node.unlinkNode()
        return tmp

    #doctype defined in __init__
    #ownerElement defined in __init__
    ownerDocument = property(_ownerDocument)
    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)

    # Comparison.

    def isSameNode(self, other):
        "Return whether 'other' has the same native node path as this node."
        return self._node.nodePath() == other._node.nodePath()

    def __eq__(self, other):
        # Values which are not nodes (such as None) are never equal to a
        # node; comparing with them must not fail.
        if not isinstance(other, Node):
            return NotImplemented
        return self.isSameNode(other)

    def __ne__(self, other):
        # Defined explicitly: inequality is not derived from __eq__ in
        # Python 2.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
paulb@18 | 369 | |
paulb@18 | 370 | # Utility functions. |
paulb@18 | 371 | |
def createDocumentType(localName, publicId, systemId):

    """
    Placeholder for document type creation: the arguments are ignored and
    the result is always None.
    """

    document_type = None
    return document_type
paulb@18 | 374 | |
def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document recording the given 'doctype'. If 'localName' is
    not None, a root element with that name in the namespace 'namespaceURI'
    is created and added to the document.
    """

    # NOTE: Fixed to use version 1.0 only.
    document = Node(libxml2.newDoc("1.0"), doctype=doctype)
    if localName is None:
        return document

    document.appendChild(document.createElementNS(namespaceURI, localName))
    return document
paulb@18 | 382 | |
def parse(stream_or_string):

    """
    Parse the document provided by 'stream_or_string' and return it. An
    object with a 'read' attribute is read as a stream and left open; any
    other value is treated as a filename.
    """

    if hasattr(stream_or_string, "read"):
        return parseString(stream_or_string.read())

    # The file is opened here, so it is also closed here, even if reading
    # or parsing fails.
    stream = open(stream_or_string)
    try:
        return parseString(stream.read())
    finally:
        stream.close()
paulb@18 | 389 | |
def parseString(s):

    "Parse the document text 's' with libxml2 and return the wrapped result."

    native_document = libxml2.parseDoc(s)
    return Node(native_document)
paulb@18 | 392 | |
def parseURI(uri):

    "Parse the document found at 'uri' and return the wrapped result."

    # libxml2.parseURI only parses the URI string itself into a URI object;
    # libxml2.parseFile is the call which loads and parses the document.
    return Node(libxml2.parseFile(uri))
paulb@18 | 395 | |
def toString(node):

    "Return the result of serialising the native node wrapped by 'node'."

    native_node = node.as_native_node()
    return native_node.serialize()
paulb@18 | 398 | |
def toStream(node, stream=None):

    """
    Write the serialised form of 'node' to 'stream', or to standard output
    if no stream is given.
    """

    # Test for None explicitly: a stream object which happens to be false
    # (for example, one with a zero length) must still be written to.
    if stream is None:
        stream = sys.stdout
    stream.write(toString(node))
paulb@18 | 402 | |
paulb@18 | 403 | # vim: tabstop=4 expandtab shiftwidth=4 |