#!/usr/bin/env python

"""
DOM wrapper around libxml2.
"""

import xml.dom
import libxml2
import sys

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' encoded as a UTF-8 byte string if it is a Unicode object.
    Any other value (including None) is returned unchanged.
    """

    if isinstance(s, type(u"")):
        return s.encode("utf-8")
    return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a plain (byte) string.
    Any other value (including None) is returned unchanged.
    """

    if isinstance(s, type("")):
        return s.decode("utf-8")
    return s

# NOTE: Consider a generator instead.

class NamedNodeMap(object):

    """
    A mapping-style view of the attributes of the wrapped element 'node'.
    The keys reported by 'keys' and 'items' are (namespaceURI, localName)
    tuples; lookups accept either such a tuple or a plain attribute name.
    """

    def __init__(self, node):
        self.node = node

    def getNamedItem(self, name):

        "Return the attribute whose nodeName is 'name', or None."

        # The lookup is performed here rather than being delegated back to the
        # element, since the element's accessors themselves use this map.
        name = to_unicode(name)
        for attr in self.values():
            if attr.nodeName == name:
                return attr
        return None

    def getNamedItemNS(self, ns, localName):

        "Return the attribute having 'ns' and 'localName', or None."

        ns, localName = to_unicode(ns), to_unicode(localName)
        for attr in self.values():
            if attr.namespaceURI == ns and attr.localName == localName:
                return attr
        return None

    def setNamedItem(self, node):
        self.node.setAttributeNode(node.name, node)

    def setNamedItemNS(self, node):
        # The qualified name is passed on so that any prefix is retained.
        self.node.setAttributeNodeNS(node.namespaceURI, node.nodeName, node)

    def __getitem__(self, name):

        """
        Return the attribute for 'name', which is either an attribute name or
        a (namespaceURI, localName) tuple. Raise KeyError if it is missing.
        """

        if isinstance(name, tuple):
            item = self.getNamedItemNS(*name)
        else:
            item = self.getNamedItem(name)
        if item is None:
            raise KeyError(name)
        return item

    def __setitem__(self, name, node):

        """
        Store the attribute 'node' under 'name', which must agree with the
        node's own name (or its (namespaceURI, localName) tuple); otherwise
        KeyError is raised.
        """

        if isinstance(name, tuple):
            if name == (node.namespaceURI, node.localName):
                self.setNamedItemNS(node)
                return
        elif name == node.nodeName:
            self.setNamedItem(node)
            return
        raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented. Deleting an item currently has no effect.
        pass

    def values(self):

        "Return a list of Node objects, one for each attribute."

        attributes = []
        _attribute = self.node.as_native_node().properties
        while _attribute is not None:
            attributes.append(Node(_attribute, ownerElement=self.node))
            _attribute = _attribute.next
        return attributes

    def keys(self):
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon and return a (prefix, localName)
    tuple. The prefix is None where 'name' has no colon. Where 'name' has more
    than one colon, (None, None) is returned.
    """

    t = name.split(":")
    if len(t) == 1:
        return None, name
    elif len(t) == 2:
        return tuple(t)
    else:
        # NOTE: Should raise an exception.
        return None, None

class TemporaryNode(object):

    """
    A placeholder for a node (in practice, an attribute) which has been
    created but which has not yet been attached to an element.
    """

    def __init__(self, ns, name, nodeType):
        self.ns = ns
        self.name = name
        self.nodeType = nodeType
        self.prefix, self.localName = _get_prefix_and_localName(self.name)
        # No value is known until one is assigned.
        self.value = None

    def _getValue(self):
        return self.value

    def _setValue(self, value):
        self.value = value

    # DOM-style aliases, as read by NamedNodeMap and Node.setAttributeNode*.
    namespaceURI = property(lambda self: self.ns)
    nodeName = property(lambda self: self.name)
    nodeValue = property(_getValue, _setValue)

class Node(object):

    """
    A DOM-style wrapper around a libxml2 node, available through
    'as_native_node'. Most DOM attributes are properties computed from the
    native node upon each access.
    """

    # Mapping from libxml2 node type names to DOM node type constants.
    _nodeTypes = {
        "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
        "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
        "comment" : xml.dom.Node.COMMENT_NODE,
        "document_frag" : xml.dom.Node.DOCUMENT_FRAGMENT_NODE,
        "document_html" : xml.dom.Node.DOCUMENT_NODE,
        "document_xml" : xml.dom.Node.DOCUMENT_NODE,
        "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
        "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
        "element" : xml.dom.Node.ELEMENT_NODE,
        "entity" : xml.dom.Node.ENTITY_NODE,
        "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
        "notation" : xml.dom.Node.NOTATION_NODE,
        "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
        "text" : xml.dom.Node.TEXT_NODE
        }

    def __init__(self, node, ownerElement=None, doctype=None):
        self._node = node
        self.ownerElement = ownerElement
        self.doctype = doctype

    def as_native_node(self):

        "Return the wrapped libxml2 node."

        return self._node

    def _ownerDocument(self):
        # NOTE: For a node without a document, this wraps None; the factory
        # NOTE: methods (createElementNS and so on) remain usable on the result.
        return Node(self._node.doc)

    def _nodeType(self):
        return self._nodeTypes[self._node.type]

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        child_nodes = []
        _node = self._node.children
        while _node is not None:
            child_nodes.append(Node(_node))
            _node = _node.next
        return child_nodes

    def _attributes(self):
        return NamedNodeMap(self)

    def _getNs(self):

        "Internal namespace information retrieval."

        try:
            return self._node.ns()
        except libxml2.treeError:
            return None

    def _namespaceURI(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.content)
        else:
            return None

    def _nodeValue(self):
        return to_unicode(self._node.content)

    def _prefix(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.name)
        else:
            return None

    def _nodeName(self):
        prefix = self._prefix()
        if prefix is not None:
            return prefix + ":" + self._localName()
        else:
            return self._localName()

    def _tagName(self):
        if self._node.type == "element":
            return self._nodeName()
        else:
            return None

    def _localName(self):
        return to_unicode(self._node.name)

    def _parentNode(self):
        if self.nodeType == xml.dom.Node.DOCUMENT_NODE:
            return None
        # A detached node has no parent: do not wrap None in a Node.
        _parent = self._node.parent
        if _parent is None:
            return None
        return Node(_parent)

    def _nextSibling(self):
        if self._node.next is not None:
            return Node(self._node.next)
        else:
            return None

    def hasAttributeNS(self, ns, localName):
        return self.getAttributeNS(ns, localName) is not None

    def hasAttribute(self, name):
        return self.getAttribute(name) is not None

    def getAttributeNS(self, ns, localName):

        "Return the value of the attribute as Unicode, or None if absent."

        ns, localName = from_unicode(ns), from_unicode(localName)
        return to_unicode(self._node.nsProp(localName, ns))

    def getAttribute(self, name):

        "Return the value of the attribute as Unicode, or None if absent."

        return to_unicode(self._node.prop(from_unicode(name)))

    def getAttributeNodeNS(self, ns, localName):

        "Return the attribute node for 'ns' and 'localName', or None."

        return self.attributes.getNamedItemNS(ns, localName)

    def getAttributeNode(self, localName):

        "Return the attribute node named 'localName', or None."

        return self.attributes.getNamedItem(localName)

    def setAttributeNS(self, ns, name, value):

        """
        Set the attribute with the qualified 'name' to 'value'. Where 'name'
        has a prefix, a namespace declaration for 'ns' is added to this
        element and used for the attribute. Otherwise, this element's own
        namespace is used if its URI equals 'ns', and no namespace is used in
        all other cases.
        """

        # NOTE: Need to convert from Unicode.
        ns, name, value = [from_unicode(s) for s in (ns, name, value)]

        prefix, localName = _get_prefix_and_localName(name)
        if prefix is not None:
            self._node.setNsProp(self._node.newNs(ns, prefix), localName, value)
            return

        # The element may have no namespace at all, so use the guarded lookup;
        # setNsProp needs the namespace object itself, not its URI.
        _ns = self._getNs()
        if _ns is not None and ns == _ns.content:
            self._node.setNsProp(_ns, localName, value)
        else:
            # NOTE: Needs verifying: what should happen to the namespace?
            self._node.setNsProp(None, localName, value)

    def setAttribute(self, name, value):
        # NOTE: Need to convert from Unicode.
        name, value = from_unicode(name), from_unicode(value)

        self._node.setProp(name, value)

    def setAttributeNodeNS(self, ns, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttributeNS(ns, name, node.nodeValue)

    def setAttributeNode(self, name, node):
        # NOTE: Not actually putting the node on the element.
        self.setAttribute(name, node.nodeValue)

    def createElementNS(self, ns, name):

        """
        Return a new, unattached element with the qualified 'name'. Where 'ns'
        is not None, a namespace declaration for it is placed on the element.
        """

        # NOTE: Need to convert from Unicode.
        ns, name = from_unicode(ns), from_unicode(name)

        prefix, localName = _get_prefix_and_localName(name)
        _node = libxml2.newNode(localName)
        if ns is not None:
            _ns = _node.newNs(ns, prefix)
            _node.setNs(_ns)
        return Node(_node)

    def createElement(self, name):

        "Return a new, unattached element called 'name'."

        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        _node = libxml2.newNode(name)
        return Node(_node)

    def createAttributeNS(self, ns, name):

        "Return a TemporaryNode describing an attribute in namespace 'ns'."

        # NOTE: Need to convert from Unicode.
        ns, name = from_unicode(ns), from_unicode(name)

        return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createAttribute(self, name):

        "Return a TemporaryNode describing an attribute without a namespace."

        # NOTE: Need to convert from Unicode.
        name = from_unicode(name)

        return TemporaryNode(None, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createTextNode(self, value):

        "Return a new, unattached text node containing 'value'."

        # NOTE: Need to convert from Unicode.
        value = from_unicode(value)

        return Node(libxml2.newText(value))

    def _add_node(self, tmp):

        """
        Create on this element the attribute described by the temporary node
        'tmp', returning the native attribute node. None is returned for any
        other kind of node.
        """

        if tmp.nodeType != xml.dom.Node.ATTRIBUTE_NODE:
            return None

        value = from_unicode(getattr(tmp, "value", None))
        if tmp.ns is not None:
            _child = self._node.newNsProp(None, tmp.localName, value)
            _ns = _child.newNs(tmp.ns, tmp.prefix)
            _child.setNs(_ns)
        else:
            # newProp takes only the name and the value.
            _child = self._node.newProp(tmp.name, value)

        return _child

    def importNode(self, node, deep):

        """
        Return a copy of 'node' made using this node's owner document.
        Elements (with their attributes and, where 'deep' is true, their
        children), text nodes and attributes are supported; ValueError is
        raised with the node type for anything else.
        """

        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)
            for value in node.attributes.values():
                imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)

            if deep:
                for child in node.childNodes:
                    imported_child = self.importNode(child, deep)
                    if imported_child:
                        imported_element.appendChild(imported_child)

            return imported_element

        elif node.nodeType == xml.dom.Node.TEXT_NODE:
            return self.ownerDocument.createTextNode(node.nodeValue)

        elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            imported_attribute = self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)
            # Carry the value over so that it is available upon attachment.
            imported_attribute.value = node.nodeValue
            return imported_attribute

        raise ValueError(node.nodeType)

    def insertBefore(self, tmp, oldNode):
        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = tmp._node
        else:
            _child = self._add_node(tmp)
        _child.unlinkNode()
        return Node(oldNode._node.addPrevSibling(_child))

    def replaceChild(self, tmp, oldNode):
        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = tmp._node
        else:
            _child = self._add_node(tmp)
        _child.unlinkNode()
        return Node(oldNode._node.replaceNode(_child))

    def appendChild(self, tmp):
        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = self._node.addChild(tmp._node)
        else:
            _child = self._add_node(tmp)
        return Node(_child)

    def removeChild(self, tmp):

        "Unlink 'tmp' from the tree and return it."

        tmp._node.unlinkNode()
        return tmp

    #doctype defined in __init__
    #ownerElement defined in __init__
    ownerDocument = property(_ownerDocument)
    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    nextSibling = property(_nextSibling)

    def isSameNode(self, other):

        "Return whether this node and 'other' have the same node path."

        # NOTE: Only paths are compared, so nodes at the same position in
        # NOTE: different documents are also considered the same.
        return self._node.nodePath() == other._node.nodePath()

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.isSameNode(other)

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

# Utility functions.

def createDocumentType(localName, publicId, systemId):
    # NOTE: Document types are not supported: None is always returned.
    return None

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. Where 'localName' is not None, a root element
    with that name is created in 'namespaceURI' and added to the document.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = Node(libxml2.newDoc("1.0"), doctype=doctype)
    if localName is not None:
        root = d.createElementNS(namespaceURI, localName)
        d.appendChild(root)
    return d

def parse(stream_or_string):

    """
    Parse the document provided by 'stream_or_string', which is either an
    object with a 'read' method or the name of a file to be opened.
    """

    if hasattr(stream_or_string, "read"):
        return parseString(stream_or_string.read())

    # Only a file opened here is closed here.
    stream = open(stream_or_string)
    try:
        return parseString(stream.read())
    finally:
        stream.close()

def parseString(s):
    return Node(libxml2.parseDoc(s))

def parseURI(uri):
    return Node(libxml2.parseURI(uri))

def toString(node):
    return node.as_native_node().serialize()

def toStream(node, stream=None):

    "Write the serialised 'node' to 'stream', or to sys.stdout by default."

    if stream is None:
        stream = sys.stdout
    stream.write(toString(node))

# vim: tabstop=4 expandtab shiftwidth=4