#!/usr/bin/env python

"""
DOM wrapper around libxml2.

The classes here wrap native libxml2 nodes (as returned by the 'libxml2'
Python bindings) and expose a subset of the W3C DOM API on top of them.
"""

import xml.dom
import libxml2
import sys

# NOTE: libxml2 seems to use UTF-8 throughout.

def to_unicode(s):

    """
    Return 's' as a Unicode string.

    None is passed through unchanged, byte strings are decoded as UTF-8 and
    anything else (including values which are already Unicode) is returned
    as it is.
    """

    if s is None:
        return None
    # 'bytes' is the plain 'str' type on Python 2 and the real byte string
    # type on Python 3, so this test is correct on both.
    if isinstance(s, bytes):
        return s.decode("utf-8")
    return s

# NOTE: Consider a generator instead.

class NamedNodeMap(object):

    """
    A mapping-like view of the attributes of the given 'node' (a Node
    instance from this module).

    Keys produced by 'keys' and 'items' are (namespaceURI, localName)
    tuples; item access accepts either such a tuple or a plain attribute
    name.
    """

    def __init__(self, node):
        self.node = node

    def getNamedItem(self, name):
        "Return the attribute node with the given 'name', or None."
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        "Return the attribute node for 'ns' and 'localName', or None."
        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):
        "Set an attribute on the element using the name and value of 'node'."
        self.node.setAttributeNode(node.name, node)

    def setNamedItemNS(self, node):
        "Set a namespaced attribute on the element using details of 'node'."
        self.node.setAttributeNodeNS(node.namespaceURI, node.localName, node)

    def __getitem__(self, name):

        """
        Return the attribute identified by 'name', which is either a
        (namespaceURI, localName) tuple as produced by 'keys', or a plain
        attribute name. None is returned where no such attribute exists.
        """

        if isinstance(name, tuple):
            ns, localName = name
            return self.getNamedItemNS(ns, localName)
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Store 'node' under 'name'. A KeyError is raised if 'name' is not the
        node name of 'node'.
        """

        if name != node.nodeName:
            raise KeyError(name)
        self.setNamedItem(node)

    def __delitem__(self, name):
        # NOTE: To be implemented. Deleting an item currently has no effect.
        pass

    def __iter__(self):
        # Iterate over the keys, as mappings do. Without this, iteration
        # would fall back to calling __getitem__ with 0, 1, 2... and never
        # terminate, since missing items yield None instead of an exception.
        return iter(self.keys())

    def values(self):
        "Return a list of Node objects, one for each attribute."
        attributes = []
        # The native attributes form a linked list starting at 'properties'.
        _attribute = self.node.as_native_node().properties
        while _attribute is not None:
            attributes.append(Node(_attribute, ownerElement=self.node))
            _attribute = _attribute.next
        return attributes

    def keys(self):
        "Return a list of (namespaceURI, localName) tuples."
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        "Return a list of ((namespaceURI, localName), attribute) tuples."
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon and return a (prefix, localName)
    tuple. The prefix is None where 'name' has no colon. Where 'name' has
    more than one colon, (None, None) is returned.
    """

    t = name.split(":")
    if len(t) == 1:
        return None, name
    elif len(t) == 2:
        # Return a tuple, like the other branches, not the list from split.
        return tuple(t)
    else:
        # NOTE: Should raise an exception.
        return None, None

class TemporaryNode(object):

    """
    A placeholder for an attribute which has been created but not yet
    attached to an element. It records the namespace 'ns', the qualified
    'name' and the DOM 'nodeType', and carries no value (None).
    """

    def __init__(self, ns, name, nodeType):
        # The DOM-style aliases (namespaceURI, nodeName, nodeValue...) let
        # instances be passed to the NamedNodeMap and Node setter methods,
        # which read those attribute names.
        self.ns = self.namespaceURI = ns
        self.name = self.nodeName = name
        self.nodeType = nodeType
        self.value = self.data = self.nodeValue = None
        self.prefix, self.localName = _get_prefix_and_localName(self.name)

class Node(object):

    """
    A DOM-style wrapper around a native libxml2 node.

    The native node is available through 'as_native_node'. DOM attributes
    such as 'nodeName', 'childNodes' and 'parentNode' are computed from the
    native node on each access; new wrapper objects are created each time.
    """

    # Mapping from the libxml2 bindings' node type names to DOM node types.
    _nodeTypes = {
        "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
        # NOTE: "cdata", "fragment" and "document_html" are believed to be
        # NOTE: the names used by the bindings - needs verifying.
        "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
        "comment" : xml.dom.Node.COMMENT_NODE,
        "document_xml" : xml.dom.Node.DOCUMENT_NODE,
        "document_html" : xml.dom.Node.DOCUMENT_NODE,
        "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
        "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
        "element" : xml.dom.Node.ELEMENT_NODE,
        "entity" : xml.dom.Node.ENTITY_NODE,
        "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
        "fragment" : xml.dom.Node.DOCUMENT_FRAGMENT_NODE,
        "notation" : xml.dom.Node.NOTATION_NODE,
        "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
        "text" : xml.dom.Node.TEXT_NODE
        }

    def __init__(self, node, ownerElement=None, doctype=None):

        """
        Wrap the native 'node'. The optional 'ownerElement' is the Node
        owning an attribute; the optional 'doctype' is stored on documents.
        """

        self._node = node
        self.ownerElement = ownerElement
        self.doctype = doctype

    def as_native_node(self):
        "Return the wrapped native libxml2 node."
        return self._node

    def _ownerDocument(self):
        # NOTE: A wrapper is returned even where the native node has no
        # NOTE: document; importNode relies on being able to call the
        # NOTE: creation methods on the result in that situation.
        return Node(self._node.doc)

    def _nodeType(self):
        return self._nodeTypes[self._node.type]

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        child_nodes = []
        _node = self._node.children
        while _node is not None:
            child_nodes.append(Node(_node))
            _node = _node.next
        return child_nodes

    def _attributes(self):
        return NamedNodeMap(self)

    def _getNs(self):

        "Internal namespace information retrieval."

        # The bindings raise an exception where a node has no namespace.
        try:
            return self._node.ns()
        except libxml2.treeError:
            return None

    def _namespaceURI(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.content)
        else:
            return None

    def _nodeValue(self):
        return to_unicode(self._node.content)

    def _prefix(self):
        ns = self._getNs()
        if ns is not None:
            return to_unicode(ns.name)
        else:
            return None

    def _nodeName(self):
        prefix = self._prefix()
        if prefix is not None:
            return prefix + ":" + self._localName()
        else:
            return self._localName()

    def _tagName(self):
        if self._node.type == "element":
            return self._nodeName()
        else:
            return None

    def _localName(self):
        return to_unicode(self._node.name)

    def _parentNode(self):
        if self.nodeType == xml.dom.Node.DOCUMENT_NODE:
            return None
        _parent = self._node.parent
        # Detached nodes have no parent: do not wrap None in a Node.
        if _parent is None:
            return None
        return Node(_parent)

    def _nextSibling(self):
        if self._node.next is not None:
            return Node(self._node.next)
        else:
            return None

    def hasAttributeNS(self, ns, localName):
        "Return whether an attribute exists for 'ns' and 'localName'."
        return self.getAttributeNS(ns, localName) is not None

    def hasAttribute(self, name):
        "Return whether an attribute with the given 'name' exists."
        return self.getAttribute(name) is not None

    def getAttributeNS(self, ns, localName):
        "Return the value of the attribute for 'ns' and 'localName', or None."
        return to_unicode(self._node.nsProp(localName, ns))

    def getAttribute(self, name):
        "Return the value of the attribute with the given 'name', or None."
        return to_unicode(self._node.prop(name))

    def getAttributeNodeNS(self, ns, localName):

        """
        Return the attribute node whose namespace URI is 'ns' and whose local
        name is 'localName', or None where there is no such attribute.
        """

        # The attributes are scanned directly: going through the mapping's
        # item access would call straight back into this method.
        for attr in self.attributes.values():
            if attr.namespaceURI == ns and attr.localName == localName:
                return attr
        return None

    def getAttributeNode(self, localName):

        """
        Return the attribute node whose node name is 'localName', or None
        where there is no such attribute. For attributes without a namespace
        the node name and the local name are the same thing.
        """

        # NOTE: Needs verifying.
        for attr in self.attributes.values():
            if attr.nodeName == localName:
                return attr
        return None

    def setAttributeNS(self, ns, name, value):

        """
        Set the attribute with the qualified 'name' in the namespace 'ns' to
        'value'. Where 'name' has a prefix, a namespace declaration for 'ns'
        and that prefix is made on this element. Where it has none and 'ns'
        is this element's own namespace, that namespace is used. Otherwise
        the attribute is set without any namespace.
        """

        prefix, localName = _get_prefix_and_localName(name)
        # Use the guarded lookup: elements without a namespace make the
        # bindings raise an exception.
        _ns = self._getNs()
        if prefix is not None:
            self._node.setNsProp(self._node.newNs(ns, prefix), localName, value)
        elif _ns is not None and ns == _ns.content:
            # The namespace object itself is required here, not its URI.
            self._node.setNsProp(_ns, localName, value)
        else:
            # NOTE: Needs verifying: what should happen to the namespace?
            self._node.setNsProp(None, localName, value)

    def setAttribute(self, name, value):
        "Set the attribute with the given 'name' to 'value'."
        self._node.setProp(name, value)

    def setAttributeNodeNS(self, ns, name, node):
        "Set the attribute 'name' in 'ns' to the value held by 'node'."
        # NOTE: Not actually putting the node on the element.
        self.setAttributeNS(ns, name, node.nodeValue)

    def setAttributeNode(self, name, node):
        "Set the attribute 'name' to the value held by 'node'."
        # NOTE: Not actually putting the node on the element.
        self.setAttribute(name, node.nodeValue)

    def createElementNS(self, ns, name):

        """
        Return a new, unattached element with the qualified 'name'. Where
        'ns' is not None, the element declares and uses that namespace with
        the prefix taken from 'name'.
        """

        prefix, localName = _get_prefix_and_localName(name)
        _node = libxml2.newNode(localName)
        # Do not declare a namespace at all where none was requested.
        if ns is not None:
            _node.setNs(_node.newNs(ns, prefix))
        return Node(_node)

    def createElement(self, name):
        "Return a new, unattached element with the given 'name'."
        return Node(libxml2.newNode(name))

    def createAttributeNS(self, ns, name):
        "Return a placeholder attribute with the qualified 'name' in 'ns'."
        return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createAttribute(self, name):
        "Return a placeholder attribute with the given 'name' and no namespace."
        return TemporaryNode(None, name, xml.dom.Node.ATTRIBUTE_NODE)

    def createTextNode(self, value):
        "Return a new, unattached text node containing 'value'."
        return Node(libxml2.newText(value))

    def _add_node(self, tmp):

        """
        Create a native attribute on this element from the placeholder 'tmp'
        and return it. None is returned where 'tmp' is not an attribute.
        """

        if tmp.nodeType != xml.dom.Node.ATTRIBUTE_NODE:
            return None
        if tmp.ns is not None:
            # The declaration has to live on the element, as in
            # setAttributeNS; the attribute is then created inside it.
            _ns = self._node.newNs(tmp.ns, tmp.prefix)
            return self._node.newNsProp(_ns, tmp.localName, None)
        return self._node.newProp(tmp.name, None)

    def importNode(self, node, deep):

        """
        Return a copy of 'node' made using this node's owner document.
        Elements are copied together with their attributes and, where 'deep'
        is true, their descendants; text nodes and attributes are also
        supported. A ValueError carrying the node type is raised for
        anything else.
        """

        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)
            for value in node.attributes.values():
                imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)

            if deep:
                for child in node.childNodes:
                    imported_child = self.importNode(child, deep)
                    if imported_child:
                        imported_element.appendChild(imported_child)

            return imported_element

        elif node.nodeType == xml.dom.Node.TEXT_NODE:
            return self.ownerDocument.createTextNode(node.nodeValue)

        elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:
            return self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)

        raise ValueError(node.nodeType)

    def _unlinked_native_node(self, tmp):

        "Return the native node for 'tmp', detached from its current position."

        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = tmp._node
        else:
            # NOTE: This yields None for anything other than attributes, in
            # NOTE: which case the unlinking below fails.
            _child = self._add_node(tmp)
        _child.unlinkNode()
        return _child

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' immediately before 'oldNode' and return the inserted
        node. Where 'oldNode' is None, 'tmp' is appended to this node's
        children instead, as the DOM specifies.
        """

        if oldNode is None:
            return self.appendChild(tmp)
        _child = self._unlinked_native_node(tmp)
        return Node(oldNode._node.addPrevSibling(_child))

    def replaceChild(self, tmp, oldNode):
        "Put 'tmp' in the place of 'oldNode', returning a Node for the result."
        _child = self._unlinked_native_node(tmp)
        return Node(oldNode._node.replaceNode(_child))

    def appendChild(self, tmp):

        """
        Add 'tmp' to this node and return a Node for the result. Text nodes
        and elements become children; placeholder attributes are created on
        this element.
        """

        if tmp.nodeType in (xml.dom.Node.TEXT_NODE, xml.dom.Node.ELEMENT_NODE):
            _child = self._node.addChild(tmp._node)
        else:
            _child = self._add_node(tmp)
        return Node(_child)

    def removeChild(self, tmp):
        "Detach 'tmp' from its parent and return it."
        tmp._node.unlinkNode()
        return tmp

    #doctype defined in __init__
    #ownerElement defined in __init__
    ownerDocument = property(_ownerDocument)
    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    nextSibling = property(_nextSibling)

    def isSameNode(self, other):
        "Return whether 'other' has the same node path as this node."
        return self._node.nodePath() == other._node.nodePath()

    def __eq__(self, other):
        # Nodes are compared by their node paths.
        # NOTE: Nodes from different documents may presumably share a path;
        # NOTE: needs verifying.
        if not isinstance(other, Node):
            return NotImplemented
        return self._node.nodePath() == other._node.nodePath()

    def __ne__(self, other):
        # Python 2 does not derive this from __eq__.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Kept consistent with __eq__, and needed on Python 3 where defining
        # __eq__ alone would make instances unhashable.
        return hash(self._node.nodePath())

# Utility functions.

def createDocumentType(localName, publicId, systemId):
    "Not implemented: None is always returned."
    return None

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document Node associated with 'doctype'. Where 'localName'
    is not None, an element with that name in 'namespaceURI' is created and
    appended to the document.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = Node(libxml2.newDoc("1.0"), doctype=doctype)
    if localName is not None:
        root = d.createElementNS(namespaceURI, localName)
        d.appendChild(root)
    return d

def parse(stream_or_string):

    """
    Parse a document from 'stream_or_string' and return it as a Node. An
    object with a 'read' attribute is read directly (and left open); anything
    else is treated as a filename.
    """

    if hasattr(stream_or_string, "read"):
        return parseString(stream_or_string.read())

    # Files opened here are also closed here, even if parsing fails.
    stream = open(stream_or_string)
    try:
        return parseString(stream.read())
    finally:
        stream.close()

def parseString(s):
    "Parse the document text 's' and return the document as a Node."
    return Node(libxml2.parseDoc(s))

def parseURI(uri):
    "Parse the document found at 'uri' and return it as a Node."
    return Node(libxml2.parseURI(uri))

def toString(node):
    "Return the serialised form of 'node'."
    return node.as_native_node().serialize()

def toStream(node, stream=None):
    "Write the serialised form of 'node' to 'stream' (default: sys.stdout)."
    # Only substitute standard output where no stream was given at all; a
    # stream object which happens to be false is still a stream.
    if stream is None:
        stream = sys.stdout
    stream.write(toString(node))

# vim: tabstop=4 expandtab shiftwidth=4