#!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.
"""

__version__ = "0.2.1"

import libxml2
from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile
import weakref

# The node type constants below are taken from xml.dom; import it explicitly
# rather than relying on the star import above to expose the name.
import xml.dom

# NOTE(review): 'libxml2mod' is used by Document.__del__ and adoptNodes but is
# not imported explicitly in this file; presumably it is exposed by the star
# import from libxml2dom.macrolib - confirm.

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.

    All operations are delegated to the attribute methods of the wrapped
    'node'.
    """

    def __init__(self, node):
        self.node = node

    def getNamedItem(self, name):
        "Return the attribute node having the given 'name'."
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        "Return the attribute node having the given 'ns' and 'localName'."
        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):

        """
        Set the given attribute 'node' on the wrapped node, returning the
        attribute previously stored under the same name, or None if the lookup
        of the previous attribute raised KeyError.
        """

        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):

        """
        Set the given attribute 'node' on the wrapped node, returning the
        attribute previously stored under the same namespace and local name, or
        None if the lookup of the previous attribute raised KeyError.
        """

        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):

        """
        Remove the attribute having the given 'name', returning the attribute
        found before removal, or None if the lookup raised KeyError.
        """

        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):

        """
        Remove the attribute having the given 'ns' and 'localName', returning
        the attribute found before removal, or None if the lookup raised
        KeyError.
        """

        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Dictionary emulation methods.

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Store the attribute 'node' under 'name'. The 'name' must match the
        nodeName of 'node', otherwise KeyError is raised.
        """

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            # Call syntax is valid in both Python 2 and Python 3.
            raise KeyError(name)

    def __delitem__(self, name):
        "Remove the attribute having the given 'name'."
        self.removeNamedItem(name)

    def values(self):

        """
        Return a list of Attribute objects for the attributes of the wrapped
        node, each one recording the wrapped node as its owner element.
        """

        owner = self.node
        native_attributes = Node_attributes(owner.as_native_node())
        return [Attribute(_node, owner.ownerDocument, owner)
                for _node in native_attributes.values()]

    def keys(self):
        "Return a list of (namespaceURI, localName) tuples, one per attribute."
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        """
        Return a list of ((namespaceURI, localName), attribute) tuples, one per
        attribute.
        """

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

class NodeList(list):

    "A wrapper around node lists."

    def item(self, index):
        "Return the node at the given 'index'."
        return self[index]

    def length(self):

        # NOTE: This is a method here, not an attribute, so callers must write
        # NOTE: nodes.length() rather than nodes.length.

        return len(self)

# Internal helpers.

def _as_native(node):

    """
    Return the low-level node wrapped by 'node' if it provides the
    'as_native_node' method; otherwise return 'node' itself unchanged.
    """

    if hasattr(node, "as_native_node"):
        return node.as_native_node()
    return node

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.

    Each instance holds the low-level node and a reference to the owning
    Document object; the DOM properties are computed on access by calling the
    corresponding Node_* functions on the low-level node.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, ownerDocument=None):
        self._node = node
        self.ownerDocument = ownerDocument

    def as_native_node(self):
        "Return the low-level node wrapped by this object."
        return self._node

    def _wrap_or_none(self, native):

        """
        Return a Node wrapping 'native', sharing this node's owner document, or
        None if 'native' is None.
        """

        if native is None:
            return None
        return Node(native, self.ownerDocument)

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([Node(_node, self.ownerDocument) for _node in Node_childNodes(self._node)])

    def _attributes(self):
        return NamedNodeMap(self)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _nodeValue(self):
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):

        # NOTE(review): Presumably Node_parentNode yields None where there is
        # NOTE(review): no parent; the guard is harmless otherwise - confirm.

        native = Node_parentNode(self._node)
        if native is None:
            return None
        return get_node(native, self)

    def _previousSibling(self):

        # NOTE(review): Presumably None is yielded where there is no sibling;
        # NOTE(review): avoid wrapping it in a Node - confirm.

        return self._wrap_or_none(Node_previousSibling(self._node))

    def _nextSibling(self):
        return self._wrap_or_none(Node_nextSibling(self._node))

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):

        """
        Return an Attribute for the attribute having the given 'ns' and
        'localName', with this node recorded as its owner element.
        """

        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.ownerDocument, self)

    def getAttributeNode(self, localName):

        """
        Return an Attribute for the attribute having the given 'localName',
        with this node recorded as its owner element.
        """

        return Attribute(Node_getAttributeNode(self._node, localName), self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    def createElementNS(self, ns, name):
        return Node(Node_createElementNS(self._node, ns, name), self.ownerDocument)

    def createElement(self, name):
        return Node(Node_createElement(self._node, name), self.ownerDocument)

    def createAttributeNS(self, ns, name):

        """
        Return a new Attribute having the given 'ns' and 'name'. The attribute
        is created on a temporary element and has no owner element recorded.
        """

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.ownerDocument)

    def createAttribute(self, name):

        """
        Return a new Attribute having the given 'name'. The attribute is
        created on a temporary element and has no owner element recorded.
        """

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.ownerDocument)

    def createTextNode(self, value):
        return Node(Node_createTextNode(self._node, value), self.ownerDocument)

    def createComment(self, value):
        return Node(Node_createComment(self._node, value), self.ownerDocument)

    def importNode(self, node, deep):

        """
        Import the given 'node', returning a new Node sharing this node's owner
        document. Objects providing 'as_native_node' are imported via their
        low-level node; anything else is handed to Node_importNode_DOM.
        """

        if hasattr(node, "as_native_node"):
            return Node(Node_importNode(self._node, node.as_native_node(), deep), self.ownerDocument)
        else:
            return Node(Node_importNode_DOM(self._node, node, deep), self.ownerDocument)

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' before the child 'oldNode', returning a Node wrapping the
        result. If 'oldNode' is None, 'tmp' is appended instead, as specified
        for DOM insertBefore.
        """

        if oldNode is None:
            return self.appendChild(tmp)
        return Node(Node_insertBefore(self._node, _as_native(tmp), oldNode.as_native_node()), self.ownerDocument)

    def replaceChild(self, tmp, oldNode):

        """
        Replace the child 'oldNode' with 'tmp', returning a Node wrapping the
        result.
        """

        return Node(Node_replaceChild(self._node, _as_native(tmp), oldNode.as_native_node()), self.ownerDocument)

    def appendChild(self, tmp):
        "Append 'tmp' to this node, returning a Node wrapping the result."
        return Node(Node_appendChild(self._node, _as_native(tmp)), self.ownerDocument)

    def removeChild(self, tmp):
        "Remove the child 'tmp' from this node, returning 'tmp'."
        Node_removeChild(self._node, _as_native(tmp))
        return tmp

    def getElementsByTagName(self, tagName):

        # Use a relative path so that only descendants of this node are
        # searched; an absolute "//" path would search the entire document.

        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    # NOTE: normalize must be implemented specially for libxml2dom.

    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)

    #def isSameNode(self, other):
    #    return self._node.nodePath() == other._node.nodePath()

    #def __eq__(self, other):
    #    return self._node.nodePath() == other._node.nodePath()

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the XPath expression 'expr' with this node as the context,
        using the optional 'variables' and 'namespaces'. A sequence result is
        returned as a NodeList of wrapped nodes; any other result, including a
        string, is returned unchanged.
        """

        result = Node_xpath(self._node, expr, variables, namespaces)

        # Strings have a length too, but must not be treated as node sequences.

        if isinstance(result, (str, type(u""))):
            return result
        elif hasattr(result, "__len__"):
            # NOTE(review): Attribute results record this context node as
            # NOTE(review): their owner element (see get_node), which may not
            # NOTE(review): be the element actually bearing the attribute.
            return NodeList([get_node(_node, self) for _node in result])
        else:
            return result

    # Convenience methods.

    def toString(self, encoding=None):
        return toString(self, encoding)

    def toStream(self, stream, encoding=None):
        toStream(self, stream, encoding)

    def toFile(self, f, encoding=None):
        toFile(self, f, encoding)

# Attribute nodes.

class Attribute(Node):

    "A class providing attribute access."

    def __init__(self, node, ownerDocument=None, ownerElement=None):
        Node.__init__(self, node, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):

        # The parent of an attribute is reported as its owner element, which is
        # None unless supplied on construction.

        return self.ownerElement

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class Document(Node):

    "A class providing document-level housekeeping."

    def __init__(self, node):

        # Node.__init__ is not used here since ownerDocument is a read-only
        # property in this class. The document refers to itself only through a
        # weak reference.

        self._node = node
        self.weakref_ownerDocument = weakref.ref(self)

    def _ownerDocument(self):
        return self.weakref_ownerDocument()

    def __del__(self):
        #print "Freeing document", self._node
        libxml2mod.xmlFreeDoc(self._node)

    ownerDocument = property(_ownerDocument)

# Factory functions.

def get_node(_node, context_node):

    """
    Return a wrapper for the low-level '_node' according to its node type,
    using 'context_node' to supply the owner document: document nodes yield the
    owner document itself, attribute nodes yield an Attribute whose owner
    element is 'context_node', and all other nodes yield a plain Node.
    """

    node_type = Node_nodeType(_node)
    if node_type == context_node.DOCUMENT_NODE:
        return context_node.ownerDocument
    elif node_type == context_node.ATTRIBUTE_NODE:
        return Attribute(_node, context_node.ownerDocument, context_node)
    else:
        return Node(_node, context_node.ownerDocument)

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    # NOTE: Document types are not supported: None is always returned.

    return None

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document object created from the given 'namespaceURI',
    'localName' and 'doctype'.
    """

    return Document(Node_createDocument(namespaceURI, localName, doctype))

def parse(stream_or_string, html=0):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (any object providing a 'read' method, such as a file or stream
    object), or a string (containing the filename of a document). If the
    optional 'html' parameter is set to a true value, the content to be parsed
    will be treated as being HTML rather than XML.

    To parse a string containing the text of a document, use parseString
    instead.

    A document object is returned by this function.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html)
    else:
        return parseFile(stream_or_string, html)

def parseFile(filename, html=0):

    """
    Parse the file having the given 'filename'. If the optional 'html' parameter
    is set to a true value, the content to be parsed will be treated as being
    HTML rather than XML.

    A document object is returned by this function.
    """

    return Document(Node_parseFile(filename, html))

def parseString(s, html=0):

    """
    Parse the content of the given string 's'. If the optional 'html' parameter
    is set to a true value, the content to be parsed will be treated as being
    HTML rather than XML.

    A document object is returned by this function.
    """

    return Document(Node_parseString(s, html))

def parseURI(uri, html=0):

    """
    Parse the content found at the given 'uri'. If the optional 'html' parameter
    is set to a true value, the content to be parsed will be treated as being
    HTML rather than XML.

    A document object is returned by this function.
    """

    return Document(Node_parseURI(uri, html))

def toString(node, encoding=None):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation.
    """

    return Node_toString(node.as_native_node(), encoding)

def toStream(node, stream, encoding=None):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation.
    """

    Node_toStream(node.as_native_node(), stream, encoding)

def toFile(node, filename, encoding=None):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation.
    """

    Node_toFile(node.as_native_node(), filename, encoding)

def adoptNodes(nodes):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.

    All returned nodes share a single new Document object obtained from the
    first of the given 'nodes'.
    """

    if len(nodes) == 0:
        return []
    doc = Document(libxml2mod.doc(nodes[0]))
    return [Node(node, doc) for node in nodes]

# vim: tabstop=4 expandtab shiftwidth=4