#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
"""

import xml.dom

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod: Unicode objects
    are encoded as UTF-8; any other object is returned unchanged provided that
    'unicode(s)' succeeds. A TypeError is raised if that conversion fails with
    a UnicodeError.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")

    # The string might contain non-ASCII characters, thus upsetting libxml2
    # as it encounters a non-UTF-8 string.
    try:
        unicode(s)
    except UnicodeError:
        # The call form of raise is equivalent to the old statement form and
        # is also accepted by newer Python versions.
        raise TypeError("Please use Unicode for non-ASCII data.")
    return s

def to_unicode(s):

    """
    Return plain strings decoded from UTF-8 as Unicode objects; return any
    other object (including None) unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as Unicode, or
    None where that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    return out_ns or None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon, returning a (prefix, localName)
    tuple. The prefix is None where 'name' has no colon; (None, None) is
    returned where 'name' has more than one colon.
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    elif len(parts) == 2:
        # Always return a tuple, like the other branches.
        return tuple(parts)
    else:
        # NOTE: Should raise an exception.
        return None, None

def _find_namespace_for_prefix(node, prefix):

    "Find the namespace definition node in the given 'node' for 'prefix'."

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if libxml2mod.name(current) == prefix:
            return current
        current = libxml2mod.next(current)
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'. The definitions declared on 'node' are tried first, then the
    namespace which 'node' itself uses. None is returned if nothing matches.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            return current
        current = libxml2mod.next(current)

    # Fall back to the namespace employed by the node itself.
    node_ns = libxml2mod.xmlNodeGetNs(node)
    if node_ns is not None and _check_namespace(node_ns, ns, prefix):
        return node_ns
    return None

def _check_namespace(current, ns, prefix):

    """
    Check the 'current' namespace definition node against 'ns' and 'prefix',
    returning 1 for a match and 0 otherwise. A 'prefix' of None matches any
    prefix.
    """

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    if ns == current_ns and (prefix is None or prefix == current_prefix):
        return 1
    return 0

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter). None is returned where no definition is made.
    """

    if prefix is not None or set_default:
        return libxml2mod.xmlNewNs(node, ns, prefix)
    return None

def _get_invented_prefix(node, ns):

    """
    Return the first prefix of the form "NS0", "NS1", ... which is not the name
    of a namespace definition on 'node'. The 'ns' parameter is not consulted.
    """

    prefixes = []
    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        prefixes.append(libxml2mod.name(current))
        current = libxml2mod.next(current)

    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# Mapping from libxml2mod type labels to DOM node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Mapping from DOM node type constants back to a label. Where several labels
# share a constant, only one of them is retained.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_equals(node, other):

    "Return whether xmlXPathCmpNodes yields 0 for 'node' and 'other'."

    return libxml2mod.xmlXPathCmpNodes(node, other) == 0

def Node_ownerDocument(node):

    "Return the document recorded for 'node'."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised where
    the libxml2mod type label is not present in the '_nodeTypes' table.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node', omitting document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        # Remove doctypes.
        if Node_nodeType(child) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute nodes
    of 'node'. The namespace is None for attributes without a namespace.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        ns = libxml2mod.xmlNodeGetNs(current)
        if ns is not None:
            attributes[(get_ns(ns), libxml2mod.name(current))] = current
        else:
            attributes[(None, libxml2mod.name(current))] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace of 'node', or None where it has none."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return get_ns(ns)
    return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value', converted using from_unicode."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.

Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the prefix of the namespace used by 'node', or None without one."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    return None

def Node_nodeName(node):

    "Return 'prefix:localName' for 'node', or just the local name without a prefix."

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    return Node_localName(node)

def Node_tagName(node):

    "Return the node name of 'node' if it is an element; None otherwise."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):

    "Return the name of 'node' as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    "Return the parent of 'node', or None where 'node' is a document."

    # Both document labels map to DOCUMENT_NODE in the node type table, so
    # treat HTML documents in the same way as XML documents here.
    if libxml2mod.type(node) in ("document_xml", "document_html"):
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the node preceding 'node', or None."

    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the node following 'node', or None."

    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the result of xmlGetIntSubset for 'node'."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has an attribute with the given 'ns' and 'localName',
    or a namespace definition for 'ns' whose prefix is 'localName'.
    """

    return Node_getAttributeNS(node, ns, localName) is not None or \
        _find_namespace(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has an attribute with the given 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute on 'node' with the given 'ns' and
    'localName', or None. For the XMLNS namespace, the namespace declared on
    'node' for the prefix 'localName' is returned instead.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is not None:
            return get_ns(ns_def)
        return None
    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value of the attribute on 'node' with the given 'name'."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node on 'node' for 'ns' and 'localName'. A KeyError is
    raised where no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node on 'node' for 'name' in no namespace. A KeyError
    is raised where no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the qualified 'name' in namespace 'ns' on 'node' to
    'value', finding or making a namespace definition on 'node' as required.
    Setting an 'xmlns:prefix' attribute in the XMLNS namespace makes a
    namespace definition instead of an attribute.
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        _make_namespace(node, value, localName, set_default=0)

    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)

    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute with the given 'name' on 'node' to 'value'."

    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    "Copy the namespace, name and value of 'attr' onto 'node'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Copy the name and value of 'attr' onto 'node'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute with the given 'ns' and 'localName' from 'node'. A
    KeyError is raised (by the attribute lookup) where no such attribute exists.
    """

    attr = Node_getAttributeNodeNS(node, ns, localName)
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute with the given 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the qualified 'name' in namespace 'ns'. The
    'node' parameter is not consulted.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)

    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)

    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")

    return new_node

def Node_createElement(node, name):

    "Return a new element with the given 'name'; 'node' is not consulted."

    name = from_unicode(name)

    return libxml2mod.xmlNewNode(name)

def Node_createAttributeNS(node, ns, name):

    """
    Create and return an attribute with the qualified 'name' in namespace 'ns'
    using xmlNewNsProp on 'node', with None given as the value.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None
    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):

    "Create and return an attribute with the given 'name' and no namespace."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node containing 'value'; 'node' is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    "Return a new comment node containing 'value'; 'node' is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_createCDATASection(node, value):

    "Return a new CDATA section containing 'value' for the document of 'node'."

    value = from_unicode(value)

    # The length given is that of the converted value.
    return libxml2mod.xmlNewCDataBlock(Node_ownerDocument(node), value, len(value))

def Node_insertBefore(node, tmp, oldNode):

    "Add 'tmp' as the previous sibling of 'oldNode'; 'node' is not consulted."

    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    "Replace 'oldNode' with 'tmp'; 'node' is not consulted."

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node'."

    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child'; 'node' is not consulted."

    libxml2mod.xmlUnlinkNode(child)

def _unsupported_node_type(node_type):

    "Return the exception to raise when 'node_type' cannot be imported."

    # DOMException itself refuses to be instantiated directly, so the specific
    # subclass (whose code is NOT_SUPPORTED_ERR) is used. Not every DOM node
    # type has a label, hence the defaulted lookup.
    label = _reverseNodeTypes.get(node_type, "unknown")
    return xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (label, node_type))

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other', made using the creation
    functions with 'node' as context. Elements are copied with their
    attributes and, where 'deep' is true, their children. Text, comment and
    CDATA section nodes are also supported; xml.dom.NotSupportedErr is raised
    for other node types.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    raise _unsupported_node_type(node_type)

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', a node accessed through DOM attributes such as
    'nodeType', 'attributes' and 'childNodes', made using the creation
    functions with 'node' as context. Elements are copied with their
    attributes and, where 'deep' is true, their children. Text, comment and
    CDATA section nodes are also supported; xml.dom.NotSupportedErr is raised
    for other node types.
    """

    node_type = other.nodeType

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    raise _unsupported_node_type(node_type)

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node,
    returning the result of xmlXPathEval. The optional 'namespaces' dictionary
    maps prefixes to namespaces to be registered for the evaluation. The
    'variables' parameter is currently ignored.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)

    # Free the context even if registration or evaluation fails.
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)

# Exceptions.

class LSException(Exception):

    "DOM Level 3 Load/Save exception."

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. Where 'localName' is not None, a root element with
    that name and the given 'namespaceURI' is added; where 'doctype' is not
    None, an internal subset is created from its 'localName', 'publicId' and
    'systemId' attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0):

    """
    Parse 'stream_or_string': an object with a 'read' attribute is read and
    its contents given to parseString; anything else is given to parseFile.
    The remaining parameters are passed on unchanged.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding, unfinished=unfinished)
    return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding, unfinished=unfinished)

def _parse_context(context, unfinished):

    """
    Configure the parser 'context', run the parser and return the resulting
    document. An LSException with the PARSE_ERR code is raised where the
    document is not well-formed, unless 'unfinished' is true.
    """

    Parser_configure(context)
    Parser_parse(context)
    doc = Parser_document(context)
    if unfinished or Parser_well_formed(context):
        return doc
    raise LSException(LSException.PARSE_ERR)

def parseFile(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the file named 's' and return the document. Where 'html' is true,
    htmlReadFile is used with 'htmlencoding'; otherwise, an LSException is
    raised for documents which are not well-formed unless 'unfinished' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_context(libxml2mod.xmlCreateFileParserCtxt(s), unfinished)
    return libxml2mod.htmlReadFile(s, htmlencoding,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseString(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the document text 's' and return the document. Where 'html' is true,
    htmlReadMemory is used with 'htmlencoding'; otherwise, an LSException is
    raised for documents which are not well-formed unless 'unfinished' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_context(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)), unfinished)

    # NOTE: URL given as None.
    html_url = None
    return libxml2mod.htmlReadMemory(s, len(s), html_url, htmlencoding,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseURI(uri, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the resource at 'uri' and return the document. An LSException is
    raised for documents which are not well-formed unless 'unfinished' is true.
    HTML is not supported: NotImplementedError is raised where 'html' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_context(libxml2mod.xmlCreateURLParserCtxt(uri, 0), unfinished)

    # The call form of raise is equivalent to the old statement form and is
    # also accepted by newer Python versions.
    raise NotImplementedError("parseURI does not yet support HTML")

def toString(node, encoding=None, prettyprint=0):

    "Return the serialisation of 'node' produced by serializeNode."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialisation of 'node' to 'stream' using its 'write' method."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save 'node' to 'f' using saveNodeTo."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants and helper functions.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

def Parser_push():

    "Return a new push parser context."

    return libxml2mod.xmlCreatePushParser(None, "", 0, None)

def Parser_configure(context):

    """
    Switch off the pedantic and validation settings of the parser 'context',
    and apply the NOERROR, NOWARNING and NONET options.
    """

    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)

def Parser_feed(context, s):

    "Give the text 's' to the parser 'context' using xmlParseChunk."

    # NOTE(review): The final argument is always 1; presumably this is the
    # NOTE(review): "terminate" flag of xmlParseChunk - confirm.
    libxml2mod.xmlParseChunk(context, s, len(s), 1)

def Parser_well_formed(context):

    "Return the well-formedness status recorded by the parser 'context'."

    return libxml2mod.xmlParserGetWellFormed(context)

def Parser_document(context):

    "Return the document held by the parser 'context'."

    return libxml2mod.xmlParserGetDoc(context)

def Parser_parse(context):

    "Run the parser 'context' using xmlParseDocument."

    libxml2mod.xmlParseDocument(context)

# vim: tabstop=4 expandtab shiftwidth=4