#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
"""

import xml.dom

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.
# NOTE: Implement: http://www.w3.org/TR/2006/REC-xml-20060816/#AVNormalize

def from_unicode(s):

    """
    Return 's' in a form that can be passed to libxml2mod.

    Unicode objects are encoded as UTF-8. Any other object is returned
    unchanged, but only after checking that it can be converted to Unicode
    using the default encoding; a TypeError is raised if that conversion
    fails.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")

    # The string might contain non-ASCII characters, thus upsetting libxml2
    # as it encounters a non-UTF-8 string.
    try:
        unicode(s)
    except UnicodeError:
        # The call form of raise is used so that the statement is valid in
        # both old and new Python syntax.
        raise TypeError("Please use Unicode for non-ASCII data.")
    return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a plain string; any other object
    (including None) is returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as Unicode, or
    None if that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    return out_ns or None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at ":" and return a (prefix, localName) tuple.
    The prefix is None for a name without a colon. For a name containing more
    than one colon, (None, None) is returned.
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    if len(parts) == 2:
        # Always return a tuple, as the other branches do.
        return tuple(parts)
    # NOTE: Should raise an exception.
    return None, None

def _find_namespace_for_prefix(node, prefix):

    "Find the namespace definition node in the given 'node' for 'prefix'."

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if libxml2mod.name(current) == prefix:
            return current
        current = libxml2mod.next(current)
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'. The definitions made on 'node' are searched first, followed
    by the namespace of 'node' itself. None is returned if nothing matches.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            return current
        current = libxml2mod.next(current)

    # Fall back to the namespace which the node itself uses.
    node_ns = libxml2mod.xmlNodeGetNs(node)
    if node_ns is not None and _check_namespace(node_ns, ns, prefix):
        return node_ns
    return None

def _check_namespace(current, ns, prefix):

    """
    Check the 'current' namespace definition node against 'ns' and 'prefix',
    returning 1 if 'ns' matches and 'prefix' is either None or also matches,
    and 0 otherwise.
    """

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    if ns == current_ns and (prefix is None or prefix == current_prefix):
        return 1
    return 0

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter). Return the new definition, or None if nothing was made because
    'prefix' is None and 'set_default' is false.
    """

    if prefix is not None or set_default:
        return libxml2mod.xmlNewNs(node, ns, prefix)
    return None

def _get_invented_prefix(node, ns):

    """
    Return a prefix of the form "NS0", "NS1", ... which is not used by any of
    the namespace definitions made on 'node'. The 'ns' parameter is not
    consulted.
    """

    prefixes = []
    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        prefixes.append(libxml2mod.name(current))
        current = libxml2mod.next(current)

    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# Mapping from the type labels returned by libxml2mod.type to DOM node types.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Mapping from DOM node types back to labels. Where several labels share a
# node type, only one of them is retained.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_equals(node, other):

    "Return whether xmlXPathCmpNodes gives a result of 0 for 'node' and 'other'."

    return libxml2mod.xmlXPathCmpNodes(node, other) == 0

def Node_ownerDocument(node):

    "Return the document recorded for 'node' by libxml2mod."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised if the
    type label reported by libxml2mod is not present in the type table.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node', omitting document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        # Remove doctypes.
        if Node_nodeType(child) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute nodes
    of 'node'. The namespace is None for attributes without a namespace.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        ns = libxml2mod.xmlNodeGetNs(current)
        if ns is not None:
            key = (get_ns(ns), libxml2mod.name(current))
        else:
            key = (None, libxml2mod.name(current))
        attributes[key] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace of 'node', or None if it has no (or an empty) namespace."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return get_ns(ns)
    return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value', which is converted using from_unicode."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.
Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the prefix of the namespace of 'node' as Unicode, or None without one."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    return None

def Node_nodeName(node):

    "Return the name of 'node', qualified as prefix:localName if it has a prefix."

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    return Node_localName(node)

def Node_tagName(node):

    "Return the node name of 'node' if it is an element, or None otherwise."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):

    "Return the name of 'node' reported by libxml2mod, as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    "Return the parent of 'node', or None if 'node' is an XML document."

    if libxml2mod.type(node) == "document_xml":
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the node before 'node', or None if libxml2mod reports none."

    # A single call suffices: the result is either a node or None.
    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the node after 'node', or None if libxml2mod reports none."

    # A single call suffices: the result is either a node or None.
    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the result of xmlGetIntSubset for 'node'."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has a value for the attribute with the given 'ns' and
    'localName', or whether a namespace definition for 'ns' using 'localName'
    as its prefix can be found for 'node'.
    """

    return Node_getAttributeNS(node, ns, localName) is not None or \
        _find_namespace(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has a value for the attribute with the given 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute on 'node' with the given 'ns' and
    'localName' as Unicode. For the xmlns namespace, the namespace defined on
    'node' for the prefix 'localName' is returned instead, or None if there is
    no such definition.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is not None:
            return get_ns(ns_def)
        return None
    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value of the attribute on 'node' with the given 'name' as Unicode."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node on 'node' for 'ns' and 'localName'. A KeyError is
    raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node on 'node' with the given 'name' and no namespace.
    A KeyError is raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the given 'ns' and qualified 'name' on 'node' to
    'value'. Setting xmlns:prefix attributes in the xmlns namespace makes a
    namespace definition instead, unless a suitable one is found. For other
    attributes with a namespace, a definition is found or made (inventing a
    prefix if 'name' has none) before the attribute is set.
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        _make_namespace(node, value, localName, set_default=0)

    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)

    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute with the given 'name' on 'node' to 'value'."

    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    "Set an attribute on 'node' using the namespace, name and value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Set an attribute on 'node' using the name and value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute with the given 'ns' and 'localName' from 'node'. A
    KeyError is raised by the attribute lookup if there is no such attribute.
    """

    attr = Node_getAttributeNodeNS(node, ns, localName)
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute with the given 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the given 'ns' and qualified 'name'. The 'node'
    parameter is not consulted.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)
    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")
    return new_node

def Node_createElement(node, name):

    """
    Return a new element with the given 'name'. The 'node' parameter is not
    consulted.
    """

    name = from_unicode(name)

    return libxml2mod.xmlNewNode(name)

def Node_createAttributeNS(node, ns, name):

    """
    Return a new attribute made on 'node' with the given 'ns' and qualified
    'name' and no value, finding or making a namespace definition on 'node'
    where 'ns' is not None.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None
    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):

    "Return a new attribute made on 'node' with the given 'name' and no namespace."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node with the given 'value'; 'node' is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    "Return a new comment node with the given 'value'; 'node' is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_createCDATASection(node, value):

    """
    Return a new CDATA section with the given 'value', made using the owner
    document of 'node'.
    """

    value = from_unicode(value)

    # The length is that of the converted value, not of the original.
    return libxml2mod.xmlNewCDataBlock(Node_ownerDocument(node), value, len(value))

def Node_insertBefore(node, tmp, oldNode):

    "Add 'tmp' as the previous sibling of 'oldNode'; 'node' is not consulted."

    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    "Replace 'oldNode' with 'tmp'; 'node' is not consulted."

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node'."

    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child'; 'node' is not consulted."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other' made using 'node'. Elements
    (with their attributes and, if 'deep' is true, their children), text nodes,
    comments and CDATA sections are supported; for other node types,
    xml.dom.NotSupportedErr is raised.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    # xml.dom.DOMException refuses to be instantiated directly, so the
    # subclass carrying NOT_SUPPORTED_ERR as its code is raised. The message
    # reports the libxml2 type label rather than the raw node object.
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (libxml2mod.type(other), node_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', a node accessed through DOM attributes such as
    nodeType and childNodes, made using 'node'. Elements (with their attributes
    and, if 'deep' is true, their children), text nodes, comments and CDATA
    sections are supported; for other node types, xml.dom.NotSupportedErr is
    raised.
    """

    node_type = other.nodeType

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    # Node types missing from the label table must not cause a KeyError while
    # the message is being prepared. xml.dom.DOMException refuses to be
    # instantiated directly, so the appropriate subclass is raised.
    label = _reverseNodeTypes.get(node_type, "unknown")
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (label, node_type))

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result of xmlXPathEval. The 'namespaces' dictionary, if given,
    maps prefixes to namespaces which are registered before evaluation. The
    'variables' parameter is currently not used.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)

    # Free the context even if registration or evaluation fails.
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        result = libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)
    return result

# Exceptions.
class LSException(Exception):

    "DOM Level 3 Load/Save exception."

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. If 'localName' is not None, a root element with the
    given 'namespaceURI' and 'localName' is added. If 'doctype' is not None, an
    internal subset is created from its localName, publicId and systemId
    attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def _parse_with_context(context, unfinished):

    """
    Configure the parser 'context', parse with it and return the document
    produced. An LSException with PARSE_ERR is raised if the parser does not
    report the document as well-formed, unless 'unfinished' is true.
    """

    Parser_configure(context)
    Parser_parse(context)
    doc = Parser_document(context)
    if unfinished or Parser_well_formed(context):
        return doc
    raise LSException(LSException.PARSE_ERR)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0):

    """
    Parse 'stream_or_string' and return the document. An object with a read
    attribute has everything read from it and parsed as a string; anything else
    is handed to parseFile. The other parameters are passed on unchanged.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding, unfinished=unfinished)
    return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding, unfinished=unfinished)

def parseFile(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the file 's' and return the document. If 'html' is true, the file is
    read as HTML using 'htmlencoding'. Otherwise, it is parsed as XML and an
    LSException is raised for documents which are not well-formed, unless
    'unfinished' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_with_context(libxml2mod.xmlCreateFileParserCtxt(s), unfinished)
    return libxml2mod.htmlReadFile(s, htmlencoding,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseString(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the string 's' and return the document. If 'html' is true, the string
    is read as HTML using 'htmlencoding'. Otherwise, it is parsed as XML and an
    LSException is raised for documents which are not well-formed, unless
    'unfinished' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_with_context(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)), unfinished)

    # NOTE: URL given as None.
    html_url = None
    return libxml2mod.htmlReadMemory(s, len(s), html_url, htmlencoding,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseURI(uri, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the resource at 'uri' as XML and return the document. An LSException
    is raised for documents which are not well-formed, unless 'unfinished' is
    true. HTML is not supported: a NotImplementedError is raised if 'html' is
    true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_with_context(libxml2mod.xmlCreateURLParserCtxt(uri, 0), unfinished)

    # The call form of raise is used so that the statement is valid in both
    # old and new Python syntax.
    raise NotImplementedError("parseURI does not yet support HTML")

def toString(node, encoding=None, prettyprint=0):

    "Return the result of serialising 'node' with 'encoding' and 'prettyprint'."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialised form of 'node' to 'stream'."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save 'node' to 'f' using 'encoding' and 'prettyprint'."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants and helper functions.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

def Parser_push():

    "Return a new push parser context made with no handler, data or filename."

    return libxml2mod.xmlCreatePushParser(None, "", 0, None)

def Parser_configure(context):

    """
    Configure the parser 'context': pedantic mode and validation are switched
    off, and the NOERROR, NOWARNING and NONET options are set.
    """

    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)

def Parser_feed(context, s):

    "Feed the string 's' to the parser 'context' using xmlParseChunk."

    # NOTE(review): the final argument is always 1; presumably this is the
    # NOTE(review): libxml2 "terminate" flag - confirm against callers.
    libxml2mod.xmlParseChunk(context, s, len(s), 1)

def Parser_well_formed(context):

    "Return the well-formedness status reported for the parser 'context'."

    return libxml2mod.xmlParserGetWellFormed(context)

def Parser_document(context):

    "Return the document held by the parser 'context'."

    return libxml2mod.xmlParserGetDoc(context)

def Parser_parse(context):

    "Parse the document associated with the parser 'context'."

    libxml2mod.xmlParseDocument(context)

# vim: tabstop=4 expandtab shiftwidth=4