#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import xml.dom

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.
# NOTE: Implement: http://www.w3.org/TR/2006/REC-xml-20060816/#AVNormalize

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod: Unicode objects
    are encoded as UTF-8, whereas other objects are returned unchanged provided
    that they can be converted to Unicode using the default encoding. A
    TypeError is raised for plain strings which cannot be converted in this
    way.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")
    else:
        # The string might contain non-ASCII characters, thus upsetting libxml2
        # as it encounters a non-UTF-8 string.
        try:
            unicode(s)
        except UnicodeError:
            raise TypeError("Please use Unicode for non-ASCII data.")
        return s

def to_unicode(s):

    """
    Return a Unicode object for the plain string 's', decoding it as UTF-8.
    Objects which are not plain strings (such as None) are returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as Unicode, or
    None if that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    return out_ns or None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at ":" and return a (prefix, localName) tuple.
    The prefix is None for names without a colon. Names with more than one
    colon yield (None, None).
    """

    t = name.split(":")
    if len(t) == 1:
        return None, name
    elif len(t) == 2:
        # Always return a tuple, as the other branches do.
        return t[0], t[1]
    else:
        # NOTE: Should raise an exception.
        return None, None

def _find_namespace_for_prefix(node, prefix):

    "Find the namespace definition node in the given 'node' for 'prefix'."

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if libxml2mod.name(current) == prefix:
            return current
        current = libxml2mod.next(current)
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'. The definitions made on 'node' are searched first, followed
    by the namespace of 'node' itself. None is returned if nothing matches.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            return current
        current = libxml2mod.next(current)

    node_ns = libxml2mod.xmlNodeGetNs(node)
    if node_ns is not None and _check_namespace(node_ns, ns, prefix):
        return node_ns
    return None

def _check_namespace(current, ns, prefix):

    """
    Check the 'current' namespace definition node against 'ns' and 'prefix',
    returning a true value if 'ns' matches and if 'prefix' is either None or
    also matches.
    """

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    return ns == current_ns and (prefix is None or prefix == current_prefix)

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter). Return the new definition node, or None if none was made.
    """

    if prefix is not None or set_default:
        return libxml2mod.xmlNewNs(node, ns, prefix)
    return None

def _get_invented_prefix(node, ns):

    """
    Return the first prefix of the form "NS0", "NS1", ... which is not used by
    a namespace definition made on 'node'. The 'ns' parameter is not consulted.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    prefixes = []
    while current is not None:
        prefixes.append(libxml2mod.name(current))
        current = libxml2mod.next(current)
    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# Mapping from libxml2mod node type labels to DOM node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Mapping from DOM node type constants back to labels. Where several labels
# share a constant, only one of them is retained.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_equals(node, other):

    "Return whether xmlXPathCmpNodes reports 'node' and 'other' as equal."

    return libxml2mod.xmlXPathCmpNodes(node, other) == 0

def Node_ownerDocument(node):

    "Return the document of 'node' as given by libxml2mod."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised for
    libxml2mod type labels which are not present in the type table.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node', omitting document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    node = libxml2mod.children(node)
    while node is not None:
        # Remove doctypes.
        if Node_nodeType(node) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(node)
        node = libxml2mod.next(node)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute nodes
    of 'node', where the namespace is None for attributes without one.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        attributes[(Node_namespaceURI(current), libxml2mod.name(current))] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace of 'node' as Unicode, or None if it has none."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return get_ns(ns)
    return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value'."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.

Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the namespace prefix of 'node' as Unicode, or None without one."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    return None

def Node_nodeName(node):

    "Return the name of 'node', qualified by its prefix where it has one."

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    return Node_localName(node)

def Node_tagName(node):

    "Return the node name of 'node' if it is an element, or None otherwise."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):

    "Return the unqualified name of 'node' as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    "Return the parent of 'node', or None if 'node' is an XML document."

    if libxml2mod.type(node) == "document_xml":
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the node preceding 'node', or None if there is no such node."

    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the node following 'node', or None if there is no such node."

    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the internal subset of 'node' as given by xmlGetIntSubset."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has an attribute value for 'ns' and 'localName', or
    a namespace definition matching 'ns' with 'localName' as its prefix.
    """

    return Node_getAttributeNS(node, ns, localName) is not None or \
        _find_namespace(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has an attribute value for 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute of 'node' identified by 'ns' and
    'localName'. For the xmlns namespace, the namespace defined on 'node' for
    the prefix 'localName' is returned instead, or None if there is none.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is not None:
            return get_ns(ns_def)
        return None
    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value of the attribute of 'node' having the given 'name'."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node of 'node' for 'ns' and 'localName'. A KeyError is
    raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node of 'node' having 'name' and no namespace. A
    KeyError is raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the qualified 'name' in the namespace 'ns' on 'node'
    to 'value', finding or making a namespace definition on 'node' as required.
    Setting an xmlns:prefix attribute only makes a namespace definition.
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        _make_namespace(node, value, localName, set_default=0)
    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute with the given 'name' on 'node' to 'value'."

    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    "Set an attribute on 'node' using the namespace, name and value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Set an attribute on 'node' using the name and value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute identified by 'ns' and 'localName' from 'node'. Nothing
    happens if 'node' has no such attribute.
    """

    # Removing an absent attribute has no effect in the DOM, so avoid failing
    # with a KeyError when the lookup finds nothing.
    attr = Node_attributes(node).get((ns, localName))
    if attr is None:
        return
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute with the given 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the qualified 'name' in the namespace 'ns',
    making a namespace definition on the new element where necessary. The
    'node' parameter is not consulted.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)
    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")
    return new_node

def Node_createElement(node, name):

    """
    Return a new element with the given 'name'. The 'node' parameter is not
    consulted.
    """

    name = from_unicode(name)

    return libxml2mod.xmlNewNode(name)

def Node_createAttributeNS(node, ns, name):

    """
    Return a new attribute node made on 'node' with the qualified 'name' in the
    namespace 'ns' and with no value, finding or making a namespace definition
    on 'node' where 'ns' is not None.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None
    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):

    "Return a new attribute node made on 'node' with 'name' and no namespace."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node for 'value'. The 'node' parameter is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    "Return a new comment for 'value'. The 'node' parameter is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_createCDATASection(node, value):

    "Return a new CDATA section for 'value' in the document owning 'node'."

    value = from_unicode(value)

    return libxml2mod.xmlNewCDataBlock(Node_ownerDocument(node), value, len(value))

def Node_insertBefore(node, tmp, oldNode):

    """
    Insert 'tmp' immediately before 'oldNode', returning the result of the
    underlying libxml2mod call. If 'oldNode' is None, 'tmp' is instead added
    as the last child of 'node'.
    """

    # The DOM defines insertion before a null reference node as appending.
    if oldNode is None:
        return libxml2mod.xmlAddChild(node, tmp)
    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    """
    Replace 'oldNode' with 'tmp', returning the result of xmlReplaceNode. The
    'node' parameter is not consulted.
    """

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node', returning the result of xmlAddChild."

    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its tree. The 'node' parameter is not consulted."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other' made using 'node' as the
    context for node creation, copying descendants if 'deep' is true. Elements
    (with their attributes), text nodes, comments and CDATA sections are
    supported; xml.dom.NotSupportedErr is raised for other node types.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    # DOMException itself is abstract and refuses to be instantiated, so the
    # specific subclass carrying NOT_SUPPORTED_ERR as its code is raised.
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (libxml2mod.type(other), node_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', a node accessed through DOM attributes such as
    nodeType, made using 'node' as the context for node creation, copying
    descendants if 'deep' is true. Elements (with their attributes), text
    nodes, comments and CDATA sections are supported; xml.dom.NotSupportedErr
    is raised for other node types.
    """

    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif other.nodeType == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    # DOMException itself is abstract and refuses to be instantiated, so the
    # specific subclass is raised. Node types absent from the label table must
    # not replace the intended error with a KeyError.
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (
        _reverseNodeTypes.get(other.nodeType, "unknown"), other.nodeType))

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result of xmlXPathEval. The optional 'namespaces' dictionary
    maps prefixes to namespaces to be registered for the evaluation. The
    'variables' parameter is currently ignored.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)
    # Free the context even if registration or evaluation fails.
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)

# Exceptions.

class LSException(Exception):

    "DOM Level 3 Load/Save exception."

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. If 'localName' is not None, a root element with that
    name in 'namespaceURI' is added; if 'doctype' is not None, an internal
    subset is created from its localName, publicId and systemId attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def _parse_context(context, unfinished):

    """
    Configure and run the XML parser 'context', returning the parsed document.
    LSException is raised for documents which are not well-formed unless
    'unfinished' is true.
    """

    Parser_configure(context)
    Parser_parse(context)
    doc = Parser_document(context)
    if unfinished or Parser_well_formed(context):
        return doc
    raise LSException(LSException.PARSE_ERR)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0):

    """
    Parse 'stream_or_string': objects with a read attribute are read in full
    and parsed using parseString, whereas other objects are passed to
    parseFile. The remaining parameters are passed on unchanged.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding, unfinished=unfinished)
    return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding, unfinished=unfinished)

def parseFile(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the file named 's' and return the document, using the HTML parser
    with 'htmlencoding' if 'html' is true. For XML, LSException is raised for
    documents which are not well-formed unless 'unfinished' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_context(libxml2mod.xmlCreateFileParserCtxt(s), unfinished)
    return libxml2mod.htmlReadFile(s, htmlencoding,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseString(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the string 's' and return the document, using the HTML parser with
    'htmlencoding' if 'html' is true. For XML, LSException is raised for
    documents which are not well-formed unless 'unfinished' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_context(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)), unfinished)
    # NOTE: URL given as None.
    html_url = None
    return libxml2mod.htmlReadMemory(s, len(s), html_url, htmlencoding,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseURI(uri, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the XML resource at 'uri' and return the document. LSException is
    raised for documents which are not well-formed unless 'unfinished' is true.
    NotImplementedError is raised if 'html' is true.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_context(libxml2mod.xmlCreateURLParserCtxt(uri, 0), unfinished)
    raise NotImplementedError("parseURI does not yet support HTML")

def toString(node, encoding=None, prettyprint=0):

    "Return the result of serialising 'node' using 'encoding' and 'prettyprint'."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialised form of 'node' to 'stream' using its write method."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save 'node' to 'f' using libxml2mod with 'encoding' and 'prettyprint'."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants and helper functions.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

def Parser_push():

    "Return a new push parser context created with no initial data."

    return libxml2mod.xmlCreatePushParser(None, "", 0, None)

def Parser_configure(context):

    """
    Configure the parser 'context', switching off pedantic mode and validation,
    and setting the options suppressing errors, warnings and network access.
    """

    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)

def Parser_feed(context, s):

    "Pass the string 's' to the parser 'context' using xmlParseChunk."

    # NOTE(review): the final argument is presumably the "terminate" flag of
    # NOTE(review): xmlParseChunk; confirm that always setting it is intended.
    libxml2mod.xmlParseChunk(context, s, len(s), 1)

def Parser_well_formed(context):

    "Return the well-formedness status recorded by the parser 'context'."

    return libxml2mod.xmlParserGetWellFormed(context)

def Parser_document(context):

    "Return the document held by the parser 'context'."

    return libxml2mod.xmlParserGetDoc(context)

def Parser_parse(context):

    "Parse the document associated with the parser 'context'."

    libxml2mod.xmlParseDocument(context)

# vim: tabstop=4 expandtab shiftwidth=4