#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
"""

import xml.dom

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod. Unicode objects are
    encoded as UTF-8; any other value is returned unchanged, provided that
    unicode(s) succeeds. Otherwise, a TypeError is raised.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")
    else:
        # The string might contain non-ASCII characters, thus upsetting libxml2
        # as it encounters a non-UTF-8 string.
        try:
            unicode(s)
        except UnicodeError:
            raise TypeError("Please use Unicode for non-ASCII data.")
        return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a plain string; any other value
    (including None) is returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    else:
        return s

def check_document(node, tmp):

    """
    Raise xml.dom.WrongDocumentErr if 'tmp' has an owner document which is not
    the owner document of 'node'. Nodes without an owner document are accepted.
    """

    # NOTE(review): The documents are compared by identity; confirm that
    # NOTE(review): libxml2mod.doc returns the same object for the same document.
    if Node_ownerDocument(tmp) is not None and Node_ownerDocument(node) is not Node_ownerDocument(tmp):
        # xml.dom.DOMException refuses direct instantiation (raising a
        # RuntimeError), so the specific subclass carrying the
        # WRONG_DOCUMENT_ERR code is raised instead.
        raise xml.dom.WrongDocumentErr()

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as Unicode, or None
    where the content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    if out_ns:
        return out_ns
    else:
        return None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at ":" and return the prefix and local name.
    The prefix is None where 'name' contains no ":"; both values are None where
    'name' contains more than one ":".
    """

    t = name.split(":")
    if len(t) == 1:
        return None, name
    elif len(t) == 2:
        return t
    else:
        # NOTE: Should raise an exception.
        return None, None

def _find_namespace_for_prefix(node, prefix):

    "Find the namespace definition node in the given 'node' for 'prefix'."

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if libxml2mod.name(current) == prefix:
            return current
        current = libxml2mod.next(current)
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'.
    """

    new_ns = None
    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            new_ns = current
            break
        current = libxml2mod.next(current)

    # Without a matching definition, fall back to the namespace of the node
    # itself.
    if new_ns is None:
        node_ns = libxml2mod.xmlNodeGetNs(node)
        if node_ns is not None and _check_namespace(node_ns, ns, prefix):
            new_ns = node_ns
    return new_ns

def _check_namespace(current, ns, prefix):

    "Check the 'current' namespace definition node against 'ns' and 'prefix'."

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    # A 'prefix' of None matches any prefix.
    if ns == current_ns and (prefix is None or prefix == current_prefix):
        return 1
    else:
        return 0

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter).
    """

    if prefix is not None or set_default:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
    else:
        new_ns = None
    return new_ns

def _get_invented_prefix(node, ns):

    """
    Return a prefix of the form "NS0", "NS1", and so on, which is not used by
    any namespace definition on 'node'. The 'ns' parameter is not consulted.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    prefixes = []
    while current is not None:
        current_prefix = libxml2mod.name(current)
        prefixes.append(current_prefix)
        current = libxml2mod.next(current)
    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# A mapping from libxml2mod node type labels to DOM node type codes.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# The reverse mapping from DOM node type codes to labels. Where several labels
# share a code, only one of them is retained.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_ownerDocument(node):

    "Return the document recorded for 'node' by libxml2mod."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type code for 'node'. A KeyError is raised where the
    libxml2mod type label is not present in the node types table.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node', excluding document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    node = libxml2mod.children(node)
    while node is not None:
        # Remove doctypes.
        if Node_nodeType(node) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(node)
        node = libxml2mod.next(node)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute nodes
    of 'node', where the namespace is None for attributes without a namespace.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        ns = libxml2mod.xmlNodeGetNs(current)
        if ns is not None:
            attributes[(get_ns(ns), libxml2mod.name(current))] = current
        else:
            attributes[(None, libxml2mod.name(current))] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace of 'node', or None where it has no namespace."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return get_ns(ns)
    else:
        return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value'."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.
Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the namespace prefix of 'node', or None where it has no namespace."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    else:
        return None

def Node_nodeName(node):

    "Return the name of 'node', qualified by its prefix where one exists."

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    else:
        return Node_localName(node)

def Node_tagName(node):

    "Return the qualified name of 'node' if it is an element, or None."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    else:
        return None

def Node_localName(node):

    "Return the name of 'node' as provided by libxml2mod, as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    "Return the parent of 'node', or None where 'node' is an XML document."

    if libxml2mod.type(node) == "document_xml":
        return None
    else:
        return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the node preceding 'node', or None where no such node exists."

    if libxml2mod.prev(node) is not None:
        return libxml2mod.prev(node)
    else:
        return None

def Node_nextSibling(node):

    "Return the node following 'node', or None where no such node exists."

    if libxml2mod.next(node) is not None:
        return libxml2mod.next(node)
    else:
        return None

def Node_doctype(node):

    "Return the result of libxml2mod.xmlGetIntSubset for 'node'."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has a value for the attribute with the given 'ns' and
    'localName', or whether a namespace definition can be found for 'ns' using
    'localName' as the prefix.
    """

    return Node_getAttributeNS(node, ns, localName) is not None or \
        _find_namespace(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has a value for the attribute with the given 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute on 'node' with the given 'ns' and
    'localName'. For the XMLNS namespace, 'localName' is treated as a prefix and
    the namespace declared for it on 'node' is returned, or None where no such
    declaration exists.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is not None:
            return get_ns(ns_def)
        else:
            return None
    else:
        return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value of the attribute on 'node' with the given 'name'."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node on 'node' for the given 'ns' and 'localName'. A
    KeyError is raised where no such attribute is present.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node on 'node' without a namespace and with the given
    'name'. A KeyError is raised where no such attribute is present.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute on 'node' with the given 'ns' and qualified 'name' to
    'value', finding or making a namespace declaration on 'node' where 'ns' is
    not None. Setting an xmlns:prefix attribute in the XMLNS namespace only
    makes a namespace declaration, and only if one is not already present.
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        _make_namespace(node, value, localName, set_default=0)
    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute on 'node' with the given 'name' to 'value'."

    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    """
    Set an attribute on 'node' using the namespace, qualified name and value of
    the attribute node 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    """
    Set an attribute on 'node' using the qualified name and value of the
    attribute node 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute on 'node' with the given 'ns' and 'localName'. A
    KeyError is raised where no such attribute is present.
    """

    attr = Node_getAttributeNodeNS(node, ns, localName)
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute on 'node' with the given 'name'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the given 'ns' and qualified 'name', having a
    namespace declaration for 'ns'. The given 'node' is not consulted.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)
    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")
    return new_node

def Node_createElement(node, name):

    """
    Return a new element with the given 'name'. The given 'node' is not
    consulted.
    """

    name = from_unicode(name)

    new_node = libxml2mod.xmlNewNode(name)
    return new_node

def Node_createAttributeNS(node, ns, name):

    """
    Return a new attribute made on 'node' with the given 'ns' and qualified
    'name', finding or making a namespace declaration on 'node' where 'ns' is
    not None.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None
    new_node = libxml2mod.xmlNewNsProp(node, new_ns, localName, None)
    return new_node

def Node_createAttribute(node, name):

    "Return a new attribute made on 'node' with the given 'name'."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    """
    Return a new text node containing 'value'. The given 'node' is not
    consulted.
    """

    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    """
    Return a new comment node containing 'value'. The given 'node' is not
    consulted.
    """

    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_createCDATASection(node, value):

    """
    Return a new CDATA section containing 'value', made using the owner
    document of 'node'.
    """

    value = from_unicode(value)

    # The length is that of the converted value, not of the original.
    return libxml2mod.xmlNewCDataBlock(Node_ownerDocument(node), value, len(value))

def Node_insertBefore(node, tmp, oldNode):

    """
    Insert 'tmp' as the sibling preceding 'oldNode', first checking 'tmp'
    against the document of 'node'.
    """

    check_document(node, tmp)
    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    """
    Replace 'oldNode' with 'tmp', first checking 'tmp' against the document of
    'node'.
    """

    check_document(node, tmp)
    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    """
    Add 'tmp' as a child of 'node', first checking 'tmp' against the document
    of 'node'.
    """

    check_document(node, tmp)
    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its surroundings. The given 'node' is not consulted."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other' made for use with 'node',
    copying descendants where 'deep' is set to a true value. Elements, text
    nodes, comments and CDATA sections are supported; for other kinds of nodes,
    xml.dom.NotSupportedErr is raised.
    """

    other_type = Node_nodeType(other)

    if other_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif other_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif other_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    # xml.dom.DOMException refuses direct instantiation (raising a
    # RuntimeError), so the subclass carrying the NOT_SUPPORTED_ERR code is
    # raised. The message names the node type label, as in Node_importNode_DOM.
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (libxml2mod.type(other), other_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of the DOM node 'other' (an object providing the usual DOM
    attributes) made for use with 'node', copying descendants where 'deep' is
    set to a true value. Elements, text nodes, comments and CDATA sections are
    supported; for other kinds of nodes, xml.dom.NotSupportedErr is raised.
    """

    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif other.nodeType == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    # Not every DOM node type has a label (document fragments, for example),
    # so a fallback label is used instead of letting a KeyError escape.
    # xml.dom.DOMException itself refuses direct instantiation, hence the
    # subclass carrying the NOT_SUPPORTED_ERR code.
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (
        _reverseNodeTypes.get(other.nodeType, "unknown"), other.nodeType))

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node,
    returning the result provided by libxml2mod. The optional 'namespaces'
    dictionary maps prefixes to namespaces for use in 'expr'; the 'variables'
    parameter is currently ignored.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)

    # Free the context even if registration or evaluation fails.
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)

# Exceptions.
class LSException(Exception):

    "The exception raised for DOM Level 3 Load/Save failures."

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. Where 'localName' is not None, a root element is
    made using 'namespaceURI' and 'localName'; where 'doctype' is not None, an
    internal subset is made from its localName, publicId and systemId.
    """

    # NOTE: Fixed to use version 1.0 only.
    document = libxml2mod.xmlNewDoc("1.0")

    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        Node_appendChild(document, Node_createElementNS(document, namespaceURI, localName))

    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(document, doctype.localName, doctype.publicId, doctype.systemId)

    return document

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0):

    """
    Parse 'stream_or_string', treating objects with a read attribute as streams
    whose entire contents are read and parsed, and anything else as something
    to be given to parseFile. The other parameters are passed on unchanged.
    """

    if not hasattr(stream_or_string, "read"):
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding, unfinished=unfinished)

    return parseString(stream_or_string.read(), html=html, htmlencoding=htmlencoding, unfinished=unfinished)

def _parse_xml(context, unfinished):

    """
    Configure the parser 'context', parse with it and return the document
    obtained. Unless 'unfinished' is set to a true value, an LSException is
    raised where the parser does not report well-formed input.
    """

    Parser_configure(context)
    Parser_parse(context)
    document = Parser_document(context)

    if unfinished or Parser_well_formed(context):
        return document

    raise LSException(LSException.PARSE_ERR)

def parseFile(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the file named by 's', returning a document. HTML is read using
    'htmlencoding' where 'html' is set to a true value; otherwise, XML is parsed
    and an LSException is raised for input which is not well-formed unless
    'unfinished' is set to a true value.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if html:
        options = HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET
        return libxml2mod.htmlReadFile(s, htmlencoding, options)

    return _parse_xml(libxml2mod.xmlCreateFileParserCtxt(s), unfinished)

def parseString(s, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the string 's', returning a document. HTML is read using
    'htmlencoding' where 'html' is set to a true value; otherwise, XML is parsed
    and an LSException is raised for input which is not well-formed unless
    'unfinished' is set to a true value.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if html:
        # NOTE: URL given as None.
        options = HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET
        return libxml2mod.htmlReadMemory(s, len(s), None, htmlencoding, options)

    return _parse_xml(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)), unfinished)

def parseURI(uri, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the XML resource identified by 'uri', returning a document. An
    LSException is raised for input which is not well-formed unless 'unfinished'
    is set to a true value. HTML is not supported: a NotImplementedError is
    raised where 'html' is set to a true value.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    if html:
        raise NotImplementedError("parseURI does not yet support HTML")

    return _parse_xml(libxml2mod.xmlCreateURLParserCtxt(uri, 0), unfinished)

def toString(node, encoding=None, prettyprint=0):

    "Return 'node' serialised by libxml2mod using 'encoding' and 'prettyprint'."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialised form of 'node' to 'stream' using its write method."

    serialised = toString(node, encoding, prettyprint)
    stream.write(serialised)

def toFile(node, f, encoding=None, prettyprint=0):

    "Have libxml2mod save 'node' to 'f' using 'encoding' and 'prettyprint'."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants and helper functions.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

def Parser_push():

    "Return a new push parser context."

    return libxml2mod.xmlCreatePushParser(None, "", 0, None)

def Parser_configure(context):

    """
    Switch off pedantic and validating behaviour for the parser 'context', and
    apply the XML_PARSE_NOERROR, XML_PARSE_NOWARNING and XML_PARSE_NONET
    options.
    """

    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    options = XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET
    libxml2mod.xmlCtxtUseOptions(context, options)

def Parser_feed(context, s):

    "Give the string 's' to the parser 'context' using xmlParseChunk."

    # NOTE(review): The final argument is always 1; if this is the libxml2
    # NOTE(review): "terminate" flag, confirm that repeated feeding is intended.
    libxml2mod.xmlParseChunk(context, s, len(s), 1)

def Parser_well_formed(context):

    "Return the well-formedness status reported for the parser 'context'."

    return libxml2mod.xmlParserGetWellFormed(context)

def Parser_document(context):

    "Return the document held by the parser 'context'."

    return libxml2mod.xmlParserGetDoc(context)

def Parser_parse(context):

    "Parse the document associated with the parser 'context'."

    libxml2mod.xmlParseDocument(context)

# vim: tabstop=4 expandtab shiftwidth=4