#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013, 2014 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import xml.dom
from libxml2dom.errors import DOMError

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# Global reconfiguration. This enables prettyprinting.

libxml2mod.xmlKeepBlanksDefault(0)

# NOTE: libxml2 seems to use UTF-8 throughout.
# NOTE: Implement: http://www.w3.org/TR/2006/REC-xml-20060816/#AVNormalize

def from_unicode(s):

    """
    Return 's' in a form to be passed to libxml2mod: Unicode objects are encoded
    as UTF-8; any other value is returned unchanged after checking that it can
    be converted to Unicode using the default conversion.

    Raise TypeError if the conversion check fails (for example, for a plain
    string containing non-ASCII data).
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")
    else:
        # The string might contain non-ASCII characters, thus upsetting libxml2
        # as it encounters a non-UTF-8 string.
        try:
            unicode(s)
        except UnicodeError:
            # Use the call form of raise: it is valid in both Python 2 and 3.
            raise TypeError("Please use Unicode for non-ASCII data.")
        return s

def to_unicode(s):

    """
    Return the plain string 's' decoded from UTF-8 as a Unicode object. Values
    which are not plain strings (including None) are returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    else:
        return s

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as Unicode, or
    None if that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    if out_ns:
        return out_ns
    else:
        return None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at the colon and return a (prefix, localName)
    tuple. The prefix is None where 'name' has no colon; (None, None) is
    returned where 'name' has more than one colon.
    """

    t = name.split(":")
    if len(t) == 1:
        return None, name
    elif len(t) == 2:
        # Return a tuple, as the other branches do, rather than the list
        # produced by split.
        prefix, localName = t
        return prefix, localName
    else:
        # NOTE: Should raise an exception.
        return None, None

def _find_namespace_for_prefix(node, prefix):

    """
    Find the namespace definition node in the given 'node' for 'prefix'.
    Return None if 'node' has no definition with that prefix.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if libxml2mod.name(current) == prefix:
            return current
        current = libxml2mod.next(current)
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'.

    The definitions made on 'node' are searched first, followed by the
    namespace in use by 'node' itself. None is returned if neither matches.
    """

    # Special treatment for XML namespace.

    if prefix == "xml" and ns == xml.dom.XML_NAMESPACE:
        return libxml2mod.xmlSearchNsByHref(Node_ownerDocument(node), node, xml.dom.XML_NAMESPACE)

    new_ns = None
    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            new_ns = current
            break
        current = libxml2mod.next(current)
    if new_ns is None:
        node_ns = libxml2mod.xmlNodeGetNs(node)
        if node_ns is not None and _check_namespace(node_ns, ns, prefix):
            new_ns = node_ns
    return new_ns

def _check_namespace(current, ns, prefix):

    """
    Check the 'current' namespace definition node against 'ns' and 'prefix'.
    A 'prefix' of None matches any prefix. Return a true value on a match.
    """

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    return ns == current_ns and (prefix is None or prefix == current_prefix)

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter).

    Return the new definition node, or None where no definition was made.
    """

    if prefix is not None or set_default:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
    else:
        new_ns = None
    return new_ns

def _get_invented_prefix(node, ns):

    """
    Return a prefix of the form "NS0", "NS1", and so on, which is not used by
    any namespace definition made on 'node'. The 'ns' parameter is not
    consulted.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    prefixes = []
    while current is not None:
        current_prefix = libxml2mod.name(current)
        prefixes.append(current_prefix)
        current = libxml2mod.next(current)
    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# A mapping from libxml2mod node type labels to DOM node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# The reverse mapping from DOM node type constants to labels. Where several
# labels share a constant, only one of those labels is retained.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_equals(node, other):

    "Return whether xmlXPathCmpNodes reports 'node' and 'other' as equal."

    return libxml2mod.xmlXPathCmpNodes(node, other) == 0

def Node_ownerDocument(node):

    "Return the document of 'node' as provided by libxml2mod."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised for
    libxml2mod type labels not present in the node type mapping.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the child nodes of 'node', omitting document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        # Remove doctypes.
        if Node_nodeType(child) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute nodes
    of 'node', where the namespace is None for attributes without a namespace.
    Namespace declarations are not included.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        ns = libxml2mod.xmlNodeGetNs(current)
        if ns is not None:
            attributes[(get_ns(ns), libxml2mod.name(current))] = current
        else:
            attributes[(None, libxml2mod.name(current))] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace of 'node' as Unicode, or None if it has none."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return get_ns(ns)
    else:
        return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value'."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.

Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the namespace prefix of 'node' as Unicode, or None if it has none."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    else:
        return None

def Node_nodeName(node):

    """
    Return the name of 'node', qualified using its prefix and a colon where a
    prefix is defined.
    """

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    else:
        return Node_localName(node)

def Node_tagName(node):

    "Return the node name of 'node' if it is an element; otherwise None."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    else:
        return None

def Node_localName(node):

    "Return the libxml2mod name of 'node' as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    """
    Return the parent of 'node', or None if 'node' is None or is an XML
    document node.
    """

    if node is None or libxml2mod.type(node) == "document_xml":
        return None
    else:
        return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the node preceding 'node', or None if there is no such node."

    if node is None:
        return None
    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the node following 'node', or None if there is no such node."

    if node is None:
        return None
    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the internal subset of 'node' as provided by xmlGetIntSubset."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has an attribute with the given 'ns' and 'localName',
    or has a namespace definition for 'ns' whose prefix is 'localName'.
    """

    return Node_getAttributeNS(node, ns, localName) is not None or \
        _find_namespace(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has an attribute with the given 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute on 'node' having the given 'ns' and
    'localName', or None if no value is found.

    Where 'ns' is the xmlns namespace, the namespace definitions on 'node' are
    searched for the prefix 'localName' and the defined namespace is returned.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is not None:
            return get_ns(ns_def)
        else:
            return None
    else:
        return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value obtained from xmlGetProp for 'name' on 'node' as Unicode."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node on 'node' for 'ns' and 'localName'. A KeyError is
    raised if no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node on 'node' having the given 'name' and no
    namespace. A KeyError is raised if no such attribute exists.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the given 'ns' and qualified 'name' on 'node' to
    'value', finding or making a namespace definition on 'node' as required.

    Setting an xmlns-prefixed name in the xmlns namespace only makes a
    namespace definition (if a matching one does not already exist).
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        _make_namespace(node, value, localName, set_default=0)
    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        # Remove any conflicting attribute.
        if Node_hasAttributeNS(node, ns, localName):
            Node_removeAttributeNS(node, ns, localName)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute with the given 'name' on 'node' to 'value'."

    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    """
    Set an attribute on 'node' using the namespace, name and value of the
    attribute node 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Set an attribute on 'node' using the name and value of 'attr'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute with the given 'ns' and 'localName' from 'node'. This
    has no effect if 'node' has no such attribute node.
    """

    # Node_hasAttributeNS can report a namespace declaration where no attribute
    # node exists, so a missing entry must not cause an exception here.
    attr = Node_attributes(node).get((ns, localName))
    if attr is None:
        return
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute with the given 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element node with the given 'ns' and qualified 'name', having
    a namespace definition made on the new element itself. The 'node' parameter
    is not used in the creation of the element.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)
    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")
    return new_node

def Node_createElement(node, name):

    """
    Return a new element node with the given 'name'. The 'node' parameter is
    not used in the creation of the element.
    """

    name = from_unicode(name)

    new_node = libxml2mod.xmlNewNode(name)
    return new_node

def Node_createAttributeNS(node, ns, name):

    """
    Return a new attribute node on 'node' with the given 'ns' and qualified
    'name', finding or making a namespace definition on 'node' where 'ns' is
    not None. The attribute is created using a value of None.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None
    new_node = libxml2mod.xmlNewNsProp(node, new_ns, localName, None)
    return new_node

def Node_createAttribute(node, name):

    "Return a new attribute node on 'node' with the given 'name'."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node containing 'value'. The 'node' is not used."

    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    "Return a new comment node containing 'value'. The 'node' is not used."

    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_createCDATASection(node, value):

    """
    Return a new CDATA section containing 'value', created for the document
    owning 'node'.
    """

    value = from_unicode(value)

    return libxml2mod.xmlNewCDataBlock(Node_ownerDocument(node), value, len(value))

def Node_insertBefore(node, tmp, oldNode):

    """
    Insert 'tmp' as the sibling preceding 'oldNode', returning the inserted
    node. The 'node' parameter is not used.
    """

    # Work around libxml2 tendency to merge text nodes and free nodes silently.

    if libxml2mod.type(tmp) == "text":
        placeholder = libxml2mod.xmlNewNode("tmp")
        placeholder = libxml2mod.xmlAddPrevSibling(oldNode, placeholder)
        libxml2mod.xmlReplaceNode(placeholder, tmp)
        return tmp
    else:
        return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    """
    Replace 'oldNode' with 'tmp', returning the result of xmlReplaceNode. The
    'node' parameter is not used.
    """

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node', returning the added node."

    # Work around libxml2 tendency to merge text nodes and free nodes silently.

    if libxml2mod.type(tmp) == "text":
        placeholder = libxml2mod.xmlNewNode("tmp")
        placeholder = libxml2mod.xmlAddChild(node, placeholder)
        libxml2mod.xmlReplaceNode(placeholder, tmp)
        return tmp
    else:
        return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its tree. The 'node' parameter is not used."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other' made using the creation
    functions of this module with 'node' as context. Where 'deep' is true, the
    child nodes of an element are also imported.

    Elements, text nodes, comments and CDATA sections are supported; for other
    node types, xml.dom.NotSupportedErr is raised.
    """

    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif node_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    # Report the node type label, as done when importing from other DOM
    # implementations, instead of the representation of the node object.

    raise xml.dom.NotSupportedErr(
        "Node type '%s' (%d) not supported." % (_reverseNodeTypes[node_type], node_type)
        )

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of 'other', a node providing the standard DOM attributes
    (nodeType, namespaceURI, tagName, attributes, childNodes, nodeValue, data),
    made using the creation functions of this module with 'node' as context.
    Where 'deep' is true, the child nodes of an element are also imported.

    Elements, text nodes, comments and CDATA sections are supported; for other
    node types, xml.dom.NotSupportedErr is raised.
    """

    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif other.nodeType == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    raise xml.dom.NotSupportedErr(
        "Node type '%s' (%d) not supported." % (_reverseNodeTypes[other.nodeType], other.nodeType)
        )

def Node_getElementById(doc, identifier):

    """
    Return the parent of the node found by xmlGetID for 'identifier' in 'doc',
    or None if no node is found.
    """

    node = libxml2mod.xmlGetID(doc, identifier)
    if node is None:
        return None
    else:
        return Node_parentNode(node)

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node,
    returning the result of xmlXPathEval. The optional 'namespaces' dictionary
    maps prefixes to namespaces for use in the expression. The 'variables'
    parameter is currently ignored.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)

    # Free the context even if registration or evaluation fails.

    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)

def Node_xinclude(node):

    """
    Process XInclude directives for 'node' with error and warning reporting
    suppressed and network access disabled, returning the result of the
    processing. Raise XIncludeException where that result is -1.
    """

    result = libxml2mod.xmlXIncludeProcessFlags(node, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    if result == -1:
        raise XIncludeException()
    else:
        return result

# Exceptions.

class LSException(Exception):

    """
    DOM Level 3 Load/Save exception.

    Instances are expected to be created with two arguments: an exception type
    (PARSE_ERR or SERIALIZE_ERR) and associated data describing the problem.
    """

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

    def __repr__(self):
        exctype, excdata = self.args[0:2]
        return "LSException(%d, %r)" % (exctype, excdata)

    def __str__(self):
        exctype, excdata = self.args[0:2]
        if exctype == self.PARSE_ERR:
            return "Parse error: %s" % excdata
        elif exctype == self.SERIALIZE_ERR:
            return "Serialize error: %s" % excdata
        else:
            return repr(self)

class XIncludeException(Exception):

    "Unstandardised XInclude exception."

    pass

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. Where 'localName' is not None, a root element is
    created using 'namespaceURI' and 'localName'; where 'doctype' is not None,
    an internal subset is created from its localName, publicId and systemId
    attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0):

    """
    Parse 'stream_or_string', returning a document. Objects having a read
    attribute are read and parsed using parseString; anything else is passed
    to parseFile. The other parameters are passed on to those functions.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding, unfinished=unfinished)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding, unfinished=unfinished)

def parseFile(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the file named 's', returning a document. Where 'html' is true, the
    HTML parser is used with 'htmlencoding'; 'unfinished' and 'validate' are
    only used for XML. Network access is disabled unless 'remote' is true.

    Raise LSException if parsing fails.
    """

    if not html:
        context = libxml2mod.xmlCreateFileParserCtxt(s)
        return _parseXML(context, unfinished, validate, remote)
    else:
        d = libxml2mod.htmlReadFile(s, htmlencoding,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | html_net_flag(remote))
        if d is None:
            raise LSException(LSException.PARSE_ERR, DOMError(DOMError.SEVERITY_FATAL_ERROR, get_parse_error_message()))
        return d

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the string 's', returning a document. Where 'html' is true, the HTML
    parser is used with 'htmlencoding'; 'unfinished' and 'validate' are only
    used for XML. Network access is disabled unless 'remote' is true.

    Raise LSException if parsing fails.
    """

    if not html:
        context = libxml2mod.xmlCreateMemoryParserCtxt(s, len(s))
        return _parseXML(context, unfinished, validate, remote)
    else:
        # NOTE: URL given as None.
        html_url = None
        d = libxml2mod.htmlReadMemory(s, len(s), html_url, htmlencoding,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | html_net_flag(remote))
        if d is None:
            raise LSException(LSException.PARSE_ERR, DOMError(DOMError.SEVERITY_FATAL_ERROR, get_parse_error_message()))
        return d

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the XML resource at 'uri', returning a document. HTML parsing is not
    supported and causes NotImplementedError to be raised.

    Raise LSException if parsing fails.
    """

    if not html:
        context = libxml2mod.xmlCreateURLParserCtxt(uri, 0)
        return _parseXML(context, unfinished, validate, remote)
    else:
        # Use the call form of raise: it is valid in both Python 2 and 3.
        raise NotImplementedError("parseURI does not yet support HTML")

def _parseXML(context, unfinished, validate, remote):

    """
    Parse XML using the given parser 'context', returning the document. The
    context is freed and the last error reset before returning or raising.

    Where 'validate' is true and the document is not valid, or where a fatal
    error was recorded, LSException is raised. Where 'unfinished' is true,
    the document is returned if no error or only an unfinished tag error was
    recorded. LSException is also raised if 'context' is None.
    """

    if context is None:
        raise LSException(LSException.PARSE_ERR, DOMError(DOMError.SEVERITY_FATAL_ERROR))

    # Remove spurious error conditions.

    error = Parser_error()
    if error is not None:
        Parser_resetError(error)

    Parser_configure(context, validate, remote)
    Parser_parse(context)
    doc = Parser_document(context)
    error = Parser_error()

    try:
        if validate and not Parser_valid(context):

            # NOTE: May not be the correct exception.

            raise LSException(
                LSException.PARSE_ERR,
                DOMError(
                    DOMError.SEVERITY_FATAL_ERROR,
                    get_parse_error_message() or "Document did not validate"
                    ))

        elif unfinished and (error is None or Parser_errorCode(error) == XML_ERR_TAG_NOT_FINISHED):

            # NOTE: There may be other unfinished conditions.

            return doc

        elif error is not None and Parser_errorLevel(error) == XML_ERR_FATAL:
            raise LSException(
                LSException.PARSE_ERR,
                DOMError(
                    DOMError.SEVERITY_FATAL_ERROR,
                    get_parse_error_message() or "Document caused fatal error"
                    ))

        else:

            # NOTE: Could provide non-fatal errors or warnings.

            return doc

    finally:
        Parser_resetError(error)
        libxml2mod.xmlFreeParserCtxt(context)

def toString(node, encoding=None, prettyprint=0):

    "Return the serialised form of 'node' using 'encoding' and 'prettyprint'."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialised form of 'node' to 'stream' using its write method."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save the serialised form of 'node' to 'f' using libxml2mod.saveNodeTo."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants and helper functions.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_DTDVALID = 16
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

XML_ERR_NONE = 0
XML_ERR_WARNING = 1
XML_ERR_ERROR = 2
XML_ERR_FATAL = 3

XML_ERR_TAG_NOT_FINISHED = 77

def html_net_flag(remote):

    "Return the HTML parser flag disabling network access unless 'remote' is true."

    if remote:
        return 0
    else:
        return HTML_PARSE_NONET

def xml_net_flag(remote):

    "Return the XML parser flag disabling network access unless 'remote' is true."

    if remote:
        return 0
    else:
        return XML_PARSE_NONET

def xml_validate_flag(validate):

    "Return the XML parser flag enabling DTD validation if 'validate' is true."

    if validate:
        return XML_PARSE_DTDVALID
    else:
        return 0

def get_parse_error_message():

    """
    Return a message describing the last recorded error, giving the filename
    (or "<string>" where none is recorded), line and error text, or return None
    if no error is recorded.
    """

    error = Parser_error()
    if error is not None:
        filename = libxml2mod.xmlErrorGetFile(error)
        if filename is None:
            filename = "<string>"
        else:
            filename = repr(filename)
        line = libxml2mod.xmlErrorGetLine(error)
        error_message = libxml2mod.xmlErrorGetMessage(error).strip()
        return "Filename %s, line %d: %s" % (filename, line, error_message)
    else:
        return None

def Parser_error():

    "Return the last error recorded by libxml2, or None if there is none."

    return libxml2mod.xmlGetLastError()

def Parser_resetError(error):

    "Reset the given 'error', or the last recorded error if 'error' is None."

    if error is None:
        return libxml2mod.xmlResetLastError()
    else:
        return libxml2mod.xmlResetError(error)

def Parser_errorLevel(error):

    "Return the level of 'error' (see the XML_ERR_NONE to XML_ERR_FATAL values)."

    return libxml2mod.xmlErrorGetLevel(error)

def Parser_errorCode(error):

    "Return the code of 'error' as provided by xmlErrorGetCode."

    return libxml2mod.xmlErrorGetCode(error)

def Parser_push():

    "Return a new push parser context created with no initial data."

    return libxml2mod.xmlCreatePushParser(None, "", 0, None)

def Parser_configure(context, validate=0, remote=0):

    """
    Configure the parser 'context': pedantic mode is switched off, error and
    warning reporting are suppressed, network access is disabled unless
    'remote' is true, and DTD validation is enabled if 'validate' is true.
    """

    libxml2mod.xmlParserSetPedantic(context, 0)
    #libxml2mod.xmlParserSetValidate(context, validate)
    libxml2mod.xmlCtxtUseOptions(context,
        XML_PARSE_NOERROR | XML_PARSE_NOWARNING | xml_net_flag(remote) | xml_validate_flag(validate))

def Parser_feed(context, s):

    """
    Pass the string 's' to the parser 'context' using xmlParseChunk.
    """

    # NOTE: The final argument of 1 is presumably the terminate flag of
    # NOTE: xmlParseChunk; this needs confirming against the libxml2 API.
    libxml2mod.xmlParseChunk(context, s, len(s), 1)

def Parser_well_formed(context):

    "Return the well-formedness status recorded in the parser 'context'."

    return libxml2mod.xmlParserGetWellFormed(context)

def Parser_valid(context):

    "Return the validity status recorded in the parser 'context'."

    return libxml2mod.xmlParserGetIsValid(context)

def Parser_document(context):

    "Return the document held by the parser 'context'."

    return libxml2mod.xmlParserGetDoc(context)

def Parser_parse(context):

    "Parse the document associated with the parser 'context'."

    libxml2mod.xmlParseDocument(context)

# Schema and validation helper functions and classes.
# NOTE: Should potentially combine these with other definitions.

RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
XMLSCHEMA_NS = "http://www.w3.org/2001/XMLSchema"

def Document_schema(doc, namespaceURI):

    """
    Return a schema parsed from the document 'doc', where 'namespaceURI'
    selects the schema language (RELAXNG_NS, SCHEMATRON_NS or XMLSCHEMA_NS).
    Return None for any other 'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Schema_parseRelaxNG(libxml2mod.xmlRelaxNGNewDocParserCtxt(doc))
    elif namespaceURI == SCHEMATRON_NS:
        return Schema_parseSchematron(libxml2mod.xmlSchematronNewDocParserCtxt(doc))
    elif namespaceURI == XMLSCHEMA_NS:
        return Schema_parseSchema(libxml2mod.xmlSchemaNewDocParserCtxt(doc))
    else:
        return None

def Document_schemaFromString(s, namespaceURI):

    """
    Return a schema parsed from the string 's', where 'namespaceURI' selects
    the schema language (RELAXNG_NS, SCHEMATRON_NS or XMLSCHEMA_NS). Return
    None for any other 'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Schema_parseRelaxNG(libxml2mod.xmlRelaxNGNewMemParserCtxt(s, len(s)))
    elif namespaceURI == SCHEMATRON_NS:
        return Schema_parseSchematron(libxml2mod.xmlSchematronNewMemParserCtxt(s, len(s)))
    elif namespaceURI == XMLSCHEMA_NS:
        return Schema_parseSchema(libxml2mod.xmlSchemaNewMemParserCtxt(s, len(s)))
    else:
        return None

def Document_validate(schema, doc, error_handler, namespaceURI):

    """
    Validate 'doc' against 'schema', where 'namespaceURI' selects the schema
    language, passing errors and warnings to 'error_handler'. Return whether
    the document validated, or 0 if 'namespaceURI' is not recognised.
    """

    if namespaceURI == RELAXNG_NS:
        return Document_validateRelaxNG(schema, doc, error_handler)
    elif namespaceURI == SCHEMATRON_NS:
        return Document_validateSchematron(schema, doc, error_handler)
    elif namespaceURI == XMLSCHEMA_NS:
        return Document_validateSchema(schema, doc, error_handler)
    else:
        return 0

def Document_validateRelaxNG(schema, doc, error_handler):

    """
    Validate 'doc' against the RELAX NG 'schema', passing errors and warnings
    to 'error_handler'. Return whether the validation status was zero.
    """

    validator_context = libxml2mod.xmlRelaxNGNewValidCtxt(schema)

    # Set up the handler inside the try clause so that the context is freed
    # even if the setup fails.

    try:
        handler = ValidationHandler(error_handler)
        libxml2mod.xmlRelaxNGSetValidErrors(validator_context, handler.error, handler.warning, None)
        status = libxml2mod.xmlRelaxNGValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlRelaxNGFreeValidCtxt(validator_context)

def Document_validateSchematron(schema, doc, error_handler):

    """
    Validate 'doc' against the Schematron 'schema', passing errors and warnings
    to 'error_handler'. Return whether the validation status was zero.
    """

    validator_context = libxml2mod.xmlSchematronNewValidCtxt(schema)

    # Set up the handler inside the try clause so that the context is freed
    # even if the setup fails.

    try:
        handler = ValidationHandler(error_handler)
        libxml2mod.xmlSchematronSetValidErrors(validator_context, handler.error, handler.warning, None)
        status = libxml2mod.xmlSchematronValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlSchematronFreeValidCtxt(validator_context)

def Document_validateSchema(schema, doc, error_handler):

    """
    Validate 'doc' against the XML Schema 'schema', passing errors and warnings
    to 'error_handler'. Return whether the validation status was zero.
    """

    validator_context = libxml2mod.xmlSchemaNewValidCtxt(schema)

    # Set up the handler inside the try clause so that the context is freed
    # even if the setup fails.

    try:
        handler = ValidationHandler(error_handler)
        libxml2mod.xmlSchemaSetValidErrors(validator_context, handler.error, handler.warning, None)
        status = libxml2mod.xmlSchemaValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlSchemaFreeValidCtxt(validator_context)

def Schema_parseRelaxNG(context):

    """
    Return the RELAX NG schema parsed using the parser 'context', freeing the
    context afterwards.
    """

    try:
        return libxml2mod.xmlRelaxNGParse(context)
    finally:
        libxml2mod.xmlRelaxNGFreeParserCtxt(context)

def Schema_parseSchematron(context):

    """
    Return the Schematron schema parsed using the parser 'context', freeing the
    context afterwards.
    """

    try:
        return libxml2mod.xmlSchematronParse(context)
    finally:
        libxml2mod.xmlSchematronFreeParserCtxt(context)

def Schema_parseSchema(context):

    """
    Return the XML Schema parsed using the parser 'context', freeing the
    context afterwards.
    """

    try:
        return libxml2mod.xmlSchemaParse(context)
    finally:
        libxml2mod.xmlSchemaFreeParserCtxt(context)

def Schema_free(schema, namespaceURI):

    """
    Free 'schema', where 'namespaceURI' selects the schema language. Nothing is
    done if 'namespaceURI' is not recognised.
    """

    if namespaceURI == RELAXNG_NS:
        libxml2mod.xmlRelaxNGFree(schema)
    elif namespaceURI == SCHEMATRON_NS:
        libxml2mod.xmlSchematronFree(schema)
    elif namespaceURI == XMLSCHEMA_NS:
        libxml2mod.xmlSchemaFree(schema)

class ValidationHandler:

    """
    A handler which collects validation errors and warnings and passes them to a
    DOMErrorHandler.
    """

    def __init__(self, error_handler):

        "Initialise the handler with the given 'error_handler'."

        self.error_handler = error_handler

    def error(self, msg, arg):

        "Report 'msg' as a fatal error to the error handler; 'arg' is ignored."

        self.error_handler.handleError(DOMError(DOMError.SEVERITY_FATAL_ERROR, msg.strip()))

    def warning(self, msg, arg):

        "Report 'msg' as a warning to the error handler; 'arg' is ignored."

        self.error_handler.handleError(DOMError(DOMError.SEVERITY_WARNING, msg.strip()))

# vim: tabstop=4 expandtab shiftwidth=4