#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import xml.dom
from libxml2dom.errors import DOMError

# Try the conventional import first.

try:
    import libxml2mod
except ImportError:
    from libxmlmods import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.
# NOTE: Implement: http://www.w3.org/TR/2006/REC-xml-20060816/#AVNormalize

def from_unicode(s):

    """
    Return 's' in a form suitable for passing to libxml2mod.

    Unicode objects are encoded as UTF-8 byte strings. Any other value is
    returned unchanged provided that unicode(s) succeeds; where that conversion
    fails with a UnicodeError (a byte string containing non-ASCII data), a
    TypeError is raised.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")

    # The string might contain non-ASCII characters, thus upsetting libxml2
    # as it encounters a non-UTF-8 string.
    try:
        unicode(s)
    except UnicodeError:
        raise TypeError("Please use Unicode for non-ASCII data.")
    return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a byte string; any other value is
    returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    else:
        return s

def get_ns(ns):

    """
    Return the content of the namespace definition node 'ns' as Unicode, or
    None where that content is empty.
    """

    out_ns = to_unicode(libxml2mod.xmlNodeGetContent(ns))
    # Detect "" and produce None as the empty namespace.
    return out_ns or None

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' at its colon and return a (prefix, localName)
    tuple. A name without a colon yields a prefix of None; a name with more
    than one colon yields (None, None).
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    elif len(parts) == 2:
        # Return a tuple, as the other branches do, rather than the list.
        prefix, localName = parts
        return prefix, localName
    else:
        # NOTE: Should raise an exception.
        return None, None

def _find_namespace_for_prefix(node, prefix):

    "Find the namespace definition node in the given 'node' for 'prefix'."

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if libxml2mod.name(current) == prefix:
            return current
        current = libxml2mod.next(current)
    return None

def _find_namespace(node, ns, prefix):

    """
    Find the namespace definition node in the given 'node' for the given 'ns'
    and 'prefix'. The definitions made on 'node' are searched first, followed
    by the namespace of 'node' itself. None is returned if nothing matches.
    """

    # Special treatment for XML namespace.

    if prefix == "xml" and ns == xml.dom.XML_NAMESPACE:
        return libxml2mod.xmlSearchNsByHref(Node_ownerDocument(node), node, xml.dom.XML_NAMESPACE)

    # Search the definitions made on the node.

    current = libxml2mod.xmlNodeGetNsDefs(node)
    while current is not None:
        if _check_namespace(current, ns, prefix):
            return current
        current = libxml2mod.next(current)

    # Fall back to the namespace employed by the node.

    node_ns = libxml2mod.xmlNodeGetNs(node)
    if node_ns is not None and _check_namespace(node_ns, ns, prefix):
        return node_ns
    return None

def _check_namespace(current, ns, prefix):

    """
    Check the 'current' namespace definition node against 'ns' and 'prefix',
    returning 1 for a match and 0 otherwise. A 'prefix' of None matches any
    prefix.
    """

    current_ns = get_ns(current)
    current_prefix = libxml2mod.name(current)
    if ns == current_ns and (prefix is None or prefix == current_prefix):
        return 1
    else:
        return 0

def _make_namespace(node, ns, prefix, set_default=0):

    """
    Make a new namespace definition node within the given 'node' for 'ns',
    'prefix', setting the default namespace on 'node' when 'prefix' is None and
    'set_default' is set to a true value (unlike the default value for that
    parameter).

    Return the new definition node, or None where no definition was made.
    """

    if prefix is not None or set_default:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
    else:
        new_ns = None
    return new_ns

def _get_invented_prefix(node, ns):

    """
    Return a prefix of the form "NS0", "NS1", ... which is not used by any
    namespace definition made on 'node'. The 'ns' parameter is not consulted.
    """

    current = libxml2mod.xmlNodeGetNsDefs(node)
    prefixes = []
    while current is not None:
        current_prefix = libxml2mod.name(current)
        prefixes.append(current_prefix)
        current = libxml2mod.next(current)
    i = 0
    while 1:
        prefix = "NS%d" % i
        if prefix not in prefixes:
            return prefix
        i += 1

# Mapping from libxml2mod node type labels to DOM node type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "cdata" : xml.dom.Node.CDATA_SECTION_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "document_html" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Mapping from DOM node type constants back to labels. Several labels share a
# constant, so the label retained for such constants depends on the iteration
# order of the dictionary above.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_equals(node, other):

    "Return whether xmlXPathCmpNodes gives zero for 'node' and 'other'."

    return libxml2mod.xmlXPathCmpNodes(node, other) == 0

def Node_ownerDocument(node):

    "Return the document recorded by libxml2mod for 'node'."

    return libxml2mod.doc(node)

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised for
    libxml2mod type labels which have no entry in the type mapping.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node', omitting document type nodes."

    # NOTE: Consider a generator instead.

    child_nodes = []
    node = libxml2mod.children(node)
    while node is not None:
        # Remove doctypes.
        if Node_nodeType(node) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            child_nodes.append(node)
        node = libxml2mod.next(node)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace URI, name) tuples to the attribute
    nodes of 'node'. Attributes without a namespace employ None as the
    namespace URI. Namespace declarations are not included.
    """

    attributes = {}

    # Include normal attributes.

    current = libxml2mod.properties(node)
    while current is not None:
        ns = libxml2mod.xmlNodeGetNs(current)
        if ns is not None:
            attributes[(get_ns(ns), libxml2mod.name(current))] = current
        else:
            attributes[(None, libxml2mod.name(current))] = current
        current = libxml2mod.next(current)

    # Include xmlns attributes.

    #current = libxml2mod.xmlNodeGetNsDefs(node)
    #while current is not None:
    #    ns = get_ns(current)
    #    prefix = libxml2mod.name(current)
    #    attributes[(xml.dom.XMLNS_NAMESPACE, "xmlns:" + prefix)] = ns # NOTE: Need a real node here.
    #    current = libxml2mod.next(current)

    return attributes

def Node_namespaceURI(node):

    "Return the namespace URI of 'node', or None if it has no namespace."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return get_ns(ns)
    else:
        return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value'."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this. The data attribute should only really exist for text,
# NOTE: character data, processing instructions and comments.

# The data and textContent properties are served by the nodeValue
# implementation.

Node_data = Node_nodeValue

Node_textContent = Node_nodeValue

def Node_prefix(node):

    "Return the namespace prefix of 'node' as Unicode, or None if it has none."

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    else:
        return None

def Node_nodeName(node):

    """
    Return the qualified name of 'node': "prefix:localName" where the node has
    a namespace prefix, and just the local name otherwise.
    """

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    else:
        return Node_localName(node)

def Node_tagName(node):

    "Return the qualified name of 'node' if it is an element, None otherwise."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    else:
        return None

def Node_localName(node):

    "Return the name held by libxml2mod for 'node' as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    """
    Return the parent of 'node', or None where 'node' is None or is an XML
    document node.
    """

    if node is None or libxml2mod.type(node) == "document_xml":
        return None
    else:
        return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the sibling preceding 'node', or None if there is none."

    if node is None:
        return None
    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the sibling following 'node', or None if there is none."

    if node is None:
        return None
    return libxml2mod.next(node)

def Node_doctype(node):

    "Return the result of xmlGetIntSubset for 'node'."

    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):

    """
    Return whether 'node' has an attribute with the given 'ns' and 'localName',
    or a namespace definition for 'ns' whose prefix is 'localName'.
    """

    return Node_getAttributeNS(node, ns, localName) is not None or \
        _find_namespace(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has an attribute with the given 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute on 'node' with the given 'ns' and
    'localName' as Unicode, or None if there is no such attribute. Where 'ns'
    is the xmlns namespace, the namespace URI defined on 'node' for the prefix
    'localName' is returned instead.
    """

    if ns == xml.dom.XMLNS_NAMESPACE:
        ns_def = _find_namespace_for_prefix(node, localName)
        if ns_def is not None:
            return get_ns(ns_def)
        else:
            return None
    else:
        return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    "Return the value of the attribute 'name' on 'node' as Unicode."

    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node on 'node' for 'ns' and 'localName'. A KeyError is
    raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node on 'node' with the given 'name' and no namespace.
    A KeyError is raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):

    """
    Set the attribute with the qualified 'name' in namespace 'ns' on 'node' to
    'value'.

    Where 'name' is of the form "xmlns:prefix" and 'ns' is the xmlns namespace,
    a namespace definition is made on 'node' unless a matching one exists.
    Otherwise, for a 'ns' other than None, a namespace definition is found or
    made (inventing a prefix if 'name' has none), any existing attribute is
    removed, and the attribute is set. With 'ns' as None, the attribute is set
    without a namespace.
    """

    ns, name, value = map(from_unicode, [ns, name, value])
    prefix, localName = _get_prefix_and_localName(name)

    # Detect setting of xmlns:localName=value, looking for cases where
    # x:attr=value have caused the definition of xmlns:x=y (as a declaration
    # with prefix=x, ns=y).
    if prefix == "xmlns" and ns == xml.dom.XMLNS_NAMESPACE:
        if _find_namespace(node, value, localName):
            return
        new_ns = _make_namespace(node, value, localName, set_default=0)
    # For non-xmlns attributes, we find or make a namespace declaration and then
    # set an attribute.
    elif ns is not None:
        # Look for a suitable namespace.
        new_ns = _find_namespace(node, ns, prefix)
        # Create a declaration if no suitable one was found.
        if new_ns is None:
            # Invent a prefix for unprefixed attributes with namespaces.
            if prefix is None:
                prefix = _get_invented_prefix(node, ns)
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
        # Remove any conflicting attribute.
        if Node_hasAttributeNS(node, ns, localName):
            Node_removeAttributeNS(node, ns, localName)
        libxml2mod.xmlSetNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlSetNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute 'name' on 'node' to 'value'."

    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    """
    Set an attribute on 'node' using the namespace, qualified name and value of
    the attribute node 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    """
    Set an attribute on 'node' using the qualified name and value of the
    attribute node 'attr'.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute with the given 'ns' and 'localName' from 'node'.
    Nothing is done if 'node' has no such attribute.
    """

    # Node_hasAttributeNS also reports namespace definitions, so callers such
    # as Node_setAttributeNS may request the removal of an attribute which is
    # not present: treat that as a request to do nothing.
    attr = Node_attributes(node).get((ns, localName))
    if attr is None:
        return
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new element with the qualified 'name' in namespace 'ns'. The
    'node' parameter is not consulted.

    With a 'ns' other than None, a namespace definition is made on the element
    and employed by it. With 'ns' as None and a prefixed 'name', a definition
    for the empty namespace is made for the prefix. With neither, the element
    has no namespace and is given an empty "xmlns" attribute.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)

    # If the namespace is not empty, set the declaration.
    if ns is not None:
        new_ns = _find_namespace(new_node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, ns, prefix, set_default=1)
        libxml2mod.xmlSetNs(new_node, new_ns)
    # If the namespace is empty, set a "null" declaration.
    elif prefix is not None:
        new_ns = _find_namespace(new_node, "", prefix)
        if new_ns is None:
            new_ns = _make_namespace(new_node, "", prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    else:
        libxml2mod.xmlSetNs(new_node, None)
        Node_setAttribute(new_node, "xmlns", "")
    return new_node

def Node_createElement(node, name):

    "Return a new element with the given 'name'. 'node' is not consulted."

    name = from_unicode(name)

    new_node = libxml2mod.xmlNewNode(name)
    return new_node

def Node_createAttributeNS(node, ns, name):

    """
    Return a new attribute node created on 'node' with the qualified 'name' in
    namespace 'ns' and no value. With a 'ns' other than None, a namespace
    definition is found or made on 'node' for the attribute.
    """

    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = _find_namespace(node, ns, prefix)
        if new_ns is None:
            new_ns = _make_namespace(node, ns, prefix, set_default=0)
    else:
        new_ns = None
    new_node = libxml2mod.xmlNewNsProp(node, new_ns, localName, None)
    return new_node

def Node_createAttribute(node, name):

    "Return a new attribute node created on 'node' with the given 'name'."

    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    "Return a new text node containing 'value'. 'node' is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    "Return a new comment node containing 'value'. 'node' is not consulted."

    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_createCDATASection(node, value):

    """
    Return a new CDATA section containing 'value', created for the document
    owning 'node'.
    """

    value = from_unicode(value)

    return libxml2mod.xmlNewCDataBlock(Node_ownerDocument(node), value, len(value))

def Node_insertBefore(node, tmp, oldNode):

    "Add 'tmp' as the sibling preceding 'oldNode'. 'node' is not consulted."

    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    "Replace 'oldNode' with 'tmp'. 'node' is not consulted."

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node'."

    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its position in the tree. 'node' is not consulted."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other' made using 'node' as the
    context for creation. Elements (with their attributes), text nodes,
    comments and CDATA sections are supported; with 'deep' set to a true value,
    the children of elements are copied as well.

    An xml.dom.NotSupportedErr is raised for any other kind of node.
    """

    other_type = Node_nodeType(other)

    if other_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif other_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    elif other_type == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, Node_data(other))

    # Report the type label rather than the representation of the node object,
    # as Node_importNode_DOM does.
    raise xml.dom.NotSupportedErr("Node type '%s' (%d) not supported." % (libxml2mod.type(other), other_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of the DOM node object 'other' made using 'node' as the
    context for creation. This resembles Node_importNode but reads 'other'
    through DOM attributes (nodeType, namespaceURI, tagName, attributes,
    childNodes, nodeValue, data).

    An xml.dom.NotSupportedErr is raised for unsupported kinds of node.
    """

    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    elif other.nodeType == xml.dom.Node.CDATA_SECTION_NODE:
        return Node_createCDATASection(node, other.data)

    raise xml.dom.NotSupportedErr(
        "Node type '%s' (%d) not supported." % (_reverseNodeTypes[other.nodeType], other.nodeType)
        )

def Node_getElementById(doc, identifier):

    """
    Return the parent of the node found by xmlGetID in 'doc' for 'identifier',
    or None if nothing is found.
    """

    node = libxml2mod.xmlGetID(doc, identifier)
    if node is None:
        return None
    else:
        return Node_parentNode(node)

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result provided by libxml2mod.

    The optional 'namespaces' dictionary maps prefixes to namespace URIs which
    are registered for use in 'expr'. The 'variables' parameter is accepted but
    not currently used.
    """

    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node) or node)

    # Free the context even if registration or evaluation fails.
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        libxml2mod.xmlXPathFreeContext(context)

def Node_xinclude(node):

    """
    Process XInclude directives for 'node' with errors and warnings suppressed
    and network access disabled, returning the result from libxml2mod. An
    XIncludeException is raised where that result is -1.
    """

    result = libxml2mod.xmlXIncludeProcessFlags(node, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    if result == -1:
        raise XIncludeException()
    else:
        return result

# Exceptions.

class LSException(Exception):

    "DOM Level 3 Load/Save exception."

    PARSE_ERR = 81
    SERIALIZE_ERR = 82

    def __repr__(self):
        # The first two arguments are the exception type code and its details.
        exctype, excdata = self.args[0:2]
        return "LSException(%d, %r)" % (exctype, excdata)

    def __str__(self):
        exctype, excdata = self.args[0:2]
        if exctype == self.PARSE_ERR:
            return "Parse error: %s" % excdata
        elif exctype == self.SERIALIZE_ERR:
            return "Serialize error: %s" % excdata
        else:
            return repr(self)

class XIncludeException(Exception):

    "Unstandardised XInclude exception."

    pass

# Utility functions.

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new XML 1.0 document. Where 'localName' is not None, a root
    element is created with that name in 'namespaceURI'. Where 'doctype' is not
    None, an internal subset is created from its localName, publicId and
    systemId attributes.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        # NOTE: Verify that this is always what should occur.
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0):

    """
    Parse the given 'stream_or_string': objects with a read method are read and
    parsed as strings; anything else is treated as a filename. The remaining
    parameters are passed on to parseString or parseFile.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding, unfinished=unfinished)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding, unfinished=unfinished)

def parseFile(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the file named 's' and return the document. With 'html' set, the HTML
    parser is used with 'htmlencoding'; otherwise the XML parser is used with
    the 'unfinished', 'validate' and 'remote' settings.
    """

    if not html:
        context = libxml2mod.xmlCreateFileParserCtxt(s)
        return _parseXML(context, unfinished, validate, remote)
    else:
        return libxml2mod.htmlReadFile(s, htmlencoding,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | html_net_flag(remote))

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the string 's' and return the document. With 'html' set, the HTML
    parser is used with 'htmlencoding'; otherwise the XML parser is used with
    the 'unfinished', 'validate' and 'remote' settings.
    """

    if not html:
        context = libxml2mod.xmlCreateMemoryParserCtxt(s, len(s))
        return _parseXML(context, unfinished, validate, remote)
    else:
        # NOTE: URL given as None.
        html_url = None
        return libxml2mod.htmlReadMemory(s, len(s), html_url, htmlencoding,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | html_net_flag(remote))

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0):

    """
    Parse the XML resource at 'uri' and return the document. A
    NotImplementedError is raised if 'html' is set.
    """

    if not html:
        context = libxml2mod.xmlCreateURLParserCtxt(uri, 0)
        return _parseXML(context, unfinished, validate, remote)
    else:
        raise NotImplementedError("parseURI does not yet support HTML")

def _parseXML(context, unfinished, validate, remote):

    """
    Parse using the given parser 'context' and return the document, freeing the
    context and resetting the recorded error afterwards.

    An LSException is raised where 'context' is None, where 'validate' is set
    and the document is not valid, or where a fatal error was recorded. With
    'unfinished' set, the document is returned if no error was recorded or if
    the error indicates an unfinished tag.
    """

    if context is None:
        raise LSException(LSException.PARSE_ERR, DOMError(DOMError.SEVERITY_FATAL_ERROR))

    Parser_configure(context, validate, remote)
    Parser_parse(context)
    doc = Parser_document(context)
    error = Parser_error()

    try:
        if validate and not Parser_valid(context):

            # NOTE: May not be the correct exception.

            raise LSException(
                LSException.PARSE_ERR,
                DOMError(
                    DOMError.SEVERITY_FATAL_ERROR,
                    get_parse_error_message() or "Document did not validate"
                    ))

        elif unfinished and (error is None or Parser_errorCode(error) == XML_ERR_TAG_NOT_FINISHED):

            # NOTE: There may be other unfinished conditions.

            return doc

        elif error is not None and Parser_errorLevel(error) == XML_ERR_FATAL:
            raise LSException(
                LSException.PARSE_ERR,
                DOMError(
                    DOMError.SEVERITY_FATAL_ERROR,
                    get_parse_error_message() or "Document caused fatal error"
                    ))

        else:

            # NOTE: Could provide non-fatal errors or warnings.

            return doc

    finally:
        Parser_resetError(error)
        libxml2mod.xmlFreeParserCtxt(context)

def toString(node, encoding=None, prettyprint=0):

    "Return the serialisation of 'node' produced by libxml2mod.serializeNode."

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialisation of 'node' to 'stream'."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save 'node' to 'f' using libxml2mod.saveNodeTo."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants and helper functions.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_DTDVALID = 16
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

XML_ERR_NONE = 0
XML_ERR_WARNING = 1
XML_ERR_ERROR = 2
XML_ERR_FATAL = 3

XML_ERR_TAG_NOT_FINISHED = 77

def html_net_flag(remote):

    "Return the HTML parser flag forbidding network access unless 'remote' is set."

    if remote:
        return 0
    else:
        return HTML_PARSE_NONET

def xml_net_flag(remote):

    "Return the XML parser flag forbidding network access unless 'remote' is set."

    if remote:
        return 0
    else:
        return XML_PARSE_NONET

def xml_validate_flag(validate):

    "Return the XML parser flag requesting DTD validation if 'validate' is set."

    if validate:
        return XML_PARSE_DTDVALID
    else:
        return 0

def get_parse_error_message():

    """
    Return a message of the form "Filename ..., line ...: ..." describing the
    last recorded error, or None if no error is recorded.
    """

    error = Parser_error()
    if error is None:
        return None

    filename = libxml2mod.xmlErrorGetFile(error)
    if filename is None:
        filename = "<string>"
    else:
        filename = repr(filename)
    line = libxml2mod.xmlErrorGetLine(error)
    # NOTE: Presumably the message can be absent; tolerate None rather than
    # NOTE: failing whilst reporting another error.
    error_message = (libxml2mod.xmlErrorGetMessage(error) or "").strip()
    return "Filename %s, line %d: %s" % (filename, line, error_message)

def Parser_error():

    "Return the last error recorded by libxml2mod, if any."

    return libxml2mod.xmlGetLastError()

def Parser_resetError(error):

    "Reset the given 'error', or the last recorded error if 'error' is None."

    if error is None:
        return libxml2mod.xmlResetLastError()
    else:
        return libxml2mod.xmlResetError(error)

def Parser_errorLevel(error):

    "Return the level of the given 'error'."

    return libxml2mod.xmlErrorGetLevel(error)

def Parser_errorCode(error):

    "Return the code of the given 'error'."

    return libxml2mod.xmlErrorGetCode(error)

def Parser_push():

    "Return a new push parser context with no handler and no initial data."

    return libxml2mod.xmlCreatePushParser(None, "", 0, None)

def Parser_configure(context, validate, remote):

    """
    Configure the parser 'context': pedantic mode is switched off, errors and
    warnings are suppressed, network access is forbidden unless 'remote' is
    set, and DTD validation is requested if 'validate' is set.
    """

    libxml2mod.xmlParserSetPedantic(context, 0)
    #libxml2mod.xmlParserSetValidate(context, validate)
    libxml2mod.xmlCtxtUseOptions(context,
        XML_PARSE_NOERROR | XML_PARSE_NOWARNING | xml_net_flag(remote) | xml_validate_flag(validate))

def Parser_feed(context, s, terminate=1):

    """
    Feed the string 's' to the push parser 'context'. The 'terminate' value is
    passed as the final argument of xmlParseChunk; it defaults to 1, the value
    always used previously.
    """

    libxml2mod.xmlParseChunk(context, s, len(s), terminate)

def Parser_well_formed(context):

    "Return the well-formedness indicator of the parser 'context'."

    return libxml2mod.xmlParserGetWellFormed(context)

def Parser_valid(context):

    "Return the validity indicator of the parser 'context'."

    return libxml2mod.xmlParserGetIsValid(context)

def Parser_document(context):

    "Return the document held by the parser 'context'."

    return libxml2mod.xmlParserGetDoc(context)

def Parser_parse(context):

    "Parse the document associated with the parser 'context'."

    libxml2mod.xmlParseDocument(context)

# Schema and validation helper functions and classes.
# NOTE: Should potentially combine these with other definitions.

RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
XMLSCHEMA_NS = "http://www.w3.org/2001/XMLSchema"

def Document_schema(doc, namespaceURI):

    """
    Return a schema parsed from the document 'doc', where 'namespaceURI'
    selects the schema language (RELAX NG, Schematron or XML Schema). None is
    returned for any other 'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Schema_parseRelaxNG(libxml2mod.xmlRelaxNGNewDocParserCtxt(doc))
    elif namespaceURI == SCHEMATRON_NS:
        return Schema_parseSchematron(libxml2mod.xmlSchematronNewDocParserCtxt(doc))
    elif namespaceURI == XMLSCHEMA_NS:
        return Schema_parseSchema(libxml2mod.xmlSchemaNewDocParserCtxt(doc))
    else:
        return None

def Document_schemaFromString(s, namespaceURI):

    """
    Return a schema parsed from the string 's', where 'namespaceURI' selects
    the schema language (RELAX NG, Schematron or XML Schema). None is returned
    for any other 'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Schema_parseRelaxNG(libxml2mod.xmlRelaxNGNewMemParserCtxt(s, len(s)))
    elif namespaceURI == SCHEMATRON_NS:
        return Schema_parseSchematron(libxml2mod.xmlSchematronNewMemParserCtxt(s, len(s)))
    elif namespaceURI == XMLSCHEMA_NS:
        return Schema_parseSchema(libxml2mod.xmlSchemaNewMemParserCtxt(s, len(s)))
    else:
        return None

def Document_validate(schema, doc, error_handler, namespaceURI):

    """
    Validate 'doc' against 'schema' using the schema language selected by
    'namespaceURI', reporting problems to 'error_handler'. The result of the
    selected validation function is returned, or 0 for any other
    'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        return Document_validateRelaxNG(schema, doc, error_handler)
    elif namespaceURI == SCHEMATRON_NS:
        return Document_validateSchematron(schema, doc, error_handler)
    elif namespaceURI == XMLSCHEMA_NS:
        return Document_validateSchema(schema, doc, error_handler)
    else:
        return 0

def Document_validateRelaxNG(schema, doc, error_handler):

    """
    Validate 'doc' against the RELAX NG 'schema', reporting problems to
    'error_handler', and return whether the validation status was zero.
    """

    validator_context = libxml2mod.xmlRelaxNGNewValidCtxt(schema)

    # Free the context even if registering the handlers fails.
    try:
        handler = ValidationHandler(error_handler)
        libxml2mod.xmlRelaxNGSetValidErrors(validator_context, handler.error, handler.warning, None)
        status = libxml2mod.xmlRelaxNGValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlRelaxNGFreeValidCtxt(validator_context)

def Document_validateSchematron(schema, doc, error_handler):

    """
    Validate 'doc' against the Schematron 'schema', reporting problems to
    'error_handler', and return whether the validation status was zero.
    """

    validator_context = libxml2mod.xmlSchematronNewValidCtxt(schema)

    # Free the context even if registering the handlers fails.
    try:
        handler = ValidationHandler(error_handler)
        libxml2mod.xmlSchematronSetValidErrors(validator_context, handler.error, handler.warning, None)
        status = libxml2mod.xmlSchematronValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlSchematronFreeValidCtxt(validator_context)

def Document_validateSchema(schema, doc, error_handler):

    """
    Validate 'doc' against the XML Schema 'schema', reporting problems to
    'error_handler', and return whether the validation status was zero.
    """

    validator_context = libxml2mod.xmlSchemaNewValidCtxt(schema)

    # Free the context even if registering the handlers fails.
    try:
        handler = ValidationHandler(error_handler)
        libxml2mod.xmlSchemaSetValidErrors(validator_context, handler.error, handler.warning, None)
        status = libxml2mod.xmlSchemaValidateDoc(validator_context, doc)
        return status == 0
    finally:
        libxml2mod.xmlSchemaFreeValidCtxt(validator_context)

def Schema_parseRelaxNG(context):

    "Return the RELAX NG schema parsed using 'context', freeing the context."

    try:
        return libxml2mod.xmlRelaxNGParse(context)
    finally:
        libxml2mod.xmlRelaxNGFreeParserCtxt(context)

def Schema_parseSchematron(context):

    "Return the Schematron schema parsed using 'context', freeing the context."

    try:
        return libxml2mod.xmlSchematronParse(context)
    finally:
        libxml2mod.xmlSchematronFreeParserCtxt(context)

def Schema_parseSchema(context):

    "Return the XML Schema parsed using 'context', freeing the context."

    try:
        return libxml2mod.xmlSchemaParse(context)
    finally:
        libxml2mod.xmlSchemaFreeParserCtxt(context)

def Schema_free(schema, namespaceURI):

    """
    Free 'schema' using the function appropriate to the schema language
    selected by 'namespaceURI'. Nothing is done for any other 'namespaceURI'.
    """

    if namespaceURI == RELAXNG_NS:
        libxml2mod.xmlRelaxNGFree(schema)
    elif namespaceURI == SCHEMATRON_NS:
        libxml2mod.xmlSchematronFree(schema)
    elif namespaceURI == XMLSCHEMA_NS:
        libxml2mod.xmlSchemaFree(schema)

class ValidationHandler:

    """
    A handler which collects validation errors and warnings and passes them to a
    DOMErrorHandler.
    """

    def __init__(self, error_handler):
        self.error_handler = error_handler

    def error(self, msg, arg):
        # Errors are forwarded with fatal severity; 'arg' is not used.
        self.error_handler.handleError(DOMError(DOMError.SEVERITY_FATAL_ERROR, msg.strip()))

    def warning(self, msg, arg):
        # Warnings are forwarded with warning severity; 'arg' is not used.
        self.error_handler.handleError(DOMError(DOMError.SEVERITY_WARNING, msg.strip()))

# vim: tabstop=4 expandtab shiftwidth=4