#!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2012, 2013 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

__version__ = "0.5.1"

# NOTE(review): names such as xml, libxml2mod, to_unicode and the Node_*
# NOTE(review): helpers used below are not imported explicitly; presumably they
# NOTE(review): arrive through the star import from macrolib - confirm.

from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile
import libxml2dom.errors

# Standard namespaces.

XML_NAMESPACE = xml.dom.XML_NAMESPACE

# Default namespace bindings for XPath.

default_ns = {
    "xml" : XML_NAMESPACE
    }

class Implementation(object):

    "Contains an abstraction over the DOM implementation."

    def createDocumentType(self, localName, publicId, systemId):

        "Return a new DocumentType holding the given name and identifiers."

        return DocumentType(localName, publicId, systemId)

    def createDocument(self, namespaceURI, localName, doctype):

        """
        Return a new Document wrapping the native document produced by
        Node_createDocument for the given 'namespaceURI', 'localName' and
        'doctype'.
        """

        return Document(Node_createDocument(namespaceURI, localName, doctype), self)

    # Wrapping of documents.

    def adoptDocument(self, node):

        "Wrap the given native document 'node' in a Document object."

        return Document(node, self)

    # Factory functions.

    def get_node(self, _node, context_node):

        """
        Return a wrapper for the native '_node', using 'context_node' to supply
        the owner document (and the node type constants).
        """

        # The type is needed for both tests, so obtain it only once.

        node_type = Node_nodeType(_node)

        # Return the existing document.

        if node_type == context_node.DOCUMENT_NODE:
            return context_node.ownerDocument

        # Return an attribute using the parent of the attribute as the owner
        # element.

        if node_type == context_node.ATTRIBUTE_NODE:
            owner_element = self.get_node(Node_parentNode(_node), context_node)
            return Attribute(_node, self, context_node.ownerDocument, owner_element)

        # Return other nodes.

        return Node(_node, self, context_node.ownerDocument)

    def get_node_or_none(self, _node, context_node):

        "As get_node, but pass through None when '_node' is None."

        if _node is None:
            return None
        return self.get_node(_node, context_node)

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.
    """

    def __init__(self, node, impl):

        """
        Initialise the map for the given element wrapper 'node', using 'impl'
        when creating attribute wrappers.
        """

        self.node = node
        self.impl = impl

    def getNamedItem(self, name):
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        return self.node.getAttributeNodeNS(ns, localName)

    # NOTE(review): the setters and removers below treat KeyError from the
    # NOTE(review): lookup as "no previous attribute"; whether the underlying
    # NOTE(review): macrolib functions actually raise it is not visible here.

    def setNamedItem(self, node):

        "Set the attribute 'node', returning any attribute it replaced."

        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):

        "Namespace-aware form of setNamedItem."

        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):

        "Remove the attribute called 'name', returning the removed attribute."

        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):

        "Namespace-aware form of removeNamedItem."

        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Iterator emulation.

    def __iter__(self):
        return NamedNodeMapIterator(self)

    # Dictionary emulation methods.

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Store the attribute 'node' under 'name', raising KeyError if 'name' is
        not the node's own name.
        """

        if name != node.nodeName:
            raise KeyError(name)
        self.setNamedItem(node)

    def __delitem__(self, name):

        "Remove the attribute called 'name' from the element."

        # Previously a silent no-op; deletion maps directly onto the DOM
        # removal operation.

        self.removeNamedItem(name)

    def values(self):

        "Return a list of Attribute wrappers for the element's attributes."

        element = self.node
        document = element.ownerDocument

        # The element is supplied as the owner element so that attributes
        # obtained here agree with those from getAttributeNode.

        return [Attribute(_node, self.impl, document, element)
            for _node in Node_attributes(element.as_native_node()).values()]

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples, one per attribute."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        "Return a list of ((namespaceURI, localName), attribute) tuples."

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

    def _length(self):
        return len(self.values())

    length = property(_length)

class NamedNodeMapIterator(object):

    "An iterator over a NamedNodeMap."

    def __init__(self, nodemap):

        "Take a snapshot of the items in 'nodemap' for iteration."

        self.nodemap = nodemap
        self.items = self.nodemap.items()

    def __iter__(self):

        # Iterators are expected to be iterable themselves.

        return self

    def next(self):

        "Return the next attribute, raising StopIteration when exhausted."

        if not self.items:
            raise StopIteration
        current = self.items[0][1]
        self.items = self.items[1:]
        return current

    # Support the iterator protocol spelling used by newer Python versions.

    __next__ = next

class NodeList(list):

    "A wrapper around node lists."

    def item(self, index):
        return self[index]

    def _length(self):
        return len(self)

    length = property(_length)

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, impl=None, ownerDocument=None):

        """
        Initialise the wrapper around the native 'node'. The optional 'impl'
        defaults to the module's default implementation; 'ownerDocument' is the
        Document wrapper to which the node belongs.
        """

        self._node = node
        self.impl = impl or default_impl
        self.ownerDocument = ownerDocument

    def __repr__(self):
        return "<%s: %r>" % (self.__class__.__name__, self.nodeName)

    def as_native_node(self):

        "Return the wrapped native node."

        return self._node

    # Property implementations. Most delegate directly to the macrolib
    # function of the corresponding name, applied to the native node.

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])

    def _firstChild(self):

        # A childless node yields None via the substitute list.

        return (self.childNodes or [None])[0]

    def _lastChild(self):
        return (self.childNodes or [None])[-1]

    def _attributes(self):
        return NamedNodeMap(self, self.impl)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _textContent(self):
        return Node_textContent(self._node)

    def _nodeValue(self):

        # Node types listed in null_value_node_types have no value.

        if self.nodeType in null_value_node_types:
            return None
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):
        return self.impl.get_node_or_none(Node_parentNode(self._node), self)

    def _previousSibling(self):
        return self.impl.get_node_or_none(Node_previousSibling(self._node), self)

    def _nextSibling(self):
        return self.impl.get_node_or_none(Node_nextSibling(self._node), self)

    def _doctype(self):
        return self.impl.get_node_or_none(Node_doctype(self._node), self)

    def _publicId(self):

        # NOTE: To be fixed when the libxml2mod API has been figured out.

        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findId(declaration, "PUBLIC")

    def _systemId(self):

        # NOTE: To be fixed when the libxml2mod API has been figured out.

        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()

        # Where a public identifier is present, the first quoted value in the
        # declaration is returned; otherwise, the value following SYSTEM.

        if self._findId(declaration, "PUBLIC"):
            return self._findIdValue(declaration, 0)
        return self._findId(declaration, "SYSTEM")

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findId(self, declaration, identifier):

        """
        Return the first double-quoted value found at or after 'identifier' in
        the 'declaration' text, or None if the identifier is absent.
        """

        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):

        """
        Return the text between the first pair of double quotes found at or
        after position 'i' in 'declaration', or None if no such pair exists.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    # Attribute and child inspection.

    def hasChildNodes(self):
        return bool(self.childNodes)

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):
        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)

    def getAttributeNode(self, localName):
        return Attribute(Node_getAttributeNode(self._node, localName), self.impl, self.ownerDocument, self)

    # Attribute modification.

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    # Node creation.

    def createElementNS(self, ns, name):
        return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)

    def createElement(self, name):
        return self.impl.get_node(Node_createElement(self._node, name), self)

    def createAttributeNS(self, ns, name):

        """
        Return a new Attribute with the given namespace 'ns' and 'name',
        created on a temporary element.
        """

        tmp = self.createElement("tmp")

        # The implementation belongs to the Attribute wrapper, not to the
        # native creation function; the creating document is recorded as the
        # owner, as it is for created elements.

        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.impl, self.ownerDocument)

    def createAttribute(self, name):

        "Return a new Attribute called 'name', created on a temporary element."

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.impl, self.ownerDocument)

    def createTextNode(self, value):
        return self.impl.get_node(Node_createTextNode(self._node, value), self)

    def createComment(self, value):
        return self.impl.get_node(Node_createComment(self._node, value), self)

    def createCDATASection(self, value):
        return self.impl.get_node(Node_createCDATASection(self._node, value), self)

    def importNode(self, node, deep):

        """
        Import 'node' (copying descendants if 'deep' is true) and return the
        wrapper for the imported node. Nodes lacking as_native_node are taken
        to be from another DOM implementation and imported via
        Node_importNode_DOM.
        """

        if hasattr(node, "as_native_node"):
            imported = Node_importNode(self._node, node.as_native_node(), deep)
        else:
            imported = Node_importNode_DOM(self._node, node, deep)
        return self.impl.get_node(imported, self)

    def cloneNode(self, deep):

        # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).

        return self.importNode(self, deep)

    # Tree modification.

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' before the existing child 'oldNode', returning the
        inserted node. If 'oldNode' is None, 'tmp' is appended instead.
        Raise WrongDocumentErr if 'tmp' belongs to another document or DOM
        implementation, and NotFoundErr if 'oldNode' is not a child of this
        node.
        """

        # DOM Core: a null reference child means insertion at the end.

        if oldNode is None:
            return self.appendChild(tmp)

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()

        # Nodes must be from this implementation before insertion.

        if not hasattr(tmp, "as_native_node"):
            raise xml.dom.WrongDocumentErr()

        return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)

    def replaceChild(self, tmp, oldNode):

        """
        Replace the existing child 'oldNode' with 'tmp', returning the wrapper
        for the result of Node_replaceChild. Raise WrongDocumentErr if 'tmp'
        belongs to another document or DOM implementation, and NotFoundErr if
        'oldNode' is not a child of this node.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()

        # Nodes must be from this implementation before insertion.

        if not hasattr(tmp, "as_native_node"):
            raise xml.dom.WrongDocumentErr()

        return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)

    def appendChild(self, tmp):

        """
        Append 'tmp' to this node's children, returning the wrapper for the
        result of Node_appendChild. Raise WrongDocumentErr if 'tmp' belongs to
        another document or DOM implementation.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()

        # Nodes must be from this implementation before insertion.

        if not hasattr(tmp, "as_native_node"):
            raise xml.dom.WrongDocumentErr()

        return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)

    def removeChild(self, tmp):

        "Remove the child 'tmp' from this node, returning 'tmp'."

        # Nodes must be from this implementation in order to be removed.

        if not hasattr(tmp, "as_native_node"):
            raise xml.dom.WrongDocumentErr()

        Node_removeChild(self._node, tmp.as_native_node())
        return tmp

    # Searching.

    def getElementById(self, identifier):
        _node = Node_getElementById(self.ownerDocument.as_native_node(), identifier)
        return self.impl.get_node_or_none(_node, self)

    def getElementsByTagName(self, tagName):
        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        "Merge each run of adjacent text node children into one text node."

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif text_nodes:
                self._normalize(text_nodes)
                text_nodes = []

        # Deal with any run reaching the end of the child nodes.

        if text_nodes:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the given non-empty run of 'text_nodes' with a single text node
        holding their concatenated values.
        """

        # All but the last node are removed; the last is then replaced by the
        # combined node so that the position in the child list is retained.

        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    childNodes = property(_childNodes)
    firstChild = property(_firstChild)
    lastChild = property(_lastChild)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    textContent = property(_textContent)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.

    entities = {}
    notations = {}

    def isSameNode(self, other):
        return self == other

    # Equality is decided by Node_equals on the native nodes, since separate
    # wrappers may exist for the same native node; hashing uses the local name.

    def __hash__(self):
        return hash(self.localName)

    def __eq__(self, other):
        return isinstance(other, Node) and Node_equals(self._node, other._node)

    def __ne__(self, other):
        return not (self == other)

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the given expression 'expr' using the optional 'variables' and
        'namespaces' mappings. The default namespace bindings are always
        available, with 'namespaces' taking precedence. String results are
        passed through to_unicode, results having a length are returned as a
        NodeList of wrapped nodes, and other results are returned unchanged.
        """

        ns = {}
        ns.update(default_ns)
        ns.update(namespaces or {})
        result = Node_xpath(self._node, expr, variables, ns)
        if isinstance(result, str):
            return to_unicode(result)
        elif hasattr(result, "__len__"):
            return NodeList([self.impl.get_node(_node, self) for _node in result])
        else:
            return result

    # Other extensions to the usual PyXML API.

    def xinclude(self):

        """
        Process XInclude declarations within the document, returning the number
        of substitutions performed (zero or more), raising an XIncludeException
        otherwise.
        """

        return Node_xinclude(self._node)

    # Convenience methods. These call the module-level functions of the same
    # names with this node.

    def toString(self, encoding=None, prettyprint=0):
        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):
        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):
        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    "A class providing attribute access."

    def __init__(self, node, impl, ownerDocument=None, ownerElement=None):

        """
        Initialise the attribute as for Node, additionally recording the
        'ownerElement' to which the attribute is attached (if any).
        """

        Node.__init__(self, node, impl, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):

        # The recorded owner element is reported as the parent.

        return self.ownerElement

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class _Document:

    """
    An abstract class providing document-level housekeeping and distinct
    functionality. Configuration of the document is also supported.
    See: http://www.w3.org/TR/DOM-Level-3-Core/core.html#DOMConfiguration
    """

    # Constants from
    # See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-NodeEditVAL

    VAL_TRUE = 5
    VAL_FALSE = 6
    VAL_UNKNOWN = 7

    def __init__(self, node, impl):

        """
        Initialise the document wrapper around the native document 'node' with
        the given implementation 'impl', creating a fresh error handler.
        """

        self._node = node
        self.implementation = self.impl = impl
        self.error_handler = libxml2dom.errors.DOMErrorHandler()

    # Standard DOM properties and their implementations.

    def _documentElement(self):

        # NOTE(review): raises IndexError where no element is selected.

        return self.xpath("*")[0]

    def _ownerDocument(self):
        return self

    def __del__(self):

        # The native document is freed when this wrapper is collected; node
        # wrappers hold a reference to their owner document.

        libxml2mod.xmlFreeDoc(self._node)

    documentElement = property(_documentElement)
    ownerDocument = property(_ownerDocument)

    # DOM Level 3 Core DOMConfiguration methods.

    def setParameter(self, name, value):

        "No parameters can be set: always raise a DOM exception."

        if name == "error-handler":
            raise xml.dom.NotSupportedErr()
        raise xml.dom.NotFoundErr()

    def getParameter(self, name):

        """
        Return the error handler for the "error-handler" parameter, raising
        NotFoundErr for any other 'name'.
        """

        if name == "error-handler":
            return self.error_handler
        raise xml.dom.NotFoundErr()

    def canSetParameter(self, name, value):
        return 0

    def _parameterNames(self):
        return []

    # Extensions to the usual PyXML API.

    def validate(self, doc):

        """
        Validate the document against the given schema document, 'doc'.
        """

        # The namespace of the schema's root element is passed to the schema
        # functions.

        validation_ns = doc.documentElement.namespaceURI

        # Schema documents from other implementations are serialised first.

        if hasattr(doc, "as_native_node"):
            _schema = Document_schema(doc.as_native_node(), validation_ns)
        else:
            _schema = Document_schemaFromString(doc.toString(), validation_ns)

        # The schema is freed regardless of the validation outcome.

        try:
            self.error_handler.reset()
            return Document_validate(_schema, self._node, self.error_handler, validation_ns)
        finally:
            Schema_free(_schema, validation_ns)

    # DOM Level 3 Validation methods.

    def validateDocument(self, doc):

        """
        Validate the document against the given schema document, 'doc'.
        See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-DocumentEditVAL-validateDocument
        """

        if self.validate(doc):
            return self.VAL_TRUE
        return self.VAL_FALSE

class Document(_Document, Node):

    """
    A generic document class. Specialised document classes should inherit from
    the _Document class and their own variation of Node.
    """

    pass

class DocumentType(object):

    "A class providing a container for document type information."

    def __init__(self, localName, publicId, systemId):
        self.name = self.localName = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: Nothing is currently provided to support the following
        # NOTE: attributes.

        self.entities = {}
        self.notations = {}

# Constants.

# Node types for which nodeValue is always None.

null_value_node_types = [
    Node.DOCUMENT_NODE,
    Node.DOCUMENT_TYPE_NODE,
    Node.ELEMENT_NODE,
    Node.ENTITY_NODE,
    Node.ENTITY_REFERENCE_NODE,
    Node.NOTATION_NODE,
    ]

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Create a document type using the default implementation."

    return default_impl.createDocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):

    "Create a document using the default implementation."

    return default_impl.createDocument(namespaceURI, localName, doctype)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse a document from 'stream_or_string' and return a document object.

    An object providing a 'read' method is treated as a stream whose entire
    content is read and parsed; anything else is treated as the filename of the
    document to be parsed. The remaining parameters should be given as keyword
    arguments:

    'html'          if true, treat the content as HTML rather than XML
    'htmlencoding'  the document encoding to be assumed when parsing HTML
    'unfinished'    if true, accept documents which are missing content such as
                    closing tags
    'validate'      if true, attempt to validate the parsed document
    'remote'        if true, follow references to remote documents (such as
                    DTDs) in order to obtain them
    'impl'          the implementation used to wrap the document (the default
                    implementation if omitted)
    """

    options = {
        "html" : html, "htmlencoding" : htmlencoding,
        "unfinished" : unfinished, "validate" : validate, "remote" : remote,
        "impl" : impl or default_impl
        }

    # Filenames are handed on directly; streams are read in full.

    if not hasattr(stream_or_string, "read"):
        return parseFile(stream_or_string, **options)

    return parseString(stream_or_string.read(), **options)

def parseFile(filename, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the file called 'filename' and return a document object.

    The remaining parameters should be given as keyword arguments:

    'html'          if true, treat the content as HTML rather than XML
    'htmlencoding'  the document encoding to be assumed when parsing HTML
    'unfinished'    if true, accept documents which are missing content such as
                    closing tags
    'validate'      if true, attempt to validate the parsed document
    'remote'        if true, follow references to remote documents (such as
                    DTDs) in order to obtain them
    'impl'          the implementation used to wrap the document (the default
                    implementation if omitted)
    """

    implementation = impl or default_impl
    native_doc = Node_parseFile(filename, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote)
    return implementation.adoptDocument(native_doc)

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the document text held in the string 's' and return a document
    object.

    The remaining parameters should be given as keyword arguments:

    'html'          if true, treat the content as HTML rather than XML
    'htmlencoding'  the document encoding to be assumed when parsing HTML
    'unfinished'    if true, accept documents which are missing content such as
                    closing tags
    'validate'      if true, attempt to validate the parsed document
    'remote'        if true, follow references to remote documents (such as
                    DTDs) in order to obtain them
    'impl'          the implementation used to wrap the document (the default
                    implementation if omitted)
    """

    implementation = impl or default_impl
    native_doc = Node_parseString(s, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote)
    return implementation.adoptDocument(native_doc)

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the document found at 'uri' and return a document object.

    The remaining parameters should be given as keyword arguments:

    'html'          if true, treat the content as HTML rather than XML
    'htmlencoding'  the document encoding to be assumed when parsing HTML
    'unfinished'    if true, accept documents which are missing content such as
                    closing tags
    'validate'      if true, attempt to validate the parsed document
    'remote'        if true, follow references to remote documents (such as
                    DTDs) in order to obtain them
    'impl'          the implementation used to wrap the document (the default
                    implementation if omitted)

    Documents are retrieved using libxml2's own network capabilities. To
    retrieve documents using Python's own modules for this purpose (such as
    urllib or urllib2), open a stream and pass it to the parse function:

    f = urllib.urlopen(uri)
    try:
        doc = libxml2dom.parse(f, html=html)
    finally:
        f.close()
    """

    # HTML documents are handled by the file parsing function.

    if html:
        return parseFile(uri, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
            validate=validate, remote=remote, impl=impl)

    implementation = impl or default_impl
    native_doc = Node_parseURI(uri, unfinished=unfinished, validate=validate, remote=remote)
    return implementation.adoptDocument(native_doc)

def toString(node, encoding=None, prettyprint=0):

    """
    Serialise 'node' together with its children, returning the result as a
    string. The optional 'encoding' overrides the default character encoding of
    the serialisation; the optional 'prettyprint' selects prettyprinted output
    (not produced by default).
    """

    native = node.as_native_node()
    return Node_toString(native, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Serialise 'node' together with its children, writing the result to the
    given 'stream'. The optional 'encoding' overrides the default character
    encoding of the serialisation; the optional 'prettyprint' selects
    prettyprinted output (not produced by default).
    """

    native = node.as_native_node()
    Node_toStream(native, stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Serialise 'node' together with its children, writing the result to the file
    called 'filename'. The optional 'encoding' overrides the default character
    encoding of the serialisation; the optional 'prettyprint' selects
    prettyprinted output (not produced by default).
    """

    native = node.as_native_node()
    Node_toFile(native, filename, encoding, prettyprint)

def adoptNodes(nodes, impl=None):

    """
    Adopt the given low-level 'nodes', returning a list of high-level
    equivalents which share a document obtained from the first node. This
    utility is experimental and should not be casually used.
    """

    impl = impl or default_impl

    if len(nodes) == 0:
        return []

    # All wrappers share the document to which the first node belongs.

    doc = impl.adoptDocument(libxml2mod.doc(nodes[0]))
    return [Node(node, impl, doc) for node in nodes]

def getDOMImplementation():

    "Return the default DOM implementation."

    return default_impl

# Single instance of the implementation.

default_impl = Implementation()

# vim: tabstop=4 expandtab shiftwidth=4