#!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.

Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2012, 2013, 2014 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

__version__ = "0.5.1"

from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile, \
    LSException, XIncludeException
import libxml2dom.errors

# Standard namespaces.

XML_NAMESPACE = xml.dom.XML_NAMESPACE

# Default namespace bindings for XPath.

default_ns = {
    "xml" : XML_NAMESPACE
    }

class Implementation(object):

    "Contains an abstraction over the DOM implementation."

    def createDocumentType(self, localName, publicId, systemId):

        "Return a new DocumentType holding the given name and identifiers."

        return DocumentType(localName, publicId, systemId)

    def createDocument(self, namespaceURI, localName, doctype):

        """
        Create a new low-level document using the given 'namespaceURI',
        'localName' and 'doctype', returning it wrapped in a Document.
        """

        return Document(Node_createDocument(namespaceURI, localName, doctype), self)

    # Wrapping of documents.

    def adoptDocument(self, node):

        "Wrap the given low-level document 'node' in a Document."

        return Document(node, self)

    # Factory functions.

    def get_node(self, _node, context_node):

        """
        Return a wrapper for the low-level '_node', using 'context_node' to
        obtain the owner document and the node type constants.
        """

        node_type = Node_nodeType(_node)

        # Return the existing document.

        if node_type == context_node.DOCUMENT_NODE:
            return context_node.ownerDocument

        # Return an attribute using the parent of the attribute as the owner
        # element.

        elif node_type == context_node.ATTRIBUTE_NODE:
            return Attribute(_node, self, context_node.ownerDocument,
                self.get_node(Node_parentNode(_node), context_node))

        # Return other nodes.

        else:
            return Node(_node, self, context_node.ownerDocument)

    def get_node_or_none(self, _node, context_node):

        "As get_node, but return None where '_node' is None."

        if _node is None:
            return None
        else:
            return self.get_node(_node, context_node)

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.
    """

    def __init__(self, node, impl):
        self.node = node
        self.impl = impl

    def getNamedItem(self, name):
        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):
        return self.node.getAttributeNodeNS(ns, localName)

    # The set and remove operations below each return the previously found
    # attribute node, or None where the lookup raised KeyError.

    def setNamedItem(self, node):
        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):
        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):
        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):
        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Iterator emulation.

    def __iter__(self):
        return NamedNodeMapIterator(self)

    # Dictionary emulation methods.

    def __getitem__(self, name):
        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        "Store 'node' only where 'name' matches its node name."

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            # The call form of raise is valid in both Python 2 and Python 3.
            raise KeyError(name)

    def __delitem__(self, name):
        # NOTE: To be implemented. Deleting an item currently does nothing.
        pass

    def values(self):
        return [Attribute(_node, self.impl, self.node.ownerDocument)
            for _node in Node_attributes(self.node.as_native_node()).values()]

    def keys(self):
        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):
        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):
        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

    def _length(self):
        return len(self.values())

    length = property(_length)

class NamedNodeMapIterator(object):

    "An iterator over a NamedNodeMap."

    def __init__(self, nodemap):
        self.nodemap = nodemap

        # A snapshot of (key, attribute) pairs taken at construction time.

        self.items = self.nodemap.items()

    def __iter__(self):

        # Iterators return themselves so that they can be used wherever an
        # iterable is expected.

        return self

    def next(self):

        "Return the next attribute, discarding it from the pending items."

        if self.items:
            current = self.items[0][1]
            self.items = self.items[1:]
            return current
        else:
            raise StopIteration

    # Python 3 spelling of the iterator method.

    __next__ = next

class NodeList(list):

    "A wrapper around node lists."

    def item(self, index):
        return self[index]

    def _length(self):
        return len(self)

    length = property(_length)

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, impl=None, ownerDocument=None):

        """
        Wrap the low-level 'node', using 'impl' (or the default implementation
        where omitted) and recording the given 'ownerDocument'.
        """

        self._node = node
        self.impl = impl or default_impl
        self.ownerDocument = ownerDocument

    def __repr__(self):
        return "<%s: %r>" % (self.__class__.__name__, self.nodeName)

    def as_native_node(self):

        "Return the wrapped low-level node."

        return self._node

    # Property implementations delegating to the low-level functions.

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        # NOTE: Consider a generator instead.

        return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])

    def _firstChild(self):
        return (self.childNodes or [None])[0]

    def _lastChild(self):
        return (self.childNodes or [None])[-1]

    def _attributes(self):
        return NamedNodeMap(self, self.impl)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _textContent(self):
        return Node_textContent(self._node)

    def _nodeValue(self):
        if self.nodeType in null_value_node_types:
            return None
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):
        return self.impl.get_node_or_none(Node_parentNode(self._node), self)

    def _previousSibling(self):
        return self.impl.get_node_or_none(Node_previousSibling(self._node), self)

    def _nextSibling(self):
        return self.impl.get_node_or_none(Node_nextSibling(self._node), self)

    def _doctype(self):
        _doctype = Node_doctype(self._node)
        if _doctype is not None:
            return self.impl.get_node(_doctype, self)
        else:
            return None

    def _publicId(self):
        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findId(declaration, "PUBLIC")

    def _systemId(self):
        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()

        # With a PUBLIC declaration, the system identifier is the quoted
        # value following the quoted public identifier, so the latter must be
        # skipped rather than returned.

        i = declaration.find("PUBLIC")
        if i != -1:
            q = declaration.find('"', i)
            if q == -1:
                return None
            q2 = declaration.find('"', q + 1)
            if q2 == -1:
                return None
            return self._findIdValue(declaration, q2 + 1)

        return self._findId(declaration, "SYSTEM")

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findId(self, declaration, identifier):

        """
        Return the first double-quoted value following 'identifier' in the
        given 'declaration', or None if either cannot be found.
        """

        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):

        """
        Return the first double-quoted value found at or after position 'i' in
        the given 'declaration', or None if no complete quoted value exists.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    def hasChildNodes(self):
        return bool(self.childNodes)

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):
        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)

    def getAttributeNode(self, localName):
        return Attribute(Node_getAttributeNode(self._node, localName), self.impl, self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    def createElementNS(self, ns, name):
        return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)

    def createElement(self, name):
        return self.impl.get_node(Node_createElement(self._node, name), self)

    def createAttributeNS(self, ns, name):

        """
        Return a new attribute having the given 'ns' and 'name', created on a
        temporary element.
        """

        tmp = self.createElement("tmp")

        # The implementation is an argument of the Attribute wrapper, not of
        # the low-level creation function (compare with createAttribute).

        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.impl)

    def createAttribute(self, name):

        """
        Return a new attribute having the given 'name', created on a temporary
        element.
        """

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.impl)

    def createTextNode(self, value):
        return self.impl.get_node(Node_createTextNode(self._node, value), self)

    def createComment(self, value):
        return self.impl.get_node(Node_createComment(self._node, value), self)

    def createCDATASection(self, value):
        return self.impl.get_node(Node_createCDATASection(self._node, value), self)

    def importNode(self, node, deep):

        """
        Import the given 'node' (copying descendants according to 'deep'),
        using the native import where 'node' provides as_native_node, and the
        generic DOM import otherwise.
        """

        if hasattr(node, "as_native_node"):
            return self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)
        else:
            return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)

    def cloneNode(self, deep):
        # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).
        return self.importNode(self, deep)

    def insertBefore(self, tmp, oldNode):

        """
        Insert 'tmp' before the existing child 'oldNode', returning the wrapped
        result. Where 'oldNode' is None, 'tmp' is appended instead, as
        specified by DOM. WrongDocumentErr is raised if 'tmp' belongs to
        another document or implementation; NotFoundErr is raised if 'oldNode'
        is not a child of this node.
        """

        if oldNode is None:
            return self.appendChild(tmp)

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()

        # Nodes must be from this implementation before insertion.

        if not hasattr(tmp, "as_native_node"):
            raise xml.dom.WrongDocumentErr()

        return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)

    def replaceChild(self, tmp, oldNode):

        """
        Replace the existing child 'oldNode' with 'tmp', returning the wrapped
        result. WrongDocumentErr is raised if 'tmp' belongs to another document
        or implementation; NotFoundErr is raised if 'oldNode' is not a child of
        this node.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()
        if oldNode.parentNode != self:
            raise xml.dom.NotFoundErr()

        # Nodes must be from this implementation before insertion.

        if not hasattr(tmp, "as_native_node"):
            raise xml.dom.WrongDocumentErr()

        return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)

    def appendChild(self, tmp):

        """
        Append 'tmp' to this node, returning the wrapped result.
        WrongDocumentErr is raised if 'tmp' belongs to another document or
        implementation.
        """

        if tmp.ownerDocument != self.ownerDocument:
            raise xml.dom.WrongDocumentErr()

        # Nodes must be from this implementation before insertion.

        if not hasattr(tmp, "as_native_node"):
            raise xml.dom.WrongDocumentErr()

        return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)

    def removeChild(self, tmp):

        "Remove 'tmp' from this node, returning 'tmp' itself."

        # Nodes must be from this implementation in order to be removed.

        if not hasattr(tmp, "as_native_node"):
            raise xml.dom.WrongDocumentErr()

        Node_removeChild(self._node, tmp.as_native_node())
        return tmp

    def getElementById(self, identifier):
        _node = Node_getElementById(self.ownerDocument.as_native_node(), identifier)
        if _node is None:
            return None
        else:
            return self.impl.get_node(_node, self)

    def getElementsByTagName(self, tagName):
        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        "Merge each run of adjacent child text nodes into a single text node."

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif len(text_nodes) != 0:
                self._normalize(text_nodes)
                text_nodes = []
        if len(text_nodes) != 0:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the given adjacent 'text_nodes' with one text node holding
        their concatenated values: all but the last are removed, and the last
        is replaced by the new node.
        """

        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    childNodes = property(_childNodes)
    firstChild = property(_firstChild)
    lastChild = property(_lastChild)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    textContent = property(_textContent)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.

    entities = {}
    notations = {}

    def isSameNode(self, other):
        return self == other

    def __hash__(self):
        return hash(self.localName)

    def __eq__(self, other):
        return isinstance(other, Node) and Node_equals(self._node, other._node)

    def __ne__(self, other):
        return not (self == other)

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the given expression 'expr' using the optional 'variables' and
        'namespaces' mappings. The owner document's namespace bindings are
        used, with any given 'namespaces' taking precedence. String results
        are converted using to_unicode, results having a length are returned
        as a NodeList of wrapped nodes, and other results are returned as they
        are.
        """

        ns = {}
        ns.update(self.ownerDocument.namespaces)
        if namespaces:
            ns.update(namespaces)
        result = Node_xpath(self._node, expr, variables, ns)
        if isinstance(result, str):
            return to_unicode(result)
        elif hasattr(result, "__len__"):
            return NodeList([self.impl.get_node(_node, self) for _node in result])
        else:
            return result

    # Other extensions to the usual PyXML API.

    def xinclude(self):

        """
        Process XInclude declarations within the document, returning the number
        of substitutions performed (zero or more), raising an XIncludeException
        otherwise.
        """

        return Node_xinclude(self._node)

    # Convenience methods.

    def toString(self, encoding=None, prettyprint=0):
        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):
        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):
        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    "A class providing attribute access."

    def __init__(self, node, impl, ownerDocument=None, ownerElement=None):
        Node.__init__(self, node, impl, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):

        # The recorded owner element is presented as the parent.

        return self.ownerElement

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class _Document:

    """
    An abstract class providing document-level housekeeping and distinct
    functionality. Configuration of the document is also supported.
    See: http://www.w3.org/TR/DOM-Level-3-Core/core.html#DOMConfiguration
    """

    # Constants from
    # See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-NodeEditVAL

    VAL_TRUE = 5
    VAL_FALSE = 6
    VAL_UNKNOWN = 7

    def __init__(self, node, impl, namespaces=None):

        """
        Initialise the document with the low-level 'node', the implementation
        'impl', and any additional 'namespaces' to be combined with the
        default bindings for use in XPath evaluation.
        """

        self._node = node
        self.implementation = self.impl = impl
        self.error_handler = libxml2dom.errors.DOMErrorHandler()
        self.namespaces = {}
        self._update_namespaces([default_ns, namespaces])

    def _update_namespaces(self, additional_namespaces):

        "Merge each non-empty mapping in 'additional_namespaces', in order."

        for namespaces in additional_namespaces:
            if namespaces:
                self.namespaces.update(namespaces)

    # Standard DOM properties and their implementations.

    def _documentElement(self):
        return self.xpath("*")[0]

    def _ownerDocument(self):
        return self

    def __del__(self):
        #print "Freeing document", self._node
        libxml2mod.xmlFreeDoc(self._node)

    documentElement = property(_documentElement)
    ownerDocument = property(_ownerDocument)

    # DOM Level 3 Core DOMConfiguration methods.

    def setParameter(self, name, value):

        "No parameter can be set: the error handler is known but read-only."

        if name == "error-handler":
            raise xml.dom.NotSupportedErr()
        raise xml.dom.NotFoundErr()

    def getParameter(self, name):
        if name == "error-handler":
            return self.error_handler
        raise xml.dom.NotFoundErr()

    def canSetParameter(self, name, value):
        return 0

    def _parameterNames(self):
        return []

    # Extensions to the usual PyXML API.

    def validate(self, doc):

        """
        Validate the document against the given schema document, 'doc'.
        """

        validation_ns = doc.documentElement.namespaceURI

        if hasattr(doc, "as_native_node"):
            _schema = Document_schema(doc.as_native_node(), validation_ns)
        else:
            _schema = Document_schemaFromString(doc.toString(), validation_ns)

        # The schema is always freed, even if validation raises an exception.

        try:
            self.error_handler.reset()
            return Document_validate(_schema, self._node, self.error_handler, validation_ns)
        finally:
            Schema_free(_schema, validation_ns)

    # DOM Level 3 Validation methods.

    def validateDocument(self, doc):

        """
        Validate the document against the given schema document, 'doc',
        returning VAL_TRUE or VAL_FALSE.
        See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-DocumentEditVAL-validateDocument
        """

        if self.validate(doc):
            return self.VAL_TRUE
        else:
            return self.VAL_FALSE

class Document(_Document, Node):

    """
    A generic document class. Specialised document classes should inherit from
    the _Document class and their own variation of Node.
    """

    pass

class DocumentType(object):

    "A class providing a container for document type information."

    def __init__(self, localName, publicId, systemId):
        self.name = self.localName = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: Nothing is currently provided to support the following
        # NOTE: attributes.

        self.entities = {}
        self.notations = {}

# Constants.

null_value_node_types = [
    Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE, Node.ELEMENT_NODE,
    Node.ENTITY_NODE, Node.ENTITY_REFERENCE_NODE, Node.NOTATION_NODE
    ]

# Utility functions.

def createDocumentType(localName, publicId, systemId):
    return default_impl.createDocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):
    return default_impl.createDocument(namespaceURI, localName, doctype)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (such as a file or stream object), or a string (containing the
    filename of a document). The optional parameters described below should be
    provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)

def parseFile(filename, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the file having the given 'filename'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseFile(filename, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content of the given string 's'. The optional parameters described
    below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseString(s, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content found at the given 'uri'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    Documents are retrieved using libxml2's own network capabilities. To
    retrieve documents using Python's own modules for this purpose (such as
    urllib or urllib2), open a stream and pass it to the parse function:

    f = urllib.urlopen(uri)
    try:
        doc = libxml2dom.parse(f, html)
    finally:
        f.close()

    A document object is returned by this function.
    """

    if html:
        return parseFile(uri, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
            validate=validate, remote=remote, impl=impl)
    else:
        impl = impl or default_impl
        return impl.adoptDocument(Node_parseURI(uri, unfinished=unfinished,
            validate=validate, remote=remote))

def toString(node, encoding=None, prettyprint=0):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    return Node_toString(node.as_native_node(), encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    Node_toStream(node.as_native_node(), stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation. The optional
    'prettyprint' indicates whether the serialised form is prettyprinted or not
    (the default setting).
    """

    Node_toFile(node.as_native_node(), filename, encoding, prettyprint)

def adoptNodes(nodes, impl=None):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.
    """

    impl = impl or default_impl

    if len(nodes) == 0:
        return []

    # All nodes are wrapped using the document obtained from the first node.

    doc = impl.adoptDocument(libxml2mod.doc(nodes[0]))
    results = []
    for node in nodes:
        results.append(Node(node, impl, doc))
    return results

def getDOMImplementation():

    "Return the default DOM implementation."

    return default_impl

# Single instance of the implementation.

default_impl = Implementation()

# vim: tabstop=4 expandtab shiftwidth=4