libxml2dom (file libxml2dom/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 DOM wrapper around libxml2, specifically the libxml2mod Python extension module.     5      6 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2012, 2013 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU Lesser General Public License as published by the Free    10 Software Foundation; either version 3 of the License, or (at your option) any    11 later version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more    16 details.    17     18 You should have received a copy of the GNU Lesser General Public License along    19 with this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 __version__ = "0.5.1"    23     24 from libxml2dom.macrolib import *    25 from libxml2dom.macrolib import \    26     createDocument as Node_createDocument, \    27     parseString as Node_parseString, parseURI as Node_parseURI, \    28     parseFile as Node_parseFile, \    29     toString as Node_toString, toStream as Node_toStream, \    30     toFile as Node_toFile    31 import libxml2dom.errors    32     33 # Standard namespaces.    34     35 XML_NAMESPACE = xml.dom.XML_NAMESPACE    36     37 # Default namespace bindings for XPath.    38     39 default_ns = {    40     "xml" : XML_NAMESPACE    41     }    42     43 class Implementation(object):    44     45     "Contains an abstraction over the DOM implementation."    
46     47     def createDocumentType(self, localName, publicId, systemId):    48         return DocumentType(localName, publicId, systemId)    49     50     def createDocument(self, namespaceURI, localName, doctype):    51         return Document(Node_createDocument(namespaceURI, localName, doctype), self)    52     53     # Wrapping of documents.    54     55     def adoptDocument(self, node):    56         return Document(node, self)    57     58     # Factory functions.    59     60     def get_node(self, _node, context_node):    61     62         # Return the existing document.    63     64         if Node_nodeType(_node) == context_node.DOCUMENT_NODE:    65             return context_node.ownerDocument    66     67         # Return an attribute using the parent of the attribute as the owner    68         # element.    69     70         elif Node_nodeType(_node) == context_node.ATTRIBUTE_NODE:    71             return Attribute(_node, self, context_node.ownerDocument,    72                 self.get_node(Node_parentNode(_node), context_node))    73     74         # Return other nodes.    75     76         else:    77             return Node(_node, self, context_node.ownerDocument)    78     79     def get_node_or_none(self, _node, context_node):    80         if _node is None:    81             return None    82         else:    83             return self.get_node(_node, context_node)    84     85 # Attribute and node list wrappers.    86     87 class NamedNodeMap(object):    88     89     """    90     A wrapper around Node objects providing DOM and dictionary convenience    91     methods.    
92     """    93     94     def __init__(self, node, impl):    95         self.node = node    96         self.impl = impl    97     98     def getNamedItem(self, name):    99         return self.node.getAttributeNode(name)   100    101     def getNamedItemNS(self, ns, localName):   102         return self.node.getAttributeNodeNS(ns, localName)   103    104     def setNamedItem(self, node):   105         try:   106             old = self.getNamedItem(node.nodeName)   107         except KeyError:   108             old = None   109         self.node.setAttributeNode(node)   110         return old   111    112     def setNamedItemNS(self, node):   113         try:   114             old = self.getNamedItemNS(node.namespaceURI, node.localName)   115         except KeyError:   116             old = None   117         self.node.setAttributeNodeNS(node)   118         return old   119    120     def removeNamedItem(self, name):   121         try:   122             old = self.getNamedItem(name)   123         except KeyError:   124             old = None   125         self.node.removeAttribute(name)   126         return old   127    128     def removeNamedItemNS(self, ns, localName):   129         try:   130             old = self.getNamedItemNS(ns, localName)   131         except KeyError:   132             old = None   133         self.node.removeAttributeNS(ns, localName)   134         return old   135    136     # Iterator emulation.   137    138     def __iter__(self):   139         return NamedNodeMapIterator(self)   140    141     # Dictionary emulation methods.   142    143     def __getitem__(self, name):   144         return self.getNamedItem(name)   145    146     def __setitem__(self, name, node):   147         if name == node.nodeName:   148             self.setNamedItem(node)   149         else:   150             raise KeyError, name   151    152     def __delitem__(self, name):   153         # NOTE: To be implemented.   
154         pass   155    156     def values(self):   157         return [Attribute(_node, self.impl, self.node.ownerDocument) for _node in Node_attributes(self.node.as_native_node()).values()]   158    159     def keys(self):   160         return [(attr.namespaceURI, attr.localName) for attr in self.values()]   161    162     def items(self):   163         return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]   164    165     def __repr__(self):   166         return str(self)   167    168     def __str__(self):   169         return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])   170    171     def _length(self):   172         return len(self.values())   173    174     length = property(_length)   175    176 class NamedNodeMapIterator(object):   177    178     "An iterator over a NamedNodeMap."   179    180     def __init__(self, nodemap):   181         self.nodemap = nodemap   182         self.items = self.nodemap.items()   183    184     def next(self):   185         if self.items:   186             current = self.items[0][1]   187             self.items = self.items[1:]   188             return current   189         else:   190             raise StopIteration   191    192 class NodeList(list):   193    194     "A wrapper around node lists."   195    196     def item(self, index):   197         return self[index]   198    199     def _length(self):   200         return len(self)   201    202     length = property(_length)   203    204 # Node classes.   205    206 class Node(object):   207    208     """   209     A DOM-style wrapper around libxml2mod objects.   
210     """   211    212     ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE   213     COMMENT_NODE = xml.dom.Node.COMMENT_NODE   214     DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE   215     DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE   216     ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE   217     ENTITY_NODE = xml.dom.Node.ENTITY_NODE   218     ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE   219     NOTATION_NODE = xml.dom.Node.NOTATION_NODE   220     PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE   221     TEXT_NODE = xml.dom.Node.TEXT_NODE   222    223     def __init__(self, node, impl=None, ownerDocument=None):   224         self._node = node   225         self.impl = impl or default_impl   226         self.ownerDocument = ownerDocument   227    228     def __repr__(self):   229         return "<%s: %r>" % (self.__class__.__name__, self.nodeName)   230    231     def as_native_node(self):   232         return self._node   233    234     def _nodeType(self):   235         return Node_nodeType(self._node)   236    237     def _childNodes(self):   238    239         # NOTE: Consider a generator instead.   
240    241         return NodeList([self.impl.get_node(_node, self) for _node in Node_childNodes(self._node)])   242    243     def _firstChild(self):   244         return (self.childNodes or [None])[0]   245    246     def _lastChild(self):   247         return (self.childNodes or [None])[-1]   248    249     def _attributes(self):   250         return NamedNodeMap(self, self.impl)   251    252     def _namespaceURI(self):   253         return Node_namespaceURI(self._node)   254    255     def _textContent(self):   256         return Node_textContent(self._node)   257    258     def _nodeValue(self):   259         if self.nodeType in null_value_node_types:   260             return None   261         return Node_nodeValue(self._node)   262    263     def _setNodeValue(self, value):   264         Node_setNodeValue(self._node, value)   265    266     def _prefix(self):   267         return Node_prefix(self._node)   268    269     def _nodeName(self):   270         return Node_nodeName(self._node)   271    272     def _tagName(self):   273         return Node_tagName(self._node)   274    275     def _localName(self):   276         return Node_localName(self._node)   277    278     def _parentNode(self):   279         return self.impl.get_node_or_none(Node_parentNode(self._node), self)   280    281     def _previousSibling(self):   282         return self.impl.get_node_or_none(Node_previousSibling(self._node), self)   283    284     def _nextSibling(self):   285         return self.impl.get_node_or_none(Node_nextSibling(self._node), self)   286    287     def _doctype(self):   288         _doctype = Node_doctype(self._node)   289         if _doctype is not None:   290             return self.impl.get_node(_doctype, self)   291         else:   292             return None   293    294     def _publicId(self):   295         # NOTE: To be fixed when the libxml2mod API has been figured out.   
296         if self.nodeType != self.DOCUMENT_TYPE_NODE:   297             return None   298         declaration = self.toString()   299         return self._findId(declaration, "PUBLIC")   300    301     def _systemId(self):   302         # NOTE: To be fixed when the libxml2mod API has been figured out.   303         if self.nodeType != self.DOCUMENT_TYPE_NODE:   304             return None   305         declaration = self.toString()   306         if self._findId(declaration, "PUBLIC"):   307             return self._findIdValue(declaration, 0)   308         return self._findId(declaration, "SYSTEM")   309    310     # NOTE: To be removed when the libxml2mod API has been figured out.   311    312     def _findId(self, declaration, identifier):   313         i = declaration.find(identifier)   314         if i == -1:   315             return None   316         return self._findIdValue(declaration, i)   317    318     def _findIdValue(self, declaration, i):   319         q = declaration.find('"', i)   320         if q == -1:   321             return None   322         q2 = declaration.find('"', q + 1)   323         if q2 == -1:   324             return None   325         return declaration[q+1:q2]   326    327     def hasChildNodes(self):   328         return bool(self.childNodes)   329    330     def hasAttributeNS(self, ns, localName):   331         return Node_hasAttributeNS(self._node, ns, localName)   332    333     def hasAttribute(self, name):   334         return Node_hasAttribute(self._node, name)   335    336     def getAttributeNS(self, ns, localName):   337         return Node_getAttributeNS(self._node, ns, localName)   338    339     def getAttribute(self, name):   340         return Node_getAttribute(self._node, name)   341    342     def getAttributeNodeNS(self, ns, localName):   343         return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.impl, self.ownerDocument, self)   344    345     def getAttributeNode(self, localName):   
346         return Attribute(Node_getAttributeNode(self._node, localName), self.impl, self.ownerDocument, self)   347    348     def setAttributeNS(self, ns, name, value):   349         Node_setAttributeNS(self._node, ns, name, value)   350    351     def setAttribute(self, name, value):   352         Node_setAttribute(self._node, name, value)   353    354     def setAttributeNodeNS(self, node):   355         Node_setAttributeNodeNS(self._node, node._node)   356    357     def setAttributeNode(self, node):   358         Node_setAttributeNode(self._node, node._node)   359    360     def removeAttributeNS(self, ns, localName):   361         Node_removeAttributeNS(self._node, ns, localName)   362    363     def removeAttribute(self, name):   364         Node_removeAttribute(self._node, name)   365    366     def createElementNS(self, ns, name):   367         return self.impl.get_node(Node_createElementNS(self._node, ns, name), self)   368    369     def createElement(self, name):   370         return self.impl.get_node(Node_createElement(self._node, name), self)   371    372     def createAttributeNS(self, ns, name):   373         tmp = self.createElement("tmp")   374         return Attribute(Node_createAttributeNS(tmp._node, self.impl, ns, name))   375    376     def createAttribute(self, name):   377         tmp = self.createElement("tmp")   378         return Attribute(Node_createAttribute(tmp._node, name), self.impl)   379    380     def createTextNode(self, value):   381         return self.impl.get_node(Node_createTextNode(self._node, value), self)   382    383     def createComment(self, value):   384         return self.impl.get_node(Node_createComment(self._node, value), self)   385    386     def createCDATASection(self, value):   387         return self.impl.get_node(Node_createCDATASection(self._node, value), self)   388    389     def importNode(self, node, deep):   390         if hasattr(node, "as_native_node"):   391             return 
self.impl.get_node(Node_importNode(self._node, node.as_native_node(), deep), self)   392         else:   393             return self.impl.get_node(Node_importNode_DOM(self._node, node, deep), self)   394    395     def cloneNode(self, deep):   396         # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).   397         return self.importNode(self, deep)   398    399     def insertBefore(self, tmp, oldNode):   400         if tmp.ownerDocument != self.ownerDocument:   401             raise xml.dom.WrongDocumentErr()   402         if oldNode.parentNode != self:   403             raise xml.dom.NotFoundErr()   404    405         # Nodes must be from this implementation before insertion.   406    407         if not hasattr(tmp, "as_native_node"):   408             raise xml.dom.WrongDocumentErr()   409    410         return self.impl.get_node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)   411    412     def replaceChild(self, tmp, oldNode):   413         if tmp.ownerDocument != self.ownerDocument:   414             raise xml.dom.WrongDocumentErr()   415         if oldNode.parentNode != self:   416             raise xml.dom.NotFoundErr()   417    418         # Nodes must be from this implementation before insertion.   419    420         if not hasattr(tmp, "as_native_node"):   421             raise xml.dom.WrongDocumentErr()   422    423         return self.impl.get_node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self)   424    425     def appendChild(self, tmp):   426         if tmp.ownerDocument != self.ownerDocument:   427             raise xml.dom.WrongDocumentErr()   428    429         # Nodes must be from this implementation before insertion.   
430    431         if not hasattr(tmp, "as_native_node"):   432             raise xml.dom.WrongDocumentErr()   433    434         return self.impl.get_node(Node_appendChild(self._node, tmp.as_native_node()), self)   435    436     def removeChild(self, tmp):   437    438         # Nodes must be from this implementation in order to be removed.   439    440         if not hasattr(tmp, "as_native_node"):   441             raise xml.dom.WrongDocumentErr()   442    443         Node_removeChild(self._node, tmp.as_native_node())   444         return tmp   445    446     def getElementById(self, identifier):   447         _node = Node_getElementById(self.ownerDocument.as_native_node(), identifier)   448         if _node is None:   449             return None   450         else:   451             return self.impl.get_node(_node, self)   452    453     def getElementsByTagName(self, tagName):   454         return self.xpath(".//" + tagName)   455    456     def getElementsByTagNameNS(self, namespaceURI, localName):   457         return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})   458    459     def normalize(self):   460         text_nodes = []   461         for node in self.childNodes:   462             if node.nodeType == node.TEXT_NODE:   463                 text_nodes.append(node)   464             elif len(text_nodes) != 0:   465                 self._normalize(text_nodes)   466                 text_nodes = []   467         if len(text_nodes) != 0:   468             self._normalize(text_nodes)   469    470     def _normalize(self, text_nodes):   471         texts = []   472         for text_node in text_nodes[:-1]:   473             texts.append(text_node.nodeValue)   474             self.removeChild(text_node)   475         texts.append(text_nodes[-1].nodeValue)   476         self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])   477    478     childNodes = property(_childNodes)   479     firstChild = 
property(_firstChild)   480     lastChild = property(_lastChild)   481     value = data = nodeValue = property(_nodeValue, _setNodeValue)   482     textContent = property(_textContent)   483     name = nodeName = property(_nodeName)   484     tagName = property(_tagName)   485     namespaceURI = property(_namespaceURI)   486     prefix = property(_prefix)   487     localName = property(_localName)   488     parentNode = property(_parentNode)   489     nodeType = property(_nodeType)   490     attributes = property(_attributes)   491     previousSibling = property(_previousSibling)   492     nextSibling = property(_nextSibling)   493     doctype = property(_doctype)   494     publicId = property(_publicId)   495     systemId = property(_systemId)   496    497     # NOTE: To be fixed - these being doctype-specific values.   498    499     entities = {}   500     notations = {}   501    502     def isSameNode(self, other):   503         return self == other   504    505     def __hash__(self):   506         return hash(self.localName)   507    508     def __eq__(self, other):   509         return isinstance(other, Node) and Node_equals(self._node, other._node)   510    511     def __ne__(self, other):   512         return not (self == other)   513    514     # 4DOM extensions to the usual PyXML API.   515     # NOTE: To be finished.   516    517     def xpath(self, expr, variables=None, namespaces=None):   518    519         """   520         Evaluate the given expression 'expr' using the optional 'variables' and   521         'namespaces' mappings.   
522         """   523    524         ns = {}   525         ns.update(default_ns)   526         ns.update(namespaces or {})   527         result = Node_xpath(self._node, expr, variables, ns)   528         if isinstance(result, str):   529             return to_unicode(result)   530         elif hasattr(result, "__len__"):   531             return NodeList([self.impl.get_node(_node, self) for _node in result])   532         else:   533             return result   534    535     # Other extensions to the usual PyXML API.   536    537     def xinclude(self):   538    539         """   540         Process XInclude declarations within the document, returning the number   541         of substitutions performed (zero or more), raising an XIncludeException   542         otherwise.   543         """   544    545         return Node_xinclude(self._node)   546    547     # Convenience methods.   548    549     def toString(self, encoding=None, prettyprint=0):   550         return toString(self, encoding, prettyprint)   551    552     def toStream(self, stream, encoding=None, prettyprint=0):   553         toStream(self, stream, encoding, prettyprint)   554    555     def toFile(self, f, encoding=None, prettyprint=0):   556         toFile(self, f, encoding, prettyprint)   557    558 # Attribute nodes.   559    560 class Attribute(Node):   561    562     "A class providing attribute access."   563    564     def __init__(self, node, impl, ownerDocument=None, ownerElement=None):   565         Node.__init__(self, node, impl, ownerDocument)   566         self.ownerElement = ownerElement   567    568     def _parentNode(self):   569         return self.ownerElement   570    571     parentNode = property(_parentNode)   572    573 # Document housekeeping mechanisms.   574    575 class _Document:   576    577     """   578     An abstract class providing document-level housekeeping and distinct   579     functionality. Configuration of the document is also supported.   
580     See: http://www.w3.org/TR/DOM-Level-3-Core/core.html#DOMConfiguration   581     """   582    583     # Constants from    584     # See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-NodeEditVAL   585    586     VAL_TRUE = 5   587     VAL_FALSE = 6   588     VAL_UNKNOWN = 7   589    590     def __init__(self, node, impl):   591         self._node = node   592         self.implementation = self.impl = impl   593         self.error_handler = libxml2dom.errors.DOMErrorHandler()   594    595     # Standard DOM properties and their implementations.   596    597     def _documentElement(self):   598         return self.xpath("*")[0]   599    600     def _ownerDocument(self):   601         return self   602    603     def __del__(self):   604         #print "Freeing document", self._node   605         libxml2mod.xmlFreeDoc(self._node)   606    607     documentElement = property(_documentElement)   608     ownerDocument = property(_ownerDocument)   609    610     # DOM Level 3 Core DOMConfiguration methods.   611    612     def setParameter(self, name, value):   613         if name == "error-handler":   614             raise xml.dom.NotSupportedErr()   615         raise xml.dom.NotFoundErr()   616    617     def getParameter(self, name):   618         if name == "error-handler":   619             return self.error_handler   620         raise xml.dom.NotFoundErr()   621    622     def canSetParameter(self, name, value):   623         return 0   624    625     def _parameterNames(self):   626         return []   627    628     # Extensions to the usual PyXML API.   629    630     def validate(self, doc):   631    632         """   633         Validate the document against the given schema document, 'doc'.   
634         """   635    636         validation_ns = doc.documentElement.namespaceURI   637    638         if hasattr(doc, "as_native_node"):   639             _schema = Document_schema(doc.as_native_node(), validation_ns)   640         else:   641             _schema = Document_schemaFromString(doc.toString(), validation_ns)   642         try:   643             self.error_handler.reset()   644             return Document_validate(_schema, self._node, self.error_handler, validation_ns)   645         finally:   646             Schema_free(_schema, validation_ns)   647    648     # DOM Level 3 Validation methods.   649    650     def validateDocument(self, doc):   651    652         """   653         Validate the document against the given schema document, 'doc'.   654         See: http://www.w3.org/TR/DOM-Level-3-Val/validation.html#VAL-Interfaces-DocumentEditVAL-validateDocument   655         """   656    657         return self.validate(doc) and self.VAL_TRUE or self.VAL_FALSE   658    659 class Document(_Document, Node):   660    661     """   662     A generic document class. Specialised document classes should inherit from   663     the _Document class and their own variation of Node.   664     """   665    666     pass   667    668 class DocumentType(object):   669    670     "A class providing a container for document type information."   671    672     def __init__(self, localName, publicId, systemId):   673         self.name = self.localName = localName   674         self.publicId = publicId   675         self.systemId = systemId   676    677         # NOTE: Nothing is currently provided to support the following   678         # NOTE: attributes.   679    680         self.entities = {}   681         self.notations = {}   682    683 # Constants.   

null_value_node_types = [
    Node.DOCUMENT_NODE, Node.DOCUMENT_TYPE_NODE, Node.ELEMENT_NODE,
    Node.ENTITY_NODE, Node.ENTITY_REFERENCE_NODE, Node.NOTATION_NODE
    ]

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Create a document type using the default implementation."

    return default_impl.createDocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):

    "Create a document using the default implementation."

    return default_impl.createDocument(namespaceURI, localName, doctype)

def parse(stream_or_string, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (such as a file or stream object), or a string (containing the
    filename of a document). The optional parameters described below should be
    provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl

    # Objects providing a read method are treated as streams; anything else
    # is treated as a filename.

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)
    else:
        return parseFile(stream_or_string, html=html, htmlencoding=htmlencoding,
            unfinished=unfinished, validate=validate, remote=remote, impl=impl)

def parseFile(filename, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the file having the given 'filename'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseFile(filename, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseString(s, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content of the given string 's'. The optional parameters described
    below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    A document object is returned by this function.
    """

    impl = impl or default_impl
    return impl.adoptDocument(Node_parseString(s, html=html, htmlencoding=htmlencoding,
        unfinished=unfinished, validate=validate, remote=remote))

def parseURI(uri, html=0, htmlencoding=None, unfinished=0, validate=0, remote=0, impl=None):

    """
    Parse the content found at the given 'uri'. The optional parameters
    described below should be provided as keyword arguments.

    If the optional 'html' parameter is set to a true value, the content to be
    parsed will be treated as being HTML rather than XML. If the optional
    'htmlencoding' is specified, HTML parsing will be performed with the
    document encoding assumed to be that specified.

    If the optional 'unfinished' parameter is set to a true value, unfinished
    documents will be parsed, even though such documents may be missing content
    such as closing tags.

    If the optional 'validate' parameter is set to a true value, an attempt will
    be made to validate the parsed document.

    If the optional 'remote' parameter is set to a true value, references to
    remote documents (such as DTDs) will be followed in order to obtain such
    documents.

    Documents are retrieved using libxml2's own network capabilities. To
    retrieve documents using Python's own modules for this purpose (such as
    urllib or urllib2), open a stream and pass it to the parse function:

    f = urllib.urlopen(uri)
    try:
        doc = libxml2dom.parse(f, html=html)
    finally:
        f.close()

    A document object is returned by this function.
    """

    # HTML content is handled by the file parsing function, which accepts the
    # HTML-specific parameters.

    if html:
        return parseFile(uri, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
            validate=validate, remote=remote, impl=impl)
    else:
        impl = impl or default_impl
        return impl.adoptDocument(Node_parseURI(uri, unfinished=unfinished,
            validate=validate, remote=remote))

def toString(node, encoding=None, prettyprint=0):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    return Node_toString(node.as_native_node(), encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    Node_toStream(node.as_native_node(), stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation. The optional
    'prettyprint' indicates whether the serialised form is prettyprinted or not
    (the default setting).
    """

    Node_toFile(node.as_native_node(), filename, encoding, prettyprint)

def adoptNodes(nodes, impl=None):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.
    """

    impl = impl or default_impl

    if len(nodes) == 0:
        return []

    # The document is obtained from the first node and is used as the owner
    # document of every wrapper.

    doc = impl.adoptDocument(libxml2mod.doc(nodes[0]))
    return [Node(node, impl, doc) for node in nodes]

def getDOMImplementation():

    "Return the default DOM implementation."

    return default_impl

# Single instance of the implementation.

default_impl = Implementation()

# vim: tabstop=4 expandtab shiftwidth=4
libxml2dom

libxml2dom/__init__.py

libxml2dom/__init__.py