libxml2dom (file libxml2dom/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 DOM wrapper around libxml2, specifically the libxml2mod Python extension module.     5      6 Copyright (C) 2003, 2004, 2005 Paul Boddie <paul@boddie.org.uk>     7      8 This library is free software; you can redistribute it and/or     9 modify it under the terms of the GNU Lesser General Public    10 License as published by the Free Software Foundation; either    11 version 2.1 of the License, or (at your option) any later version.    12     13 This library is distributed in the hope that it will be useful,    14 but WITHOUT ANY WARRANTY; without even the implied warranty of    15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    16 Lesser General Public License for more details.    17     18 You should have received a copy of the GNU Lesser General Public    19 License along with this library; if not, write to the Free Software    20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA    21 """    22     23 __version__ = "0.3.5"    24     25 from libxml2dom.macrolib import *    26 from libxml2dom.macrolib import \    27     createDocument as Node_createDocument, \    28     parseString as Node_parseString, parseURI as Node_parseURI, \    29     parseFile as Node_parseFile, \    30     toString as Node_toString, toStream as Node_toStream, \    31     toFile as Node_toFile    32     33 # Attribute and node list wrappers.    34     35 class NamedNodeMap(object):    36     37     """    38     A wrapper around Node objects providing DOM and dictionary convenience    39     methods.    
40     """    41     42     def __init__(self, node):    43         self.node = node    44     45     def getNamedItem(self, name):    46         return self.node.getAttributeNode(name)    47     48     def getNamedItemNS(self, ns, localName):    49         return self.node.getAttributeNodeNS(ns, localName)    50     51     def setNamedItem(self, node):    52         try:    53             old = self.getNamedItem(node.nodeName)    54         except KeyError:    55             old = None    56         self.node.setAttributeNode(node)    57         return old    58     59     def setNamedItemNS(self, node):    60         try:    61             old = self.getNamedItemNS(node.namespaceURI, node.localName)    62         except KeyError:    63             old = None    64         self.node.setAttributeNodeNS(node)    65         return old    66     67     def removeNamedItem(self, name):    68         try:    69             old = self.getNamedItem(name)    70         except KeyError:    71             old = None    72         self.node.removeAttribute(name)    73         return old    74     75     def removeNamedItemNS(self, ns, localName):    76         try:    77             old = self.getNamedItemNS(ns, localName)    78         except KeyError:    79             old = None    80         self.node.removeAttributeNS(ns, localName)    81         return old    82     83     # Dictionary emulation methods.    84     85     def __getitem__(self, name):    86         return self.getNamedItem(name)    87     88     def __setitem__(self, name, node):    89         if name == node.nodeName:    90             self.setNamedItem(node)    91         else:    92             raise KeyError, name    93     94     def __delitem__(self, name):    95         # NOTE: To be implemented.    
96         pass    97     98     def values(self):    99         return [Attribute(_node, self.node.ownerDocument) for _node in Node_attributes(self.node.as_native_node()).values()]   100    101     def keys(self):   102         return [(attr.namespaceURI, attr.localName) for attr in self.values()]   103    104     def items(self):   105         return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]   106    107     def __repr__(self):   108         return str(self)   109    110     def __str__(self):   111         return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])   112    113     def _length(self):   114         return len(self.values())   115    116     length = property(_length)   117    118 class NodeList(list):   119    120     "A wrapper around node lists."   121    122     def item(self, index):   123         return self[index]   124    125     def _length(self):   126         return len(self)   127    128     length = property(_length)   129    130 # Node classes.   131    132 class Node(object):   133    134     """   135     A DOM-style wrapper around libxml2mod objects.   
136     """   137    138     ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE   139     COMMENT_NODE = xml.dom.Node.COMMENT_NODE   140     DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE   141     DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE   142     ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE   143     ENTITY_NODE = xml.dom.Node.ENTITY_NODE   144     ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE   145     NOTATION_NODE = xml.dom.Node.NOTATION_NODE   146     PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE   147     TEXT_NODE = xml.dom.Node.TEXT_NODE   148    149     def __init__(self, node, ownerDocument=None):   150         self._node = node   151         self.ownerDocument = ownerDocument   152    153     def as_native_node(self):   154         return self._node   155    156     def _nodeType(self):   157         return Node_nodeType(self._node)   158    159     def _childNodes(self):   160    161         # NOTE: Consider a generator instead.   162    163         return NodeList([Node(_node, self.ownerDocument) for _node in Node_childNodes(self._node)])   164    165     def _attributes(self):   166         return NamedNodeMap(self)   167    168     def _namespaceURI(self):   169         return Node_namespaceURI(self._node)   170    171     def _nodeValue(self):   172         return Node_nodeValue(self._node)   173    174     def _setNodeValue(self, value):   175         Node_setNodeValue(self._node, value)   176    177     def _prefix(self):   178         return Node_prefix(self._node)   179    180     def _nodeName(self):   181         return Node_nodeName(self._node)   182    183     def _tagName(self):   184         return Node_tagName(self._node)   185    186     def _localName(self):   187         return Node_localName(self._node)   188    189     def _parentNode(self):   190         return get_node(Node_parentNode(self._node), self)   191    192     def _previousSibling(self):   193         return 
Node(Node_previousSibling(self._node), self.ownerDocument)   194    195     def _nextSibling(self):   196         return Node(Node_nextSibling(self._node), self.ownerDocument)   197    198     def _doctype(self):   199         return Node(Node_doctype(self._node), self.ownerDocument)   200    201     def _publicId(self):   202         # NOTE: To be fixed when the libxml2mod API has been figured out.   203         if self.nodeType != self.DOCUMENT_TYPE_NODE:   204             return None   205         declaration = self.toString()   206         return self._findId(declaration, "PUBLIC")   207    208     def _systemId(self):   209         # NOTE: To be fixed when the libxml2mod API has been figured out.   210         if self.nodeType != self.DOCUMENT_TYPE_NODE:   211             return None   212         declaration = self.toString()   213         if self._findId(declaration, "PUBLIC"):   214             return self._findIdValue(declaration, 0)   215         return self._findId(declaration, "SYSTEM")   216    217     # NOTE: To be removed when the libxml2mod API has been figured out.   
218    219     def _findId(self, declaration, identifier):   220         i = declaration.find(identifier)   221         if i == -1:   222             return None   223         return self._findIdValue(declaration, i)   224    225     def _findIdValue(self, declaration, i):   226         q = declaration.find('"', i)   227         if q == -1:   228             return None   229         q2 = declaration.find('"', q + 1)   230         if q2 == -1:   231             return None   232         return declaration[q+1:q2]   233    234     def hasAttributeNS(self, ns, localName):   235         return Node_hasAttributeNS(self._node, ns, localName)   236    237     def hasAttribute(self, name):   238         return Node_hasAttribute(self._node, name)   239    240     def getAttributeNS(self, ns, localName):   241         return Node_getAttributeNS(self._node, ns, localName)   242    243     def getAttribute(self, name):   244         return Node_getAttribute(self._node, name)   245    246     def getAttributeNodeNS(self, ns, localName):   247         return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.ownerDocument, self)   248    249     def getAttributeNode(self, localName):   250         return Attribute(Node_getAttributeNode(self._node, localName), self.ownerDocument, self)   251    252     def setAttributeNS(self, ns, name, value):   253         Node_setAttributeNS(self._node, ns, name, value)   254    255     def setAttribute(self, name, value):   256         Node_setAttribute(self._node, name, value)   257    258     def setAttributeNodeNS(self, node):   259         Node_setAttributeNodeNS(self._node, node._node)   260    261     def setAttributeNode(self, node):   262         Node_setAttributeNode(self._node, node._node)   263    264     def removeAttributeNS(self, ns, localName):   265         Node_removeAttributeNS(self._node, ns, localName)   266    267     def removeAttribute(self, name):   268         Node_removeAttribute(self._node, name)   
269    270     def createElementNS(self, ns, name):   271         return Node(Node_createElementNS(self._node, ns, name), self.ownerDocument)   272    273     def createElement(self, name):   274         return Node(Node_createElement(self._node, name), self.ownerDocument)   275    276     def createAttributeNS(self, ns, name):   277         tmp = self.createElement("tmp")   278         return Attribute(Node_createAttributeNS(tmp._node, ns, name))   279    280     def createAttribute(self, name):   281         tmp = self.createElement("tmp")   282         return Attribute(Node_createAttribute(tmp._node, name))   283    284     def createTextNode(self, value):   285         return Node(Node_createTextNode(self._node, value), self.ownerDocument)   286    287     def createComment(self, value):   288         return Node(Node_createComment(self._node, value), self.ownerDocument)   289    290     def importNode(self, node, deep):   291         if hasattr(node, "as_native_node"):   292             return Node(Node_importNode(self._node, node.as_native_node(), deep), self.ownerDocument)   293         else:   294             return Node(Node_importNode_DOM(self._node, node, deep), self.ownerDocument)   295    296     def insertBefore(self, tmp, oldNode):   297         if hasattr(tmp, "as_native_node"):   298             return Node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)   299         else:   300             return Node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)   301    302     def replaceChild(self, tmp, oldNode):   303         if hasattr(tmp, "as_native_node"):   304             return Node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)   305         else:   306             return Node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)   307    308     def appendChild(self, tmp):   309        
 if hasattr(tmp, "as_native_node"):   310             return Node(Node_appendChild(self._node, tmp.as_native_node()), self.ownerDocument)   311         else:   312             return Node(Node_appendChild(self._node, tmp), self.ownerDocument)   313    314     def removeChild(self, tmp):   315         if hasattr(tmp, "as_native_node"):   316             Node_removeChild(self._node, tmp.as_native_node())   317         else:   318             Node_removeChild(self._node, tmp)   319    320     def getElementsByTagName(self, tagName):   321         return self.xpath("//" + tagName)   322    323     def getElementsByTagNameNS(self, namespaceURI, localName):   324         return self.xpath("//ns:" + localName, namespaces={"ns" : namespaceURI})   325    326     def normalize(self):   327         text_nodes = []   328         for node in self.childNodes:   329             if node.nodeType == node.TEXT_NODE:   330                 text_nodes.append(node)   331             elif len(text_nodes) != 0:   332                 self._normalize(text_nodes)   333                 text_nodes = []   334         if len(text_nodes) != 0:   335             self._normalize(text_nodes)   336    337     def _normalize(self, text_nodes):   338         texts = []   339         for text_node in text_nodes[:-1]:   340             texts.append(text_node.nodeValue)   341             self.removeChild(text_node)   342         texts.append(text_nodes[-1].nodeValue)   343         self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])   344    345     childNodes = property(_childNodes)   346     value = data = nodeValue = property(_nodeValue, _setNodeValue)   347     name = nodeName = property(_nodeName)   348     tagName = property(_tagName)   349     namespaceURI = property(_namespaceURI)   350     prefix = property(_prefix)   351     localName = property(_localName)   352     parentNode = property(_parentNode)   353     nodeType = property(_nodeType)   354     attributes = 
property(_attributes)   355     previousSibling = property(_previousSibling)   356     nextSibling = property(_nextSibling)   357     doctype = property(_doctype)   358     publicId = property(_publicId)   359     systemId = property(_systemId)   360    361     # NOTE: To be fixed - these being doctype-specific values.   362    363     entities = {}   364     notations = {}   365    366     #def isSameNode(self, other):   367     #    return self._node.nodePath() == other._node.nodePath()   368    369     #def __eq__(self, other):   370     #    return self._node.nodePath() == other._node.nodePath()   371    372     # 4DOM extensions to the usual PyXML API.   373     # NOTE: To be finished.   374    375     def xpath(self, expr, variables=None, namespaces=None):   376         result = Node_xpath(self._node, expr, variables, namespaces)   377         if isinstance(result, str):   378             return to_unicode(result)   379         elif hasattr(result, "__len__"):   380             return NodeList([get_node(_node, self) for _node in result])   381         else:   382             return result   383    384     # Convenience methods.   385    386     def toString(self, encoding=None, prettyprint=0):   387         return toString(self, encoding, prettyprint)   388    389     def toStream(self, stream, encoding=None, prettyprint=0):   390         toStream(self, stream, encoding, prettyprint)   391    392     def toFile(self, f, encoding=None, prettyprint=0):   393         toFile(self, f, encoding, prettyprint)   394    395 # Attribute nodes.   396    397 class Attribute(Node):   398    399     "A class providing attribute access."   
400    401     def __init__(self, node, ownerDocument=None, ownerElement=None):   402         Node.__init__(self, node, ownerDocument)   403         self.ownerElement = ownerElement   404    405     def _parentNode(self):   406         return self.ownerElement   407    408     parentNode = property(_parentNode)   409    410 # Document housekeeping mechanisms.   411    412 class Document(Node):   413    414     "A class providing document-level housekeeping."   415    416     def __init__(self, node):   417         self._node = node   418    419     def _ownerDocument(self):   420         return self   421    422     def _parentNode(self):   423         return None   424    425     def __del__(self):   426         #print "Freeing document", self._node   427         libxml2mod.xmlFreeDoc(self._node)   428    429     ownerDocument = property(_ownerDocument)   430     parentNode = property(_parentNode)   431    432 class DocumentType(object):   433    434     "A class providing a container for document type information."   435    436     def __init__(self, localName, publicId, systemId):   437         self.name = self.localName = localName   438         self.publicId = publicId   439         self.systemId = systemId   440    441         # NOTE: Nothing is currently provided to support the following   442         # NOTE: attributes.   443    444         self.entities = {}   445         self.notations = {}   446    447 # Factory functions.   448    449 def get_node(_node, context_node):   450     if Node_nodeType(_node) == context_node.DOCUMENT_NODE:   451         return context_node.ownerDocument   452     elif Node_nodeType(_node) == context_node.ATTRIBUTE_NODE:   453         return Attribute(_node, context_node.ownerDocument, context_node)   454     else:   455         return Node(_node, context_node.ownerDocument)   456    457 # Utility functions.   
def createDocumentType(localName, publicId, systemId):

    """
    Return a DocumentType object holding the given 'localName', 'publicId' and
    'systemId'.
    """

    return DocumentType(localName, publicId, systemId)

def createDocument(namespaceURI, localName, doctype):

    """
    Return a Document wrapping the low-level document created from the given
    'namespaceURI', 'localName' and 'doctype'.
    """

    return Document(Node_createDocument(namespaceURI, localName, doctype))

def parse(stream_or_string, html=0):

    """
    Parse the given 'stream_or_string', where the supplied object can either be
    a stream (such as a file or stream object), or a string (containing the
    filename of a document). If the optional 'html' parameter is set to a true
    value, the content to be parsed will be treated as being HTML rather than
    XML.

    A document object is returned by this function.
    """

    # Anything without a read method is taken to be a filename.

    if not hasattr(stream_or_string, "read"):
        return parseFile(stream_or_string, html)

    # The entire content of a stream is read before parsing.

    return parseString(stream_or_string.read(), html)

def parseFile(filename, html=0):

    """
    Parse the file having the given 'filename'. If the optional 'html' parameter
    is set to a true value, the content to be parsed will be treated as being
    HTML rather than XML.

    A document object is returned by this function.
    """

    return Document(Node_parseFile(filename, html))

def parseString(s, html=0):

    """
    Parse the content of the given string 's'. If the optional 'html' parameter
    is set to a true value, the content to be parsed will be treated as being
    HTML rather than XML.

    A document object is returned by this function.
    """

    return Document(Node_parseString(s, html))

def parseURI(uri, html=0):

    """
    Parse the content found at the given 'uri'. If the optional 'html' parameter
    is set to a true value, the content to be parsed will be treated as being
    HTML rather than XML.

    The parseURI function does not currently work with HTML. Use parse with a
    stream object instead. For example:

    d = parse(urllib.urlopen("http://www.python.org"), html=1)

    A document object is returned by this function.
    """

    return Document(Node_parseURI(uri, html))

def toString(node, encoding=None, prettyprint=0):

    """
    Return a string containing the serialised form of the given 'node' and its
    children. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    return Node_toString(node.as_native_node(), encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to the given
    'stream'. The optional 'encoding' can be used to override the default
    character encoding used in the serialisation. The optional 'prettyprint'
    indicates whether the serialised form is prettyprinted or not (the default
    setting).
    """

    Node_toStream(node.as_native_node(), stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Write the serialised form of the given 'node' and its children to a file
    having the given 'filename'. The optional 'encoding' can be used to override
    the default character encoding used in the serialisation. The optional
    'prettyprint' indicates whether the serialised form is prettyprinted or not
    (the default setting).
    """

    Node_toFile(node.as_native_node(), filename, encoding, prettyprint)

def adoptNodes(nodes):

    """
    A special utility method which adopts the given low-level 'nodes' and which
    returns a list of high-level equivalents. This is currently experimental and
    should not be casually used.
    """

    if not nodes:
        return []

    # All nodes share one Document wrapper, made from the low-level document
    # obtained for the first node.

    doc = Document(libxml2mod.doc(nodes[0]))
    return [Node(node, doc) for node in nodes]

# vim: tabstop=4 expandtab shiftwidth=4
libxml2dom

libxml2dom/__init__.py

libxml2dom/__init__.py