libxml2dom (file libxml2dom/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 DOM wrapper around libxml2, specifically the libxml2mod Python extension module.     5      6 Copyright (C) 2003, 2004, 2005, 2006 Paul Boddie <paul@boddie.org.uk>     7      8 This library is free software; you can redistribute it and/or     9 modify it under the terms of the GNU Lesser General Public    10 License as published by the Free Software Foundation; either    11 version 2.1 of the License, or (at your option) any later version.    12     13 This library is distributed in the hope that it will be useful,    14 but WITHOUT ANY WARRANTY; without even the implied warranty of    15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    16 Lesser General Public License for more details.    17     18 You should have received a copy of the GNU Lesser General Public    19 License along with this library; if not, write to the Free Software    20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA    21 """    22     23 __version__ = "0.3.6"    24     25 from libxml2dom.macrolib import *    26 from libxml2dom.macrolib import \    27     createDocument as Node_createDocument, \    28     parseString as Node_parseString, parseURI as Node_parseURI, \    29     parseFile as Node_parseFile, \    30     toString as Node_toString, toStream as Node_toStream, \    31     toFile as Node_toFile    32     33 # Attribute and node list wrappers.    34     35 class NamedNodeMap(object):    36     37     """    38     A wrapper around Node objects providing DOM and dictionary convenience    39     methods.    
40     """    41     42     def __init__(self, node):    43         self.node = node    44     45     def getNamedItem(self, name):    46         return self.node.getAttributeNode(name)    47     48     def getNamedItemNS(self, ns, localName):    49         return self.node.getAttributeNodeNS(ns, localName)    50     51     def setNamedItem(self, node):    52         try:    53             old = self.getNamedItem(node.nodeName)    54         except KeyError:    55             old = None    56         self.node.setAttributeNode(node)    57         return old    58     59     def setNamedItemNS(self, node):    60         try:    61             old = self.getNamedItemNS(node.namespaceURI, node.localName)    62         except KeyError:    63             old = None    64         self.node.setAttributeNodeNS(node)    65         return old    66     67     def removeNamedItem(self, name):    68         try:    69             old = self.getNamedItem(name)    70         except KeyError:    71             old = None    72         self.node.removeAttribute(name)    73         return old    74     75     def removeNamedItemNS(self, ns, localName):    76         try:    77             old = self.getNamedItemNS(ns, localName)    78         except KeyError:    79             old = None    80         self.node.removeAttributeNS(ns, localName)    81         return old    82     83     # Dictionary emulation methods.    84     85     def __getitem__(self, name):    86         return self.getNamedItem(name)    87     88     def __setitem__(self, name, node):    89         if name == node.nodeName:    90             self.setNamedItem(node)    91         else:    92             raise KeyError, name    93     94     def __delitem__(self, name):    95         # NOTE: To be implemented.    
96         pass    97     98     def values(self):    99         return [Attribute(_node, self.node.ownerDocument) for _node in Node_attributes(self.node.as_native_node()).values()]   100    101     def keys(self):   102         return [(attr.namespaceURI, attr.localName) for attr in self.values()]   103    104     def items(self):   105         return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]   106    107     def __repr__(self):   108         return str(self)   109    110     def __str__(self):   111         return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])   112    113     def _length(self):   114         return len(self.values())   115    116     length = property(_length)   117    118 class NodeList(list):   119    120     "A wrapper around node lists."   121    122     def item(self, index):   123         return self[index]   124    125     def _length(self):   126         return len(self)   127    128     length = property(_length)   129    130 # Node classes.   131    132 class Node(object):   133    134     """   135     A DOM-style wrapper around libxml2mod objects.   
136     """   137    138     ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE   139     COMMENT_NODE = xml.dom.Node.COMMENT_NODE   140     DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE   141     DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE   142     ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE   143     ENTITY_NODE = xml.dom.Node.ENTITY_NODE   144     ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE   145     NOTATION_NODE = xml.dom.Node.NOTATION_NODE   146     PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE   147     TEXT_NODE = xml.dom.Node.TEXT_NODE   148    149     def __init__(self, node, ownerDocument=None):   150         self._node = node   151         self.ownerDocument = ownerDocument   152    153     def as_native_node(self):   154         return self._node   155    156     def _nodeType(self):   157         return Node_nodeType(self._node)   158    159     def _childNodes(self):   160    161         # NOTE: Consider a generator instead.   162    163         return NodeList([Node(_node, self.ownerDocument) for _node in Node_childNodes(self._node)])   164    165     def _attributes(self):   166         return NamedNodeMap(self)   167    168     def _namespaceURI(self):   169         return Node_namespaceURI(self._node)   170    171     def _nodeValue(self):   172         return Node_nodeValue(self._node)   173    174     def _setNodeValue(self, value):   175         Node_setNodeValue(self._node, value)   176    177     def _prefix(self):   178         return Node_prefix(self._node)   179    180     def _nodeName(self):   181         return Node_nodeName(self._node)   182    183     def _tagName(self):   184         return Node_tagName(self._node)   185    186     def _localName(self):   187         return Node_localName(self._node)   188    189     def _parentNode(self):   190         return get_node(Node_parentNode(self._node), self)   191    192     def _previousSibling(self):   193         return 
Node(Node_previousSibling(self._node), self.ownerDocument)   194    195     def _nextSibling(self):   196         return Node(Node_nextSibling(self._node), self.ownerDocument)   197    198     def _doctype(self):   199         return Node(Node_doctype(self._node), self.ownerDocument)   200    201     def _publicId(self):   202         # NOTE: To be fixed when the libxml2mod API has been figured out.   203         if self.nodeType != self.DOCUMENT_TYPE_NODE:   204             return None   205         declaration = self.toString()   206         return self._findId(declaration, "PUBLIC")   207    208     def _systemId(self):   209         # NOTE: To be fixed when the libxml2mod API has been figured out.   210         if self.nodeType != self.DOCUMENT_TYPE_NODE:   211             return None   212         declaration = self.toString()   213         if self._findId(declaration, "PUBLIC"):   214             return self._findIdValue(declaration, 0)   215         return self._findId(declaration, "SYSTEM")   216    217     # NOTE: To be removed when the libxml2mod API has been figured out.   
218    219     def _findId(self, declaration, identifier):   220         i = declaration.find(identifier)   221         if i == -1:   222             return None   223         return self._findIdValue(declaration, i)   224    225     def _findIdValue(self, declaration, i):   226         q = declaration.find('"', i)   227         if q == -1:   228             return None   229         q2 = declaration.find('"', q + 1)   230         if q2 == -1:   231             return None   232         return declaration[q+1:q2]   233    234     def hasAttributeNS(self, ns, localName):   235         return Node_hasAttributeNS(self._node, ns, localName)   236    237     def hasAttribute(self, name):   238         return Node_hasAttribute(self._node, name)   239    240     def getAttributeNS(self, ns, localName):   241         return Node_getAttributeNS(self._node, ns, localName)   242    243     def getAttribute(self, name):   244         return Node_getAttribute(self._node, name)   245    246     def getAttributeNodeNS(self, ns, localName):   247         return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.ownerDocument, self)   248    249     def getAttributeNode(self, localName):   250         return Attribute(Node_getAttributeNode(self._node, localName), self.ownerDocument, self)   251    252     def setAttributeNS(self, ns, name, value):   253         Node_setAttributeNS(self._node, ns, name, value)   254    255     def setAttribute(self, name, value):   256         Node_setAttribute(self._node, name, value)   257    258     def setAttributeNodeNS(self, node):   259         Node_setAttributeNodeNS(self._node, node._node)   260    261     def setAttributeNode(self, node):   262         Node_setAttributeNode(self._node, node._node)   263    264     def removeAttributeNS(self, ns, localName):   265         Node_removeAttributeNS(self._node, ns, localName)   266    267     def removeAttribute(self, name):   268         Node_removeAttribute(self._node, name)   
269    270     def createElementNS(self, ns, name):   271         return Node(Node_createElementNS(self._node, ns, name), self.ownerDocument)   272    273     def createElement(self, name):   274         return Node(Node_createElement(self._node, name), self.ownerDocument)   275    276     def createAttributeNS(self, ns, name):   277         tmp = self.createElement("tmp")   278         return Attribute(Node_createAttributeNS(tmp._node, ns, name))   279    280     def createAttribute(self, name):   281         tmp = self.createElement("tmp")   282         return Attribute(Node_createAttribute(tmp._node, name))   283    284     def createTextNode(self, value):   285         return Node(Node_createTextNode(self._node, value), self.ownerDocument)   286    287     def createComment(self, value):   288         return Node(Node_createComment(self._node, value), self.ownerDocument)   289    290     def importNode(self, node, deep):   291         if hasattr(node, "as_native_node"):   292             return Node(Node_importNode(self._node, node.as_native_node(), deep), self.ownerDocument)   293         else:   294             return Node(Node_importNode_DOM(self._node, node, deep), self.ownerDocument)   295    296     def cloneNode(self, deep):   297         # This takes advantage of the ubiquity of importNode (in spite of the DOM specification).   
298         return self.importNode(self, deep)   299    300     def insertBefore(self, tmp, oldNode):   301         if hasattr(tmp, "as_native_node"):   302             return Node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)   303         else:   304             return Node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)   305    306     def replaceChild(self, tmp, oldNode):   307         if hasattr(tmp, "as_native_node"):   308             return Node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)   309         else:   310             return Node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)   311    312     def appendChild(self, tmp):   313         if hasattr(tmp, "as_native_node"):   314             return Node(Node_appendChild(self._node, tmp.as_native_node()), self.ownerDocument)   315         else:   316             return Node(Node_appendChild(self._node, tmp), self.ownerDocument)   317    318     def removeChild(self, tmp):   319         if hasattr(tmp, "as_native_node"):   320             Node_removeChild(self._node, tmp.as_native_node())   321         else:   322             Node_removeChild(self._node, tmp)   323    324     def getElementsByTagName(self, tagName):   325         return self.xpath("//" + tagName)   326    327     def getElementsByTagNameNS(self, namespaceURI, localName):   328         return self.xpath("//ns:" + localName, namespaces={"ns" : namespaceURI})   329    330     def normalize(self):   331         text_nodes = []   332         for node in self.childNodes:   333             if node.nodeType == node.TEXT_NODE:   334                 text_nodes.append(node)   335             elif len(text_nodes) != 0:   336                 self._normalize(text_nodes)   337                 text_nodes = []   338         if len(text_nodes) != 0:   339             
self._normalize(text_nodes)   340    341     def _normalize(self, text_nodes):   342         texts = []   343         for text_node in text_nodes[:-1]:   344             texts.append(text_node.nodeValue)   345             self.removeChild(text_node)   346         texts.append(text_nodes[-1].nodeValue)   347         self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])   348    349     childNodes = property(_childNodes)   350     value = data = nodeValue = property(_nodeValue, _setNodeValue)   351     name = nodeName = property(_nodeName)   352     tagName = property(_tagName)   353     namespaceURI = property(_namespaceURI)   354     prefix = property(_prefix)   355     localName = property(_localName)   356     parentNode = property(_parentNode)   357     nodeType = property(_nodeType)   358     attributes = property(_attributes)   359     previousSibling = property(_previousSibling)   360     nextSibling = property(_nextSibling)   361     doctype = property(_doctype)   362     publicId = property(_publicId)   363     systemId = property(_systemId)   364    365     # NOTE: To be fixed - these being doctype-specific values.   366    367     entities = {}   368     notations = {}   369    370     #def isSameNode(self, other):   371     #    return self._node.nodePath() == other._node.nodePath()   372    373     #def __eq__(self, other):   374     #    return self._node.nodePath() == other._node.nodePath()   375    376     # 4DOM extensions to the usual PyXML API.   377     # NOTE: To be finished.   
378    379     def xpath(self, expr, variables=None, namespaces=None):   380         result = Node_xpath(self._node, expr, variables, namespaces)   381         if isinstance(result, str):   382             return to_unicode(result)   383         elif hasattr(result, "__len__"):   384             return NodeList([get_node(_node, self) for _node in result])   385         else:   386             return result   387    388     # Convenience methods.   389    390     def toString(self, encoding=None, prettyprint=0):   391         return toString(self, encoding, prettyprint)   392    393     def toStream(self, stream, encoding=None, prettyprint=0):   394         toStream(self, stream, encoding, prettyprint)   395    396     def toFile(self, f, encoding=None, prettyprint=0):   397         toFile(self, f, encoding, prettyprint)   398    399 # Attribute nodes.   400    401 class Attribute(Node):   402    403     "A class providing attribute access."   404    405     def __init__(self, node, ownerDocument=None, ownerElement=None):   406         Node.__init__(self, node, ownerDocument)   407         self.ownerElement = ownerElement   408    409     def _parentNode(self):   410         return self.ownerElement   411    412     parentNode = property(_parentNode)   413    414 # Document housekeeping mechanisms.   415    416 class Document(Node):   417    418     "A class providing document-level housekeeping."   419    420     def __init__(self, node):   421         self._node = node   422    423     def _ownerDocument(self):   424         return self   425    426     def _parentNode(self):   427         return None   428    429     def __del__(self):   430         #print "Freeing document", self._node   431         libxml2mod.xmlFreeDoc(self._node)   432    433     ownerDocument = property(_ownerDocument)   434     parentNode = property(_parentNode)   435    436 class DocumentType(object):   437    438     "A class providing a container for document type information."   
439    440     def __init__(self, localName, publicId, systemId):   441         self.name = self.localName = localName   442         self.publicId = publicId   443         self.systemId = systemId   444    445         # NOTE: Nothing is currently provided to support the following   446         # NOTE: attributes.   447    448         self.entities = {}   449         self.notations = {}   450    451 # Factory functions.   452    453 def get_node(_node, context_node):   454     if Node_nodeType(_node) == context_node.DOCUMENT_NODE:   455         return context_node.ownerDocument   456     elif Node_nodeType(_node) == context_node.ATTRIBUTE_NODE:   457         return Attribute(_node, context_node.ownerDocument, context_node)   458     else:   459         return Node(_node, context_node.ownerDocument)   460    461 # Utility functions.   462    463 def createDocumentType(localName, publicId, systemId):   464     return DocumentType(localName, publicId, systemId)   465    466 def createDocument(namespaceURI, localName, doctype):   467     return Document(Node_createDocument(namespaceURI, localName, doctype))   468    469 def parse(stream_or_string, html=0):   470    471     """   472     Parse the given 'stream_or_string', where the supplied object can either be   473     a stream (such as a file or stream object), or a string (containing the   474     filename of a document). If the optional 'html' parameter is set to a true   475     value, the content to be parsed will be treated as being HTML rather than   476     XML.   477    478     A document object is returned by this function.   479     """   480    481     if hasattr(stream_or_string, "read"):   482         stream = stream_or_string   483         return parseString(stream.read(), html)   484     else:   485         return parseFile(stream_or_string, html)   486    487 def parseFile(filename, html=0):   488    489     """   490     Parse the file having the given 'filename'. 
If the optional 'html' parameter   491     is set to a true value, the content to be parsed will be treated as being   492     HTML rather than XML.   493    494     A document object is returned by this function.   495     """   496    497     return Document(Node_parseFile(filename, html))   498    499 def parseString(s, html=0):   500    501     """   502     Parse the content of the given string 's'. If the optional 'html' parameter   503     is set to a true value, the content to be parsed will be treated as being   504     HTML rather than XML.   505    506     A document object is returned by this function.   507     """   508    509     return Document(Node_parseString(s, html))   510    511 def parseURI(uri, html=0):   512    513     """   514     Parse the content found at the given 'uri'. If the optional 'html' parameter   515     is set to a true value, the content to be parsed will be treated as being   516     HTML rather than XML.   517    518     The parseURI does not currently work with HTML. Use parse with a stream   519     object instead. For example:   520    521     d = parse(urllib.urlopen("http://www.python.org"), html=1)   522    523     A document object is returned by this function.   524     """   525    526     return Document(Node_parseURI(uri, html))   527    528 def toString(node, encoding=None, prettyprint=0):   529    530     """   531     Return a string containing the serialised form of the given 'node' and its   532     children. The optional 'encoding' can be used to override the default   533     character encoding used in the serialisation. The optional 'prettyprint'   534     indicates whether the serialised form is prettyprinted or not (the default   535     setting).   
536     """   537    538     return Node_toString(node.as_native_node(), encoding, prettyprint)   539    540 def toStream(node, stream, encoding=None, prettyprint=0):   541    542     """   543     Write the serialised form of the given 'node' and its children to the given   544     'stream'. The optional 'encoding' can be used to override the default   545     character encoding used in the serialisation. The optional 'prettyprint'   546     indicates whether the serialised form is prettyprinted or not (the default   547     setting).   548     """   549    550     Node_toStream(node.as_native_node(), stream, encoding, prettyprint)   551    552 def toFile(node, filename, encoding=None, prettyprint=0):   553    554     """   555     Write the serialised form of the given 'node' and its children to a file   556     having the given 'filename'. The optional 'encoding' can be used to override   557     the default character encoding used in the serialisation. The optional   558     'prettyprint' indicates whether the serialised form is prettyprinted or not   559     (the default setting).   560     """   561    562     Node_toFile(node.as_native_node(), filename, encoding, prettyprint)   563    564 def adoptNodes(nodes):   565    566     """   567     A special utility method which adopts the given low-level 'nodes' and which   568     returns a list of high-level equivalents. This is currently experimental and   569     should not be casually used.   570     """   571    572     if len(nodes) == 0:   573         return []   574     doc = Document(libxml2mod.doc(nodes[0]))   575     results = []   576     for node in nodes:   577         results.append(Node(node, doc))   578     return results   579    580 # vim: tabstop=4 expandtab shiftwidth=4
libxml2dom

libxml2dom/__init__.py

libxml2dom/__init__.py