libxml2dom

__init__.py

14:c1ea85c65c06
2003-11-10 paulb [project @ 2003-11-10 23:52:54 by paulb] Changed various methods to always return Unicode objects. Beware that passing Unicode objects into certain libxml2/libxslt functions can cause them to be quite upset.
     1 #!/usr/bin/env python     2      3 """     4 DOM wrapper around libxml2.     5 """     6      7 import xml.dom     8 import libxml2     9 import sys    10     11 # NOTE: libxml2 seems to use UTF-8 throughout.    12     13 def to_unicode(s):    14     if s is None:    15         return None    16     elif type(s) == type(""):    17         return unicode(s, encoding="utf-8")    18     else:    19         return s    20     21 # NOTE: Consider a generator instead.    22     23 class NamedNodeMap(object):    24     25     def __init__(self, node):    26         self.node = node    27     28     def getNamedItem(self, name):    29         return self.node.getAttributeNode(name)    30     31     def getNamedItemNS(self, ns, localName):    32         return self.node.getAttributeNodeNS(ns, localName)    33     34     def setNamedItem(self, node):    35         self.node.setAttributeNode(node.name, node)    36     37     def setNamedItemNS(self, node):    38         self.node.setAttributeNodeNS(node.namespaceURI, node.localName, node)    39     40     def __getitem__(self, name):    41         return self.getNamedItem(name)    42     43     def __setitem__(self, name, node):    44         if name == node.nodeName:    45             self.setNamedItem(node)    46         else:    47             raise KeyError, name    48     49     def __delitem__(self, name):    50         # NOTE: To be implemented.    51         pass    52     53     def values(self):    54         attributes = []    55         _attribute = self.node.as_native_node().properties    56         while _attribute is not None:    57             attributes.append(Node(_attribute, ownerElement=self.node))    58             _attribute = _attribute.next    59         return attributes    60     61     def keys(self):    62         return [(attr.namespaceURI, attr.localName) for attr in self.values()]    63     64     def items(self):    65         return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]    66     67     def __repr__(self):    68         return str(self)    69     70     def __str__(self):    71         return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])    72     73 def _get_prefix_and_localName(name):    74     t = name.split(":")    75     if len(t) == 1:    76         return None, name    77     elif len(t) == 2:    78         return t    79     else:    80         # NOTE: Should raise an exception.    81         return None, None    82     83 class TemporaryNode(object):    84     def __init__(self, ns, name, nodeType):    85         self.ns = ns    86         self.name = name    87         self.nodeType = nodeType    88         self.prefix, self.localName = _get_prefix_and_localName(self.name)    89     90 class Node(object):    91     92     _nodeTypes = {    93         "attribute" : xml.dom.Node.ATTRIBUTE_NODE,    94         "comment" : xml.dom.Node.COMMENT_NODE,    95         "document_xml" : xml.dom.Node.DOCUMENT_NODE,    96         "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,    97         "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.    98         "element" : xml.dom.Node.ELEMENT_NODE,    99         "entity" : xml.dom.Node.ENTITY_NODE,   100         "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,   101         "notation" : xml.dom.Node.NOTATION_NODE,   102         "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,   103         "text" : xml.dom.Node.TEXT_NODE   104         }   105    106     def __init__(self, node, ownerElement=None, doctype=None):   107         self._node = node   108         self.ownerElement = ownerElement   109         self.doctype = doctype   110    111     def as_native_node(self):   112         return self._node   113    114     def _ownerDocument(self):   115         return Node(self._node.doc)   116    117     def _nodeType(self):   118         return self._nodeTypes[self._node.type]   119    120     def _childNodes(self):   121    122         # NOTE: Consider a generator instead.   123    124         child_nodes = []   125         _node = self._node.children   126         while _node is not None:   127             child_nodes.append(Node(_node))   128             _node = _node.next   129         return child_nodes   130    131     def _attributes(self):   132         return NamedNodeMap(self)   133    134     def _getNs(self):   135    136         "Internal namespace information retrieval."   137    138         try:   139             return self._node.ns()   140         except libxml2.treeError:   141             return None   142    143     def _namespaceURI(self):   144         ns = self._getNs()   145         if ns is not None:   146             return to_unicode(ns.content)   147         else:   148             return None   149    150     def _nodeValue(self):   151         return to_unicode(self._node.content)   152    153     def _prefix(self):   154         ns = self._getNs()   155         if ns is not None:   156             return to_unicode(ns.name)   157         else:   158             return None   159    160     def _nodeName(self):   161         prefix = self._prefix()   162         if prefix is not None:   163             return prefix + ":" + self._localName()   164         else:   165             return self._localName()   166    167     def _tagName(self):   168         if self._node.type == "element":   169             return self._nodeName()   170         else:   171             return None   172    173     def _localName(self):   174         return to_unicode(self._node.name)   175    176     def _parentNode(self):   177         if self.nodeType == xml.dom.Node.DOCUMENT_NODE:   178             return None   179         else:   180             return Node(self._node.parent)   181    182     def _nextSibling(self):   183         if self._node.next is not None:   184             return Node(self._node.next)   185         else:   186             return None   187    188     def hasAttributeNS(self, ns, localName):   189         return self.getAttributeNS(ns, localName) is not None   190    191     def hasAttribute(self, name):   192         return self.getAttribute(name) is not None   193    194     def getAttributeNS(self, ns, localName):   195         return to_unicode(self._node.nsProp(localName, ns))   196    197     def getAttribute(self, name):   198         return to_unicode(self._node.prop(name))   199    200     def getAttributeNodeNS(self, ns, localName):   201         return self.attributes[(ns, localName)]   202    203     def getAttributeNode(self, localName):   204         # NOTE: Needs verifying.   205         return self.attributes[(None, localName)]   206    207     def setAttributeNS(self, ns, name, value):   208         prefix, localName = _get_prefix_and_localName(name)   209         if prefix is not None:   210             self._node.setNsProp(self._node.newNs(ns, prefix), localName, value)   211         elif ns == self._node.ns().content:   212             self._node.setNsProp(self._node.ns().content, localName, value)   213         else:   214             # NOTE: Needs verifying: what should happen to the namespace?   215             self._node.setNsProp(None, localName, value)   216    217     def setAttribute(self, name, value):   218         self._node.setProp(name, value)   219    220     def setAttributeNodeNS(self, ns, name, node):   221         # NOTE: Not actually putting the node on the element.   222         self.setAttributeNS(ns, name, node.nodeValue)   223    224     def setAttributeNode(self, name, node):   225         # NOTE: Not actually putting the node on the element.   226         self.setAttribute(name, node.nodeValue)   227    228     def createElementNS(self, ns, name):   229         prefix, localName = _get_prefix_and_localName(name)   230         _node = libxml2.newNode(localName)   231         _ns = _node.newNs(ns, prefix)   232         _node.setNs(_ns)   233         return Node(_node)   234    235     def createElement(self, name):   236         _node = libxml2.newNode(localName)   237         return Node(_node)   238    239     def createAttributeNS(self, ns, name):   240         prefix, localName = _get_prefix_and_localName(name)   241         return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)   242    243     def createAttribute(self, name):   244         return TemporaryNode(ns, name, xml.dom.Node.ATTRIBUTE_NODE)   245    246     def createTextNode(self, value):   247         return Node(libxml2.newText(value))   248    249     def _add_node(self, tmp):   250         if tmp.nodeType == xml.dom.Node.ATTRIBUTE_NODE:   251             if tmp.ns is not None:   252                 _child = self._node.newNsProp(None, tmp.localName, None)   253                 _ns = _child.newNs(tmp.ns, tmp.prefix)   254                 _child.setNs(_ns)   255             else:   256                 _child = self._node.newProp(None, tmp.name, None)   257         else:   258             _child = None   259    260         return _child   261    262     def importNode(self, node, deep):   263    264         if node.nodeType == xml.dom.Node.ELEMENT_NODE:   265             imported_element = self.ownerDocument.createElementNS(node.namespaceURI, node.tagName)   266             for value in node.attributes.values():   267                 imported_element.setAttributeNS(value.namespaceURI, value.nodeName, value.nodeValue)   268    269             if deep:   270                 for child in node.childNodes:   271                     imported_child = self.importNode(child, deep)   272                     if imported_child:   273                         imported_element.appendChild(imported_child)   274    275             return imported_element   276    277         elif node.nodeType == xml.dom.Node.TEXT_NODE:   278             return self.ownerDocument.createTextNode(node.nodeValue)   279    280         elif node.nodeType == xml.dom.Node.ATTRIBUTE_NODE:   281             return self.ownerDocument.createAttributeNS(node.namespaceURI, node.name)   282    283         raise ValueError, node.nodeType   284    285     def insertBefore(self, tmp, oldNode):   286         if tmp.nodeType == xml.dom.Node.TEXT_NODE:   287             _child = tmp._node   288         elif tmp.nodeType == xml.dom.Node.ELEMENT_NODE:   289             _child = tmp._node   290         else:   291             _child = self._add_node(tmp)   292             _child.unlinkNode()   293         return Node(oldNode._node.addPrevSibling(_child))   294    295     def replaceChild(self, tmp, oldNode):   296         if tmp.nodeType == xml.dom.Node.TEXT_NODE:   297             _child = tmp._node   298         elif tmp.nodeType == xml.dom.Node.ELEMENT_NODE:   299             _child = tmp._node   300         else:   301             _child = self._add_node(tmp)   302             _child.unlinkNode()   303         return Node(oldNode._node.replaceNode(_child))   304    305     def appendChild(self, tmp):   306         if tmp.nodeType == xml.dom.Node.TEXT_NODE:   307             _child = self._node.addChild(tmp._node)   308         elif tmp.nodeType == xml.dom.Node.ELEMENT_NODE:   309             _child = self._node.addChild(tmp._node)   310         else:   311             _child = self._add_node(tmp)   312         return Node(_child)   313    314     def removeChild(self, tmp):   315         tmp._node.unlinkNode()   316    317     #doctype defined in __init__   318     #ownerElement defined in __init__   319     ownerDocument = property(_ownerDocument)   320     childNodes = property(_childNodes)   321     value = data = nodeValue = property(_nodeValue)   322     name = nodeName = property(_nodeName)   323     tagName = property(_tagName)   324     namespaceURI = property(_namespaceURI)   325     prefix = property(_prefix)   326     localName = property(_localName)   327     parentNode = property(_parentNode)   328     nodeType = property(_nodeType)   329     attributes = property(_attributes)   330     nextSibling = property(_nextSibling)   331    332     def isSameNode(self, other):   333         return self._node.nodePath() == other._node.nodePath()   334    335     def __eq__(self, other):   336         return self._node.nodePath() == other._node.nodePath()   337    338 # Utility functions.   339    340 def createDocumentType(localName, publicId, systemId):   341     return None   342    343 def createDocument(namespaceURI, localName, doctype):   344     # NOTE: Fixed to use version 1.0 only.   345     d = Node(libxml2.newDoc("1.0"), doctype=doctype)   346     if localName is not None:   347         root = d.createElementNS(namespaceURI, localName)   348         d.appendChild(root)   349     return d   350    351 def parse(stream_or_string):   352     if hasattr(stream_or_string, "read"):   353         stream = stream_or_string   354     else:   355         stream = open(stream_or_string)   356     return parseString(stream.read())   357    358 def parseString(s):   359     return Node(libxml2.parseDoc(s))   360    361 def parseURI(uri):   362     return Node(libxml2.parseURI(uri))   363    364 def toString(node):   365     return node.as_native_node().serialize()   366    367 def toStream(node, stream=None):   368     stream = stream or sys.stdout   369     stream.write(toString(node))   370    371 # vim: tabstop=4 expandtab shiftwidth=4