#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.
"""

import xml.dom
import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):
    """
    Return 's' encoded as UTF-8 if it is a Unicode object; any other value
    (including None) is returned unchanged.
    """
    if not isinstance(s, unicode):
        return s
    return s.encode("utf-8")

def to_unicode(s):
    """
    Return 's' decoded from UTF-8 if it is a plain string; any other value
    (including None) is returned unchanged.
    """
    if not isinstance(s, str):
        return s
    return unicode(s, encoding="utf-8")

def _get_prefix_and_localName(name):
    """
    Split the qualified 'name' on ":" and return a (prefix, localName) pair.

    A name without a colon yields (None, name). A name with more than one
    colon yields (None, None).
    """
    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    if len(parts) == 2:
        # The two-element list from split() is returned as it is.
        return parts
    # NOTE: Should raise an exception.
    return None, None

# Mapping from the type labels reported by libxml2mod.type to the DOM node
# type constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Inverse mapping from DOM node type constants to labels. Two labels share
# DOCUMENT_TYPE_NODE, so only one of them survives here.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_ownerDocument(node):
    """
    Return the result of libxml2mod.doc for 'node', or 'node' itself when
    that result is false.
    """
    document = libxml2mod.doc(node)
    return document or node

def Node_nodeType(node):
    """
    Return the DOM node type constant for 'node'.

    A KeyError is raised for a libxml2mod type label absent from _nodeTypes.
    """
    type_label = libxml2mod.type(node)
    return _nodeTypes[type_label]

def Node_childNodes(node):
    """
    Return a list of the children of 'node' in sibling order, leaving out any
    child whose node type is DOCUMENT_TYPE_NODE.
    """

    # NOTE: Consider a generator instead.

    found = []
    child = libxml2mod.children(node)
    while child is not None:
        # Remove doctypes.
        if Node_nodeType(child) != xml.dom.Node.DOCUMENT_TYPE_NODE:
            found.append(child)
        child = libxml2mod.next(child)
    return found

def Node_attributes(node):
    """
    Return a dictionary mapping (namespace, name) pairs to the attribute nodes
    of 'node'.

    The namespace part is the content of the attribute's namespace node, or
    None where the attribute has no namespace. Keys hold the strings exactly
    as libxml2mod returns them, without conversion to Unicode.
    """
    found = {}
    attr = libxml2mod.properties(node)
    while attr is not None:
        ns = libxml2mod.xmlNodeGetNs(attr)
        if ns is None:
            ns_key = None
        else:
            ns_key = libxml2mod.xmlNodeGetContent(ns)
        found[(ns_key, libxml2mod.name(attr))] = attr
        attr = libxml2mod.next(attr)
    return found

def Node_namespaceURI(node):
    """
    Return the content of the namespace node of 'node' as Unicode, or None if
    'node' has no namespace.
    """
    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return to_unicode(libxml2mod.xmlNodeGetContent(ns))

def Node_nodeValue(node):
    """
    Return the content of 'node' as Unicode.
    """
    content = libxml2mod.xmlNodeGetContent(node)
    return to_unicode(content)

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):
    """
    Set the content of 'node' to 'value', encoding Unicode values as UTF-8.
    """
    # NOTE: Cannot set attribute node values.
    encoded = from_unicode(value)
    libxml2mod.xmlNodeSetContent(node, encoded)

# NOTE: Verify this.
# The DOM 'data' property is read through the same accessor as 'nodeValue'.

Node_data = Node_nodeValue

def Node_prefix(node):
    """
    Return the name of the namespace node of 'node' as Unicode, or None if
    'node' has no namespace.
    """
    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return to_unicode(libxml2mod.name(ns))

def Node_nodeName(node):
    """
    Return "prefix:localName" for 'node', or just the local name when the
    node has no prefix.
    """
    prefix = Node_prefix(node)
    localName = Node_localName(node)
    if prefix is None:
        return localName
    return prefix + ":" + localName

def Node_tagName(node):
    """
    Return the node name of 'node' if it is an element, otherwise None.
    """
    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):
    """
    Return the libxml2 name of 'node' as Unicode.
    """
    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):
    """
    Return the parent of 'node', or None if 'node' is of type "document_xml".
    """
    if libxml2mod.type(node) == "document_xml":
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):
    """
    Return the sibling before 'node', or None if there is none.
    """
    # A single call suffices: the result is either a node or None.
    return libxml2mod.prev(node)

def Node_nextSibling(node):
    """
    Return the sibling after 'node', or None if there is none.
    """
    # A single call suffices: the result is either a node or None.
    return libxml2mod.next(node)

def Node_doctype(node):
    """
    Return the result of libxml2mod.xmlGetIntSubset for 'node'.
    """
    return libxml2mod.xmlGetIntSubset(node)

def Node_hasAttributeNS(node, ns, localName):
    """
    Return whether Node_getAttributeNS finds a value for the attribute with
    namespace 'ns' and local name 'localName' on 'node'.
    """
    return Node_getAttributeNS(node, ns, localName) is not None

def Node_hasAttribute(node, name):
    """
    Return whether Node_getAttribute finds a value for 'name' on 'node'.
    """
    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):
    """
    Return the value, as Unicode, of the attribute on 'node' with namespace
    'ns' and local name 'localName', as reported by libxml2mod.xmlGetNsProp.
    """
    # Encode Unicode arguments as UTF-8, as the attribute setters do.
    ns, localName = map(from_unicode, [ns, localName])
    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):
    """
    Return the value, as Unicode, of the attribute 'name' on 'node', as
    reported by libxml2mod.xmlGetProp.
    """
    # Encode Unicode arguments as UTF-8, as the attribute setters do.
    name = from_unicode(name)
    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):
    """
    Return the attribute node on 'node' with namespace 'ns' and local name
    'localName'.

    A KeyError is raised if Node_attributes has no such entry.
    """
    # NOTE: Needs verifying.
    # Node_attributes keys hold the strings libxml2mod returns, so Unicode
    # arguments are encoded before the lookup.
    ns, localName = map(from_unicode, [ns, localName])
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):
    """
    Return the attribute node on 'node' with no namespace and the given
    'name'.

    A KeyError is raised if Node_attributes has no such entry.
    """
    # NOTE: Needs verifying.
    # Node_attributes keys hold the strings libxml2mod returns, so Unicode
    # arguments are encoded before the lookup.
    return Node_attributes(node)[(None, from_unicode(name))]

def Node_setAttributeNS(node, ns, name, value):
    """
    Create an attribute on 'node' with qualified 'name' and 'value' in the
    namespace 'ns'.

    The element's own namespace node is reused when its content equals 'ns'.
    Otherwise a new namespace node is created if 'name' carries a prefix.
    Failing both, the attribute is created without a namespace.
    """
    # NOTE: Need to convert from Unicode.
    ns, name, value = map(from_unicode, [ns, name, value])

    prefix, localName = _get_prefix_and_localName(name)

    # NOTE: Might need to be xmlSetNsProp.
    if ns is not None:
        node_ns = libxml2mod.xmlNodeGetNs(node)
        if ns == libxml2mod.xmlNodeGetContent(node_ns):
            libxml2mod.xmlNewNsProp(node, node_ns, localName, value)
            return

    if prefix is not None:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
        libxml2mod.xmlNewNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlNewNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):
    """
    Set the attribute 'name' on 'node' to 'value'.
    """
    # NOTE: Need to convert from Unicode.
    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):
    """
    Copy the namespace, name and value of 'attr' onto 'node' as an attribute.
    """
    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):
    """
    Copy the name and value of 'attr' onto 'node' as an attribute.
    """
    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):
    """
    Remove the attribute with namespace 'ns' and local name 'localName' from
    'node'.

    A KeyError is raised by the attribute lookup if no such attribute exists.
    """
    attr = Node_getAttributeNodeNS(node, ns, localName)
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):
    """
    Remove the attribute 'name' from 'node'.
    """
    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):
    """
    Return a new element with qualified 'name' in the namespace 'ns'.

    The 'node' argument is not used when creating the element.
    """
    # NOTE: Need to convert from Unicode.
    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = libxml2mod.xmlNewNs(new_node, ns, prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    return new_node

def Node_createElement(node, name):
    """
    Return a new element called 'name'.

    The 'node' argument is not used when creating the element.
    """
    # NOTE: Need to convert from Unicode.
    name = from_unicode(name)

    return libxml2mod.xmlNewNode(name)

def Node_createAttributeNS(node, ns, name):
    """
    Return a new attribute with qualified 'name' and no value, created on
    'node' in the namespace 'ns'.
    """
    # NOTE: Need to convert from Unicode.
    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
    else:
        new_ns = None
    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):
    """
    Return a new attribute called 'name' with no namespace, created on 'node'.
    """
    # NOTE: Need to convert from Unicode.
    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):
    """
    Return a new text node containing 'value'.

    The 'node' argument is not used when creating the text node.
    """
    # NOTE: Need to convert from Unicode.
    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):
    """
    Return a new comment node containing 'value'.

    The 'node' argument is not used when creating the comment.
    """
    # NOTE: Need to convert from Unicode.
    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_insertBefore(node, tmp, oldNode):
    """
    Add 'tmp' as the previous sibling of 'oldNode' and return the result of
    libxml2mod.xmlAddPrevSibling. The 'node' argument is not used.
    """
    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):
    """
    Replace 'oldNode' with 'tmp' and return the result of
    libxml2mod.xmlReplaceNode. The 'node' argument is not used.
    """
    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):
    """
    Add 'tmp' as a child of 'node' and return the result of
    libxml2mod.xmlAddChild.
    """
    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):
    """
    Unlink 'child' from its tree. The 'node' argument is not used and nothing
    is returned.
    """
    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):
    """
    Return a copy of the libxml2 node 'other' made with the creation macros.

    Elements are copied with their attributes and, if 'deep' is true, with
    their child nodes. Text and comment nodes are copied by value. Any other
    node type raises a ValueError.
    """
    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    # Report the type label, not the node object, as Node_importNode_DOM does.
    raise ValueError("Node type '%s' (%d) not supported." % (_reverseNodeTypes.get(node_type, "unknown"), node_type))

def Node_importNode_DOM(node, other, deep):
    """
    Return a copy of 'other', a node offering the standard DOM attributes
    (nodeType, namespaceURI, tagName, attributes, childNodes, nodeValue, data).

    Elements are copied with their attributes and, if 'deep' is true, with
    their child nodes. Text and comment nodes are copied by value. Any other
    node type raises a ValueError.
    """
    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    # Use get() so that a node type missing from the table still produces the
    # intended ValueError instead of a KeyError from the message formatting.
    raise ValueError("Node type '%s' (%d) not supported." % (_reverseNodeTypes.get(other.nodeType, "unknown"), other.nodeType))

def Node_xpath(node, expr, variables=None, namespaces=None):
    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result of libxml2mod.xmlXPathEval.

    The optional 'namespaces' dictionary maps prefixes to namespace URIs to be
    registered for the evaluation. The 'variables' argument is currently
    ignored.
    """
    # NOTE: Need to convert from Unicode.
    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node))
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, from_unicode(prefix), from_unicode(ns))
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    # NOTE: Need to convert from Unicode.
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        # Free the context even if registration or evaluation fails.
        libxml2mod.xmlXPathFreeContext(context)

# Utility functions.

def createDocument(namespaceURI, localName, doctype):
    """
    Return a new document.

    If 'localName' is not None, a root element of that name is created in
    'namespaceURI' and appended. If 'doctype' is not None, an internal subset
    is created from its localName, publicId and systemId attributes.
    """
    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    if doctype is not None:
        libxml2mod.xmlCreateIntSubset(d, doctype.localName, doctype.publicId, doctype.systemId)
    return d

def _parseXMLContext(context):
    """
    Configure the XML parser 'context', parse with it and return the document.
    """
    # NOTE: Switching off validation and remote DTD resolution.
    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    libxml2mod.xmlParseDocument(context)
    return libxml2mod.xmlParserGetDoc(context)

def parse(stream_or_string, html=0):
    """
    Parse 'stream_or_string' and return the document.

    An object with a 'read' attribute is read in full and parsed as a string.
    Anything else is passed to parseFile as a filename. A true 'html' value
    selects the HTML parser.
    """
    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html)
    return parseFile(stream_or_string, html)

def parseFile(s, html=0):
    """
    Parse the file named 's' and return the document, using the HTML parser
    if 'html' is true.
    """
    if not html:
        return _parseXMLContext(libxml2mod.xmlCreateFileParserCtxt(s))
    return libxml2mod.htmlReadFile(s, None, HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseString(s, html=0):
    """
    Parse the string 's' and return the document, using the HTML parser if
    'html' is true.
    """
    if not html:
        return _parseXMLContext(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)))
    # NOTE: URL given as None.
    html_url = None
    return libxml2mod.htmlReadMemory(s, len(s), html_url, None,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseURI(uri, html=0):
    """
    Parse the XML resource at 'uri' and return the document.

    A NotImplementedError is raised if 'html' is true.
    """
    if html:
        raise NotImplementedError("parseURI does not yet support HTML")
    return _parseXMLContext(libxml2mod.xmlCreateURLParserCtxt(uri, 0))

def toString(node, encoding=None, prettyprint=0):
    """
    Return the result of libxml2mod.serializeNode for 'node' with the given
    'encoding' and 'prettyprint' settings.
    """
    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):
    """
    Write the serialised form of 'node' to 'stream' using its write method.
    """
    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):
    """
    Save 'node' to 'f' using libxml2mod.saveNodeTo with the given 'encoding'
    and 'prettyprint' settings.
    """
    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants.
# NOTE(review): values appear to mirror the libxml2 xmlParserOption and
# NOTE(review): htmlParserOption enumerations - confirm against parser.h.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

# vim: tabstop=4 expandtab shiftwidth=4