#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.
"""

import xml.dom
import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):

    """
    Return 's' encoded as a UTF-8 byte string if it is a Unicode object.
    Any other value (including None) is returned unchanged.
    """

    if isinstance(s, unicode):
        return s.encode("utf-8")
    else:
        return s

def to_unicode(s):

    """
    Return 's' decoded from UTF-8 if it is a byte string. Any other value
    (including None) is returned unchanged.
    """

    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    else:
        return s

def _get_prefix_and_localName(name):

    """
    Split the qualified 'name' into a (prefix, localName) tuple.

    A name without a colon produces a prefix of None. A name containing more
    than one colon is not a well-formed qualified name and causes
    xml.dom.NamespaceErr to be raised, rather than handing (None, None) on to
    the libxml2mod calls made by the callers.
    """

    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    if len(parts) == 2:
        # Always hand back a tuple so that all successful results have the
        # same type.
        return tuple(parts)
    raise xml.dom.NamespaceErr("Malformed qualified name %r" % (name,))

# Mapping from the type labels returned by libxml2mod.type to DOM node type
# constants.

_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

# Inverse mapping from DOM node type constants to labels. Where two labels
# share a constant ("doctype" and "dtd"), whichever is visited last wins.

_reverseNodeTypes = {}
for label, value in _nodeTypes.items():
    _reverseNodeTypes[value] = label

def Node_ownerDocument(node):

    """
    Return the document reported by libxml2mod.doc for 'node', falling back
    to 'node' itself when no document is reported.
    """

    return libxml2mod.doc(node) or node

def Node_nodeType(node):

    """
    Return the DOM node type constant for 'node'. A KeyError is raised if
    libxml2mod.type reports a label which is not present in _nodeTypes.
    """

    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):

    "Return a list of the children of 'node' in sibling order."

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):

    """
    Return a dictionary mapping (namespace, name) tuples to the attribute
    nodes of 'node'. The namespace entry is None for attributes without a
    namespace. The keys hold the values exactly as returned by libxml2mod,
    without conversion to Unicode.
    """

    attributes = {}
    attr = libxml2mod.properties(node)
    while attr is not None:
        ns = libxml2mod.xmlNodeGetNs(attr)
        if ns is not None:
            ns_value = libxml2mod.xmlNodeGetContent(ns)
        else:
            ns_value = None
        attributes[(ns_value, libxml2mod.name(attr))] = attr
        attr = libxml2mod.next(attr)
    return attributes

def Node_namespaceURI(node):

    """
    Return the content of the namespace of 'node' as Unicode, or None if the
    node has no namespace.
    """

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.xmlNodeGetContent(ns))
    else:
        return None

def Node_nodeValue(node):

    "Return the content of 'node' as Unicode."

    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):

    "Set the content of 'node' to 'value', encoding Unicode as UTF-8."

    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this.
Node_data = Node_nodeValue

def Node_prefix(node):

    """
    Return the name of the namespace of 'node' as Unicode, or None if the
    node has no namespace.
    """

    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is not None:
        return to_unicode(libxml2mod.name(ns))
    else:
        return None

def Node_nodeName(node):

    """
    Return the name of 'node' as Unicode, in the form "prefix:localName" when
    the node has a prefix and as the plain local name otherwise.
    """

    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    else:
        return Node_localName(node)

def Node_tagName(node):

    "Return the node name of 'node' if it is an element, or None otherwise."

    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    else:
        return None

def Node_localName(node):

    "Return the name reported by libxml2mod.name for 'node' as Unicode."

    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):

    "Return the parent of 'node', or None if 'node' is an XML document."

    if libxml2mod.type(node) == "document_xml":
        return None
    else:
        return libxml2mod.parent(node)

def Node_previousSibling(node):

    "Return the sibling before 'node', or None if there is none."

    # The lookup already yields None when there is no sibling, so a single
    # call is sufficient.
    return libxml2mod.prev(node)

def Node_nextSibling(node):

    "Return the sibling after 'node', or None if there is none."

    return libxml2mod.next(node)

def Node_hasAttributeNS(node, ns, localName):

    "Return whether 'node' has an attribute 'localName' in namespace 'ns'."

    return Node_getAttributeNS(node, ns, localName) is not None

def Node_hasAttribute(node, name):

    "Return whether 'node' has an attribute called 'name'."

    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):

    """
    Return the value of the attribute 'localName' in namespace 'ns' on 'node'
    as Unicode, passing through whatever libxml2mod.xmlGetNsProp reports when
    it does not return a string.
    """

    # Convert from Unicode as the attribute setters do, so that non-ASCII
    # names are passed to libxml2 as UTF-8.
    ns, localName = map(from_unicode, [ns, localName])
    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):

    """
    Return the value of the attribute 'name' on 'node' as Unicode, passing
    through whatever libxml2mod.xmlGetProp reports when it does not return a
    string.
    """

    return to_unicode(libxml2mod.xmlGetProp(node, from_unicode(name)))

def Node_getAttributeNodeNS(node, ns, localName):

    """
    Return the attribute node 'localName' in namespace 'ns' on 'node'. A
    KeyError is raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    # The lookup uses UTF-8 encoded strings, matching the conversion applied
    # by the attribute setters.
    ns, localName = map(from_unicode, [ns, localName])
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):

    """
    Return the attribute node called 'name' (without a namespace) on 'node'.
    A KeyError is raised if there is no such attribute.
    """

    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, from_unicode(name))]

def Node_setAttributeNS(node, ns, name, value):

    """
    Add an attribute with the qualified 'name' and the given 'value' to
    'node', using namespace 'ns'.

    If 'ns' matches the namespace of 'node', that namespace is reused;
    otherwise, if 'name' has a prefix, a new namespace declaration is made on
    'node'; otherwise the attribute is created without a namespace.
    """

    # NOTE: Need to convert from Unicode.
    ns, name, value = map(from_unicode, [ns, name, value])

    prefix, localName = _get_prefix_and_localName(name)

    # NOTE: Might need to be xmlSetNsProp.
    if ns is not None and ns == libxml2mod.xmlNodeGetContent(libxml2mod.xmlNodeGetNs(node)):
        libxml2mod.xmlNewNsProp(node, libxml2mod.xmlNodeGetNs(node), localName, value)
    elif prefix is not None:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
        libxml2mod.xmlNewNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlNewNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):

    "Set the attribute 'name' on 'node' to 'value'."

    # NOTE: Need to convert from Unicode.
    name, value = map(from_unicode, [name, value])

    libxml2mod.xmlSetProp(node, name, value)

def Node_setAttributeNodeNS(node, attr):

    """
    Copy the namespace, name and value of the attribute node 'attr' onto
    'node' as a new attribute.
    """

    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):

    "Copy the name and value of the attribute node 'attr' onto 'node'."

    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):

    """
    Remove the attribute 'localName' in namespace 'ns' from 'node'. A
    KeyError is raised by the attribute lookup if there is no such attribute.
    """

    attr = Node_getAttributeNodeNS(node, ns, localName)
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):

    "Remove the attribute called 'name' from 'node'."

    name = from_unicode(name)
    libxml2mod.xmlUnsetProp(node, name)

def Node_createElementNS(node, ns, name):

    """
    Return a new, unattached element with the qualified 'name'. If 'ns' is
    not None, a namespace using the prefix from 'name' is declared on the new
    element and set as its namespace. The 'node' argument is not used.
    """

    # NOTE: Need to convert from Unicode.
    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = libxml2mod.xmlNewNs(new_node, ns, prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    return new_node

def Node_createElement(node, name):

    """
    Return a new, unattached element called 'name'. The 'node' argument is
    not used.
    """

    # NOTE: Need to convert from Unicode.
    name = from_unicode(name)

    new_node = libxml2mod.xmlNewNode(name)
    return new_node

def Node_createAttributeNS(node, ns, name):

    """
    Create an attribute with the qualified 'name' and no value via
    libxml2mod.xmlNewNsProp on 'node', returning the result. If 'ns' is not
    None, a namespace using the prefix from 'name' is declared on 'node' for
    the attribute.
    """

    # NOTE: Need to convert from Unicode.
    ns, name = map(from_unicode, [ns, name])

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
    else:
        new_ns = None
    new_node = libxml2mod.xmlNewNsProp(node, new_ns, localName, None)
    return new_node

def Node_createAttribute(node, name):

    "Create an attribute called 'name' without a namespace; see Node_createAttributeNS."

    # NOTE: Need to convert from Unicode.
    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):

    """
    Return a new text node containing 'value'. The 'node' argument is not
    used.
    """

    # NOTE: Need to convert from Unicode.
    value = from_unicode(value)

    return libxml2mod.xmlNewText(value)

def Node_createComment(node, value):

    """
    Return a new comment node containing 'value'. The 'node' argument is not
    used.
    """

    # NOTE: Need to convert from Unicode.
    value = from_unicode(value)

    return libxml2mod.xmlNewComment(value)

def Node_insertBefore(node, tmp, oldNode):

    """
    Insert 'tmp' as the sibling immediately before 'oldNode', returning the
    result of libxml2mod.xmlAddPrevSibling. The 'node' argument is not used.
    """

    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):

    """
    Replace 'oldNode' with 'tmp', returning the result of
    libxml2mod.xmlReplaceNode. The 'node' argument is not used.
    """

    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):

    "Add 'tmp' as a child of 'node', returning the result of libxml2mod.xmlAddChild."

    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):

    "Unlink 'child' from its tree. The 'node' argument is not used."

    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):

    """
    Return a copy of the libxml2mod node 'other', created using 'node'.

    Elements (with their attributes), text nodes and comments are supported.
    If 'deep' is true, the children of an element are imported recursively
    and appended to the copy. A ValueError is raised for any other kind of
    node.
    """

    # Look up the node type once rather than for every comparison.
    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    # Report the type label, as Node_importNode_DOM does, rather than the
    # node object itself.
    raise ValueError("Node type '%s' (%d) not supported." % (_reverseNodeTypes[node_type], node_type))

def Node_importNode_DOM(node, other, deep):

    """
    Return a copy of the DOM node 'other' (an object providing the standard
    DOM attributes such as nodeType and childNodes), created using 'node'.

    Elements (with their attributes), text nodes and comments are supported.
    If 'deep' is true, the children of an element are imported recursively
    and appended to the copy. A ValueError is raised for any other kind of
    node.
    """

    if other.nodeType == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif other.nodeType == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif other.nodeType == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    # Not every DOM node type has a label in _reverseNodeTypes, so avoid
    # raising a KeyError in place of the intended ValueError.
    label = _reverseNodeTypes.get(other.nodeType, "unknown")
    raise ValueError("Node type '%s' (%d) not supported." % (label, other.nodeType))

def Node_xpath(node, expr, variables=None, namespaces=None):

    """
    Evaluate the XPath expression 'expr' with 'node' as the context node,
    returning the result of libxml2mod.xmlXPathEval.

    The optional 'namespaces' dictionary maps prefixes to namespace URIs to
    be registered for use in the expression. The 'variables' argument is
    accepted but currently ignored.
    """

    # NOTE: Need to convert from Unicode.
    expr = from_unicode(expr)

    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node))
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, from_unicode(prefix), from_unicode(ns))
        # NOTE: No such functions are exposed in current versions of libxml2.
        #for (prefix, ns), value in (variables or {}).items():
        #    # NOTE: Need to convert from Unicode.
        #    value = from_unicode(value)
        #    libxml2mod.xmlXPathRegisterVariableNS(context, prefix, ns, value)
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        # Always release the context, even if registration or evaluation
        # fails.
        libxml2mod.xmlXPathFreeContext(context)

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    "Document types are not supported: None is always returned."

    return None

def createDocument(namespaceURI, localName, doctype):

    """
    Return a new document. If 'localName' is not None, a root element with
    that name and the given 'namespaceURI' is created and added to the
    document. The 'doctype' argument is not used.
    """

    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    return d

def parse(stream_or_string, html=0):

    """
    Parse 'stream_or_string', returning a document. An object with a 'read'
    attribute is read in full and its contents parsed; anything else is
    passed to parseFile. If 'html' is true, the HTML parser is used.
    """

    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html)
    else:
        return parseFile(stream_or_string, html)

def _parseXML(context):

    """
    Configure the XML parser 'context' to be non-pedantic, non-validating,
    quiet and without network access, parse the document and return it.
    """

    # NOTE: Switching off validation and remote DTD resolution.
    # NOTE(review): the parser context is not explicitly released here;
    # NOTE(review): confirm whether it needs freeing after parsing.
    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    libxml2mod.xmlParseDocument(context)
    return libxml2mod.xmlParserGetDoc(context)

def parseFile(s, html=0):

    """
    Parse the file named 's', returning a document. If 'html' is true, the
    HTML parser is used instead of the XML parser.
    """

    if not html:
        return _parseXML(libxml2mod.xmlCreateFileParserCtxt(s))
    else:
        return libxml2mod.htmlReadFile(s, None, HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseString(s, html=0):

    """
    Parse the string 's', returning a document. If 'html' is true, the HTML
    parser is used instead of the XML parser.
    """

    if not html:
        return _parseXML(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)))
    else:
        # NOTE: URL given as None.
        html_url = None
        return libxml2mod.htmlReadMemory(s, len(s), html_url, None,
            HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseURI(uri, html=0):

    """
    Parse the resource at 'uri' as XML, returning a document. HTML parsing is
    not supported: NotImplementedError is raised if 'html' is true.
    """

    if not html:
        return _parseXML(libxml2mod.xmlCreateURLParserCtxt(uri, 0))
    else:
        raise NotImplementedError("parseURI does not yet support HTML")

def toString(node, encoding=None, prettyprint=0):

    """
    Return the serialisation of 'node' produced by libxml2mod.serializeNode
    for the given 'encoding' and 'prettyprint' setting.
    """

    return libxml2mod.serializeNode(node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    "Write the serialisation of 'node' (see toString) to 'stream'."

    stream.write(toString(node, encoding, prettyprint))

def toFile(node, f, encoding=None, prettyprint=0):

    "Save 'node' to 'f' using libxml2mod.saveNodeTo."

    libxml2mod.saveNodeTo(node, f, encoding, prettyprint)

# libxml2mod constants.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

# vim: tabstop=4 expandtab shiftwidth=4