#!/usr/bin/env python

"""
DOM macros for virtual libxml2mod node methods and properties.

Each ``Node_*`` function takes a low-level libxml2mod node object as its first
argument and implements the correspondingly named DOM property or method on
top of the libxml2mod C-level bindings.
"""

import xml.dom
import libxml2mod

# NOTE: libxml2 seems to use UTF-8 throughout.

def from_unicode(s):
    """
    Return 's' encoded as a UTF-8 byte string if it is a Unicode object;
    any other value (including None) is returned unchanged.
    """
    if isinstance(s, unicode):
        return s.encode("utf-8")
    return s

def to_unicode(s):
    """
    Return 's' decoded from UTF-8 if it is a plain (byte) string; any other
    value (including None) is returned unchanged.
    """
    if isinstance(s, str):
        return unicode(s, encoding="utf-8")
    return s

def _get_prefix_and_localName(name):
    """
    Split the qualified 'name' at ":" and return a (prefix, localName) pair.
    The prefix is None when 'name' contains no colon.
    """
    parts = name.split(":")
    if len(parts) == 1:
        return None, name
    elif len(parts) == 2:
        # Returned as the two-element list produced by split; callers unpack it.
        return parts
    else:
        # NOTE: Should raise an exception.
        return None, None

# Mapping from the type names reported by libxml2mod.type to DOM node types.
_nodeTypes = {
    "attribute" : xml.dom.Node.ATTRIBUTE_NODE,
    "comment" : xml.dom.Node.COMMENT_NODE,
    "document_xml" : xml.dom.Node.DOCUMENT_NODE,
    "doctype" : xml.dom.Node.DOCUMENT_TYPE_NODE,
    "dtd" : xml.dom.Node.DOCUMENT_TYPE_NODE, # NOTE: Needs verifying.
    "element" : xml.dom.Node.ELEMENT_NODE,
    "entity" : xml.dom.Node.ENTITY_NODE,
    "entity_ref" : xml.dom.Node.ENTITY_REFERENCE_NODE,
    "notation" : xml.dom.Node.NOTATION_NODE,
    "pi" : xml.dom.Node.PROCESSING_INSTRUCTION_NODE,
    "text" : xml.dom.Node.TEXT_NODE
    }

def Node_ownerDocument(node):
    """
    Return the document reported by libxml2mod.doc for 'node', or 'node'
    itself when no document is reported.
    """
    return libxml2mod.doc(node) or node

def Node_nodeType(node):
    """
    Return the xml.dom.Node type constant for 'node'. A KeyError is raised
    for libxml2mod type names which are absent from the _nodeTypes table.
    """
    return _nodeTypes[libxml2mod.type(node)]

def Node_childNodes(node):
    """
    Return a list of the child nodes of 'node' in sibling order.
    """

    # NOTE: Consider a generator instead.

    child_nodes = []
    child = libxml2mod.children(node)
    while child is not None:
        child_nodes.append(child)
        child = libxml2mod.next(child)
    return child_nodes

def Node_attributes(node):
    """
    Return a dictionary mapping (namespace, name) tuples to the attribute
    nodes of 'node'. The namespace is None for attributes without one. Keys
    hold the values as returned by libxml2mod; they are not converted to
    Unicode.
    """
    attributes = {}
    attr = libxml2mod.properties(node)
    while attr is not None:
        ns = libxml2mod.xmlNodeGetNs(attr)
        if ns is not None:
            key = (libxml2mod.xmlNodeGetContent(ns), libxml2mod.name(attr))
        else:
            key = (None, libxml2mod.name(attr))
        attributes[key] = attr
        attr = libxml2mod.next(attr)
    return attributes

def Node_namespaceURI(node):
    """
    Return the content of the namespace object of 'node' as Unicode, or None
    if the node has no namespace.
    """
    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return to_unicode(libxml2mod.xmlNodeGetContent(ns))

def Node_nodeValue(node):
    """
    Return the content of 'node' as Unicode.
    """
    return to_unicode(libxml2mod.xmlNodeGetContent(node))

# NOTE: This is not properly exposed in the libxml2macro interface as the
# NOTE: writable form of nodeValue.

def Node_setNodeValue(node, value):
    """
    Set the content of 'node' to 'value', encoding Unicode values as UTF-8.
    """
    # NOTE: Cannot set attribute node values.
    libxml2mod.xmlNodeSetContent(node, from_unicode(value))

# NOTE: Verify this.

Node_data = Node_nodeValue

def Node_prefix(node):
    """
    Return the name of the namespace object of 'node' as Unicode, or None if
    the node has no namespace.
    """
    ns = libxml2mod.xmlNodeGetNs(node)
    if ns is None:
        return None
    return to_unicode(libxml2mod.name(ns))

def Node_nodeName(node):
    """
    Return the name of 'node' in the form "prefix:localName", or just the
    local name when the node has no prefix.
    """
    prefix = Node_prefix(node)
    if prefix is not None:
        return prefix + ":" + Node_localName(node)
    return Node_localName(node)

def Node_tagName(node):
    """
    Return the node name of 'node' if it is an element; otherwise None.
    """
    if libxml2mod.type(node) == "element":
        return Node_nodeName(node)
    return None

def Node_localName(node):
    """
    Return the name reported by libxml2mod.name for 'node' as Unicode.
    """
    return to_unicode(libxml2mod.name(node))

def Node_parentNode(node):
    """
    Return the parent of 'node', or None if 'node' is a document.
    """
    if libxml2mod.type(node) == "document_xml":
        return None
    return libxml2mod.parent(node)

def Node_previousSibling(node):
    """
    Return the previous sibling of 'node', or None if there is none.
    """
    # A single native call suffices: the result is either a node or None.
    return libxml2mod.prev(node)

def Node_nextSibling(node):
    """
    Return the next sibling of 'node', or None if there is none.
    """
    # A single native call suffices: the result is either a node or None.
    return libxml2mod.next(node)

def Node_hasAttributeNS(node, ns, localName):
    """
    Return whether 'node' has a value for the attribute with namespace 'ns'
    and the given 'localName'.
    """
    return Node_getAttributeNS(node, ns, localName) is not None

def Node_hasAttribute(node, name):
    """
    Return whether 'node' has a value for the attribute called 'name'.
    """
    return Node_getAttribute(node, name) is not None

def Node_getAttributeNS(node, ns, localName):
    """
    Return the result of xmlGetNsProp for 'localName' in namespace 'ns' on
    'node', converted to Unicode.
    """
    return to_unicode(libxml2mod.xmlGetNsProp(node, localName, ns))

def Node_getAttribute(node, name):
    """
    Return the result of xmlGetProp for 'name' on 'node', converted to
    Unicode.
    """
    return to_unicode(libxml2mod.xmlGetProp(node, name))

def Node_getAttributeNodeNS(node, ns, localName):
    """
    Return the attribute node of 'node' stored under ('ns', 'localName') in
    Node_attributes. A KeyError is raised if there is no such entry.
    """
    # NOTE: Needs verifying.
    return Node_attributes(node)[(ns, localName)]

def Node_getAttributeNode(node, name):
    """
    Return the attribute node of 'node' stored under (None, 'name') in
    Node_attributes. A KeyError is raised if there is no such entry.
    """
    # NOTE: Needs verifying.
    return Node_attributes(node)[(None, name)]

def Node_setAttributeNS(node, ns, name, value):
    """
    Add an attribute to 'node' with namespace 'ns', qualified 'name' and the
    given 'value'. Unicode arguments are encoded as UTF-8 first.
    """
    # NOTE: Need to convert from Unicode.
    ns = from_unicode(ns)
    name = from_unicode(name)
    value = from_unicode(value)

    prefix, localName = _get_prefix_and_localName(name)

    # Fetch the element's own namespace once, and only when it is needed for
    # the comparison below.
    node_ns = None
    if ns is not None:
        node_ns = libxml2mod.xmlNodeGetNs(node)

    # NOTE: Might need to be xmlSetNsProp.
    if node_ns is not None and ns == libxml2mod.xmlNodeGetContent(node_ns):
        # The attribute shares the element's namespace: reuse that object.
        libxml2mod.xmlNewNsProp(node, node_ns, localName, value)
    elif prefix is not None:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
        libxml2mod.xmlNewNsProp(node, new_ns, localName, value)
    else:
        # NOTE: Needs verifying: what should happen to the namespace?
        # NOTE: This also catches the case where None is the element's
        # NOTE: namespace and is also used for the attribute.
        libxml2mod.xmlNewNsProp(node, None, localName, value)

def Node_setAttribute(node, name, value):
    """
    Set the attribute called 'name' on 'node' to 'value'. Unicode arguments
    are encoded as UTF-8 first.
    """
    # NOTE: Need to convert from Unicode.
    libxml2mod.xmlSetProp(node, from_unicode(name), from_unicode(value))

def Node_setAttributeNodeNS(node, attr):
    """
    Set an attribute on 'node' using the namespace, name and value of the
    attribute node 'attr'.
    """
    # NOTE: Not actually putting the node on the element.
    Node_setAttributeNS(node, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

def Node_setAttributeNode(node, attr):
    """
    Set an attribute on 'node' using the name and value of the attribute
    node 'attr'.
    """
    # NOTE: Not actually putting the node on the element.
    Node_setAttribute(node, Node_nodeName(attr), Node_nodeValue(attr))

def Node_removeAttributeNS(node, ns, localName):
    """
    Remove the attribute with namespace 'ns' and the given 'localName' from
    'node'. A KeyError is raised (by Node_getAttributeNodeNS) if 'node' has
    no such attribute.
    """
    attr = Node_getAttributeNodeNS(node, ns, localName)
    libxml2mod.xmlUnsetNsProp(node, libxml2mod.xmlNodeGetNs(attr), libxml2mod.name(attr))

def Node_removeAttribute(node, name):
    """
    Remove the attribute called 'name' from 'node'.
    """
    libxml2mod.xmlUnsetProp(node, from_unicode(name))

def Node_createElementNS(node, ns, name):
    """
    Return a new element with the qualified 'name'. If 'ns' is not None, a
    namespace is created on the new element and set as its namespace. The
    'node' argument is not used.
    """
    # NOTE: Need to convert from Unicode.
    ns = from_unicode(ns)
    name = from_unicode(name)

    prefix, localName = _get_prefix_and_localName(name)
    new_node = libxml2mod.xmlNewNode(localName)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = libxml2mod.xmlNewNs(new_node, ns, prefix)
        libxml2mod.xmlSetNs(new_node, new_ns)
    return new_node

def Node_createElement(node, name):
    """
    Return a new element called 'name'. The 'node' argument is not used.
    """
    # NOTE: Need to convert from Unicode.
    return libxml2mod.xmlNewNode(from_unicode(name))

def Node_createAttributeNS(node, ns, name):
    """
    Create an attribute with the qualified 'name' and no value via
    xmlNewNsProp on 'node' and return the resulting attribute node. If 'ns'
    is not None, a namespace is first created on 'node' for the attribute.
    """

    # NOTE: Need to convert from Unicode.
    ns = from_unicode(ns)
    name = from_unicode(name)

    prefix, localName = _get_prefix_and_localName(name)
    # NOTE: Does it make sense to set the namespace if it is empty?
    if ns is not None:
        new_ns = libxml2mod.xmlNewNs(node, ns, prefix)
    else:
        new_ns = None
    return libxml2mod.xmlNewNsProp(node, new_ns, localName, None)

def Node_createAttribute(node, name):
    """
    Create an attribute called 'name' with no namespace, delegating to
    Node_createAttributeNS, and return the resulting attribute node.
    """

    # NOTE: Need to convert from Unicode.
    name = from_unicode(name)

    # NOTE: xmlNewProp does not seem to work.
    return Node_createAttributeNS(node, None, name)

def Node_createTextNode(node, value):
    """
    Return a new text node containing 'value'. The 'node' argument is not
    used.
    """
    # NOTE: Need to convert from Unicode.
    return libxml2mod.xmlNewText(from_unicode(value))

def Node_createComment(node, value):
    """
    Return a new comment node containing 'value'. The 'node' argument is not
    used.
    """
    # NOTE: Need to convert from Unicode.
    return libxml2mod.xmlNewComment(from_unicode(value))

def Node_insertBefore(node, tmp, oldNode):
    """
    Add 'tmp' as the previous sibling of 'oldNode' and return the result of
    xmlAddPrevSibling. The 'node' argument is not used.
    """
    return libxml2mod.xmlAddPrevSibling(oldNode, tmp)

def Node_replaceChild(node, tmp, oldNode):
    """
    Replace 'oldNode' with 'tmp' and return the result of xmlReplaceNode.
    The 'node' argument is not used.
    """
    return libxml2mod.xmlReplaceNode(oldNode, tmp)

def Node_appendChild(node, tmp):
    """
    Add 'tmp' as a child of 'node' and return the result of xmlAddChild.
    """
    return libxml2mod.xmlAddChild(node, tmp)

def Node_removeChild(node, child):
    """
    Unlink 'child' from its tree. The 'node' argument is not used.
    """
    libxml2mod.xmlUnlinkNode(child)

def Node_importNode(node, other, deep):
    """
    Return a copy of the libxml2mod node 'other', created using the creation
    functions of this module. Elements (with their attributes), text nodes
    and comments are supported; if 'deep' is true, the children of elements
    are copied recursively. A ValueError carrying the node type is raised
    for any other kind of node.
    """
    node_type = Node_nodeType(other)

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, Node_namespaceURI(other), Node_tagName(other))
        for attr in Node_attributes(other).values():
            Node_setAttributeNS(imported_element, Node_namespaceURI(attr), Node_nodeName(attr), Node_nodeValue(attr))

        if deep:
            for child in Node_childNodes(other):
                imported_child = Node_importNode(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, Node_nodeValue(other))

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, Node_data(other))

    raise ValueError(node_type)

def Node_importNode_DOM(node, other, deep):
    """
    Return a copy of 'other', which is accessed through DOM-style attributes
    (nodeType, namespaceURI, tagName, attributes, childNodes, nodeValue,
    data) rather than through libxml2mod. Elements (with their attributes),
    text nodes and comments are supported; if 'deep' is true, the children
    of elements are copied recursively. A ValueError carrying the node type
    is raised for any other kind of node.
    """
    node_type = other.nodeType

    if node_type == xml.dom.Node.ELEMENT_NODE:
        imported_element = Node_createElementNS(node, other.namespaceURI, other.tagName)
        for attr in other.attributes.values():
            Node_setAttributeNS(imported_element, attr.namespaceURI, attr.nodeName, attr.nodeValue)

        if deep:
            for child in other.childNodes:
                imported_child = Node_importNode_DOM(node, child, deep)
                if imported_child:
                    Node_appendChild(imported_element, imported_child)

        return imported_element

    elif node_type == xml.dom.Node.TEXT_NODE:
        return Node_createTextNode(node, other.nodeValue)

    elif node_type == xml.dom.Node.COMMENT_NODE:
        return Node_createComment(node, other.data)

    raise ValueError(node_type)

def Node_xpath(node, expr, variables=None, namespaces=None):
    """
    Evaluate the XPath expression 'expr' with 'node' as the context node and
    return the result of xmlXPathEval. The optional 'namespaces' dictionary
    maps prefixes to namespace URIs to be registered for the evaluation. The
    'variables' argument is currently ignored.
    """
    context = libxml2mod.xmlXPathNewContext(Node_ownerDocument(node))
    try:
        libxml2mod.xmlXPathSetContextNode(context, node)
        # NOTE: Discover namespaces from the node.
        # NOTE: Work out how to specify paths without having to use prefixes on
        # NOTE: names all the time.
        for prefix, ns in (namespaces or {}).items():
            libxml2mod.xmlXPathRegisterNs(context, prefix, ns)
        # NOTE: No functions for registering variables are exposed in current
        # NOTE: versions of libxml2.
        return libxml2mod.xmlXPathEval(expr, context)
    finally:
        # Release the context even if registration or evaluation fails.
        libxml2mod.xmlXPathFreeContext(context)

# Utility functions.

def createDocumentType(localName, publicId, systemId):
    """
    Placeholder: document types are not created; None is always returned.
    """
    return None

def createDocument(namespaceURI, localName, doctype):
    """
    Return a new document. If 'localName' is not None, a root element with
    that name and the given 'namespaceURI' is created and appended to the
    document. The 'doctype' argument is not used.
    """
    # NOTE: Fixed to use version 1.0 only.
    d = libxml2mod.xmlNewDoc("1.0")
    if localName is not None:
        root = Node_createElementNS(d, namespaceURI, localName)
        Node_appendChild(d, root)
    return d

def parse(stream_or_string, html=0):
    """
    Parse 'stream_or_string' and return the resulting document. Objects with
    a "read" attribute are read completely and parsed as a string; anything
    else is passed to parseFile. If 'html' is true, HTML parsing is used.
    """
    if hasattr(stream_or_string, "read"):
        stream = stream_or_string
        return parseString(stream.read(), html)
    return parseFile(stream_or_string, html)

def _parse_xml_context(context):
    """
    Configure the XML parser 'context' (pedantic mode and validation off;
    error reporting, warning reporting and network access suppressed), parse
    the document and return it.
    """
    # NOTE(review): the parser context is not explicitly freed here - confirm
    # NOTE(review): whether libxml2mod requires that.
    libxml2mod.xmlParserSetPedantic(context, 0)
    libxml2mod.xmlParserSetValidate(context, 0)
    libxml2mod.xmlCtxtUseOptions(context, XML_PARSE_NOERROR | XML_PARSE_NOWARNING | XML_PARSE_NONET)
    libxml2mod.xmlParseDocument(context)
    return libxml2mod.xmlParserGetDoc(context)

def parseFile(s, html=0):
    """
    Parse the file named by 's' and return the resulting document. If 'html'
    is true, the file is read using the HTML parser.
    """
    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_xml_context(libxml2mod.xmlCreateFileParserCtxt(s))
    return libxml2mod.htmlReadFile(s, None, HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseString(s, html=0):
    """
    Parse the document text in the string 's' and return the resulting
    document. If 'html' is true, the text is read using the HTML parser.
    """
    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_xml_context(libxml2mod.xmlCreateMemoryParserCtxt(s, len(s)))

    # NOTE: URL given as None.
    html_url = None
    return libxml2mod.htmlReadMemory(s, len(s), html_url, None,
        HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET)

def parseURI(uri, html=0):
    """
    Parse the XML document found at 'uri' and return the resulting document.
    HTML parsing is not supported: if 'html' is true, xml.dom.NotSupportedErr
    is raised.
    """
    # NOTE: Switching off validation and remote DTD resolution.
    if not html:
        return _parse_xml_context(libxml2mod.xmlCreateURLParserCtxt(uri, 0))
    raise xml.dom.NotSupportedErr("parseURI does not yet support HTML")

def toString(node, encoding=None):
    """
    Return the serialised form of 'node' using the optional 'encoding'.
    """
    return libxml2mod.serializeNode(node, encoding, 0)

def toStream(node, stream, encoding=None):
    """
    Write the serialised form of 'node' to 'stream' using the optional
    'encoding'.
    """
    stream.write(toString(node, encoding))

def toFile(node, f, encoding=None):
    """
    Save the serialised form of 'node' to 'f' via libxml2mod.saveNodeTo
    using the optional 'encoding'.
    """
    libxml2mod.saveNodeTo(node, f, encoding, 0)

# libxml2mod constants.
# These are only looked up when the parsing functions above are called, so
# defining them at the end of the module is safe.

HTML_PARSE_NOERROR = 32
HTML_PARSE_NOWARNING = 64
HTML_PARSE_NONET = 2048
XML_PARSE_NOERROR = 32
XML_PARSE_NOWARNING = 64
XML_PARSE_NONET = 2048

# vim: tabstop=4 expandtab shiftwidth=4