1 #!/usr/bin/env python 2 3 """ 4 Classes which process field collections, producing instance 5 documents. Each field entry consists of a field name mapped 6 to a string value, where the field name may have the following 7 formats: 8 9 /name1#n1/name2 10 /name1#n1/name2#n2/name3 11 /name1#n1/name2#n2/name3#n3/name4 12 ... 13 14 The indexes n1, n2, n3, ... indicate the position of elements 15 (starting from 1) in the entire element list, whose elements 16 may have different names. For example: 17 18 /zoo#1/name 19 /zoo#1/cage#1/name 20 /zoo#1/cage#2/name 21 /zoo#1/funding#3/contributor#1/name 22 23 Where multiple values can be collected for a given field, the 24 following notation is employed: 25 26 /package#1/categories#1/category##value 27 28 Some fields may contain the "=" string. This string is 29 reserved and all text following it is meant to specify a path 30 into a particular document. For example: 31 32 _action_add_animal=/zoo#1/cage#2 33 """ 34 35 import Constants 36 import libxml2dom 37 from xml.dom import EMPTY_NAMESPACE 38 39 class FieldsError(Exception): 40 pass 41 42 class Fields: 43 44 """ 45 A class which converts fields in the documented form to XML 46 instance documents. 47 """ 48 49 def __init__(self, encoding="utf-8", values_are_lists=0): 50 51 """ 52 Initialise the fields processor with the given 'encoding', 53 which is optional and which only applies to field data in 54 Python string form (and not Unicode objects). 55 56 If the optional 'values_are_lists' parameter is set to true 57 then each actual field value will be obtained by taking the 58 first element from each supplied field value. 59 """ 60 61 self.encoding = encoding 62 self.values_are_lists = values_are_lists 63 64 def complete_documents(self, documents, fields): 65 66 """ 67 Complete the given 'documents' using the 'fields' items list. 68 """ 69 70 for field, value in fields: 71 72 # Ignore selectors. 73 74 if field.find(Constants.selector_indicator) != -1: 75 continue 76 77 model_name, components = self._get_model_name_and_components(field) 78 if model_name is None: 79 continue 80 81 # Get a new instance document if none has been made for the 82 # model. 83 84 if not documents.has_key(model_name): 85 documents[model_name] = self.new_instance(model_name) 86 node = documents[model_name] 87 88 # Traverse the components within the instance. 89 90 for component in components: 91 t = component.split(Constants.pair_separator) 92 if len(t) == 1: 93 94 # Convert from lists if necessary. 95 96 if self.values_are_lists: 97 value = value[0] 98 99 # Convert the value to Unicode if necessary. 100 101 if type(value) == type(""): 102 value = unicode(value, encoding=self.encoding) 103 104 node.setAttributeNS(EMPTY_NAMESPACE, t[0], value) 105 break 106 107 elif len(t) == 2: 108 109 # Convert from one-based indexing (the position() 110 # function) to zero-based indexing. 111 112 name, index = t[0], int(t[1]) - 1 113 if index < 0: 114 break 115 node = self._enter_element(node, name, index) 116 117 elif len(t) == 3 and t[1] == "": 118 119 # Multivalued fields. 120 121 if not self.values_are_lists: 122 values = [value] 123 else: 124 values = value 125 126 name = t[0] 127 for subvalue in values: 128 subnode = self._append_element(node, name) 129 130 # Convert the value to Unicode if necessary. 131 132 if type(subvalue) == type(""): 133 subvalue = unicode(subvalue, encoding=self.encoding) 134 135 subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], subvalue) 136 137 def complete_selectors(self, selectors, fields, documents): 138 139 """ 140 Fill in the given 'selectors' dictionary using the given 141 'fields' so that it contains mappings from selector names to 142 parts of the specified 'documents'. 143 """ 144 145 for field, value in fields: 146 147 # Process selectors only. 148 149 selector_components = field.split(Constants.selector_indicator) 150 if len(selector_components) < 2: 151 continue 152 153 # Get the selector name and path. 154 # Note that the joining of the components uses the separator, 155 # but the separator really should not exist in the path. 156 157 selector_name = selector_components[0] 158 path = Constants.selector_indicator.join(selector_components[1:]) 159 160 model_name, components = self._get_model_name_and_components(path) 161 if model_name is None: 162 continue 163 164 # Go to the instance element. 165 166 if not documents.has_key(model_name) or documents[model_name] is None: 167 continue 168 169 node = documents[model_name] 170 171 # Traverse the path to find the part of the document to be 172 # selected. 173 174 for component in components: 175 t = component.split(Constants.pair_separator) 176 if len(t) == 1: 177 178 # Select attribute. 179 180 node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0]) 181 break 182 183 elif len(t) == 2: 184 185 # Convert from one-based indexing (the position() function) 186 # to zero-based indexing. 187 188 name, index = t[0], int(t[1]) - 1 189 if index < 0: 190 break 191 192 # NOTE: Controversial creation of potentially non-existent 193 # NOTE: nodes. 194 195 node = self._enter_element(node, name, index) 196 197 if not selectors.has_key(selector_name): 198 selectors[selector_name] = [] 199 selectors[selector_name].append(node) 200 201 def _append_element(self, node, name): 202 203 """ 204 Within 'node' append an element with the given 'name'. 205 """ 206 207 new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name) 208 node.appendChild(new_node) 209 return new_node 210 211 def _enter_element(self, node, name, index): 212 213 """ 214 From 'node' enter the element with the given 'name' at the 215 given 'index' position amongst the child elements. Create 216 missing child elements if necessary. 217 """ 218 219 self._ensure_elements(node, index) 220 221 elements = node.xpath("*") 222 if elements[index].localName == "placeholder": 223 new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name) 224 node.replaceChild(new_node, elements[index]) 225 else: 226 new_node = elements[index] 227 if new_node.localName != name: 228 raise FieldsError, (new_node.localName, name, elements, index) 229 230 # Enter the newly-created element. 231 232 return new_node 233 234 def _get_model_name_and_components(self, field): 235 236 """ 237 From 'field', return the model name and components which 238 describe the path within the instance document associated 239 with that model. 240 """ 241 242 # Get the components of the field name. 243 # Example: /name1#n1/name2#n2/name3 244 # Expected: ['', 'name1#n1', 'name2#n2', 'name3'] 245 246 components = field.split(Constants.path_separator) 247 if len(components) < 2: 248 return None, None 249 250 # Extract the model name from the top-level element 251 # specification. 252 # Expected: ['name1', 'n1'] 253 254 model_name_and_index = components[1].split(Constants.pair_separator) 255 if len(model_name_and_index) != 2: 256 return None, None 257 258 # Expected: 'name1', ['', 'name1#n1', 'name2#n2', 'name3'] 259 260 return model_name_and_index[0], components[1:] 261 262 def _ensure_elements(self, document, index): 263 264 """ 265 In the given 'document', extend the child elements list 266 so that a node can be stored at the given 'index'. 267 """ 268 269 elements = document.xpath("*") 270 i = len(elements) 271 while i <= index: 272 new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder") 273 document.appendChild(new_node) 274 i += 1 275 276 def make_documents(self, fields): 277 278 """ 279 Make a dictionary mapping model names to new documents prepared 280 from the given 'fields' dictionary. 281 """ 282 283 documents = {} 284 self.complete_documents(documents, fields) 285 286 # Fix the dictionary to return the actual document root. 287 288 for model_name, instance_root in documents.items(): 289 documents[model_name] = instance_root 290 return documents 291 292 def get_selectors(self, fields, documents): 293 294 """ 295 Get a dictionary containing a mapping of selector names to 296 selected parts of the given 'documents'. 297 """ 298 299 selectors = {} 300 self.complete_selectors(selectors, fields, documents) 301 return selectors 302 303 def new_instance(self, name): 304 305 "Return an instance root of the given 'name' in a new document." 306 307 return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None) 308 309 if __name__ == "__main__": 310 311 items = [ 312 ("_action_update", "Some value"), 313 ("_action_delete=/zoo#1/cage#2", "Some value"), 314 ("/actions#1/update#1/selected", "Some value"), # Not actually used in output documents or input. 315 ("/zoo#1/name", "The Zoo ???"), 316 ("/zoo#1/cage#1/name", "reptiles"), 317 ("/zoo#1/cage#1/capacity", "5"), 318 ("/zoo#1/cage#1/animal#1/name", "Monty"), 319 ("/zoo#1/cage#1/animal#1/species#1/name", "Python"), 320 ("/zoo#1/cage#1/animal#1/property#2/name", "texture"), 321 ("/zoo#1/cage#1/animal#1/property#2/value", "scaled"), 322 ("/zoo#1/cage#1/animal#1/property#3/name", "length"), 323 ("/zoo#1/cage#1/animal#1/property#3/value", "5m"), 324 ("/zoo#1/cage#1/animal#2/name", "Vincent"), 325 ("/zoo#1/cage#1/animal#2/species#1/name", "Lizard"), 326 ("/zoo#1/cage#1/animal#2/property#2/name", "colour"), 327 ("/zoo#1/cage#1/animal#2/property#2/value", "variable"), 328 ("/zoo#1/cage#1/animal#2/property#3/name", "length"), 329 ("/zoo#1/cage#1/animal#2/property#3/value", "1m"), 330 ("/zoo#1/cage#2/name", "mammals"), 331 ("/zoo#1/cage#2/capacity", "25"), 332 ("/zoo#1/cage#2/animal#1/name", "Simon"), 333 ("/zoo#1/cage#2/animal#1/species#1/name", "Giraffe"), 334 ("/zoo#1/cage#2/animal#2/name", "Leonard"), 335 ("/zoo#1/cage#2/animal#2/species#1/name", "Lion"), 336 ("/zoo#1/cage#2/animal#2/property#2/name", "danger"), 337 ("/zoo#1/cage#2/animal#2/property#2/value", "high"), 338 ("/zoo#1/funding#3/type", "private"), 339 ("/zoo#1/funding#3/contributor#1/name", "Animal Corporation"), 340 ("/zoo#1/funding#3/contributor#1/amount", "543210.987"), 341 ("/zoo#1/funding#3/contributor#1/industry##type", "animals") 342 ] 343 344 import time 345 import sys, cmdsyntax 346 347 # Find the documents. 348 349 syntax = cmdsyntax.Syntax(""" 350 --plain-output=OUTPUT_FILE 351 --instance-name=NAME 352 """) 353 354 syntax_matches = syntax.get_args(sys.argv[1:]) 355 356 try: 357 args = syntax_matches[0] 358 except IndexError: 359 print syntax.syntax 360 sys.exit(1) 361 362 # Create an object to interpret the test data. 363 364 fields = Fields("iso-8859-1") 365 366 t = time.time() 367 documents = fields.make_documents(items) 368 print "Building time", time.time() - t 369 370 t = time.time() 371 libxml2dom.toStream(documents[args["instance-name"]], stream=open(args["plain-output"], "wb"), encoding="utf-8") 372 print "Prettyprinting time", time.time() - t 373 374 print "Selectors", repr(fields.get_selectors(items, documents)) 375 376 # vim: tabstop=4 expandtab shiftwidth=4