#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Classes which process field collections, producing instance
documents. Each field entry consists of a field name mapped
to a string value, where the field name may have the following
formats:

    /name1#n1/name2
    /name1#n1/name2#n2/name3
    /name1#n1/name2#n2/name3#n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements
(starting from 1) in the entire element list, whose elements
may have different names. For example:

    /zoo#1/name
    /zoo#1/cage#1/name
    /zoo#1/cage#2/name
    /zoo#1/funding#3/contributor#1/name

Where multiple values can be collected for a given field, the
following notation is employed:

    /package#1/categories#1/category##value

Some fields may contain the "=" string. This string is
reserved and all text following it is meant to specify a path
into a particular document. For example:

    _action_add_animal=/zoo#1/cage#2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE


class FieldsError(Exception):

    """
    Raised when the element found at a given position in a document
    does not have the name required by a field path.
    """

    pass


class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def _to_unicode(self, value):

        """
        Return 'value' decoded using the configured encoding if it is a
        plain Python string; return any other kind of value unchanged.
        """

        if isinstance(value, str):
            return unicode(value, encoding=self.encoding)
        return value

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance
        documents and is updated in place: a new instance is created
        for each model name which is not yet present. Fields containing
        the selector indicator, and fields whose name does not begin
        with a "name#index" component, are ignored.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # A plain name is an attribute on the current element
                    # and terminates the path.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    node.setAttributeNS(
                        EMPTY_NAMESPACE, t[0], self._to_unicode(value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_element(node, name, index)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one appended element per value,
                    # each carrying the value in the named attribute.

                    if not self.values_are_lists:
                        values = [value]
                    else:
                        values = value

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        subnode.setAttributeNS(
                            EMPTY_NAMESPACE, t[2], self._to_unicode(subvalue))

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'.

        Each selector name is mapped to a list to which the selected
        node is appended. Selectors referring to models for which no
        document is available are ignored.
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            if model_name not in documents or documents[model_name] is None:
                continue

            node = documents[model_name]

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # NOTE: Controversial creation of potentially
                    # NOTE: non-existent nodes.

                    node = self._enter_element(node, name, index)

            if selector_name not in selectors:
                selectors[selector_name] = []
            selectors[selector_name].append(node)

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name' and
        return the new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the entered element. Raise FieldsError if an element
        with a different name already occupies the position.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        if elements[index].localName == "placeholder":

            # Replace the placeholder with the real element.

            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, elements[index])
        else:
            new_node = elements[index]
            if new_node.localName != name:
                raise FieldsError(new_node.localName, name, elements, index)

        # Enter the newly-created (or existing) element.

        return new_node

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model.

        Return (None, None) if 'field' contains no path separator or
        if its first path component is not of the form "name#index".
        """

        # Get the components of the field name.
        # Example:  /name1#n1/name2#n2/name3
        # Expected: ['', 'name1#n1', 'name2#n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading empty component is dropped.
        # Expected: 'name1', ['name1#n1', 'name2#n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. Elements
        named "placeholder" are appended to fill any gap.
        """

        elements = document.xpath("*")
        i = len(elements)
        while i <= index:
            new_node = document.ownerDocument.createElementNS(
                EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)
            i += 1

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list (a sequence of name, value
        pairs).
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)


# NOTE: Legacy name exposure.

Fields = FieldProcessor


class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        "Initialise the form, passing all arguments to FieldProcessor."

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}

    def set_parameters(self, parameters):

        """
        Store the given 'parameters' dictionary and rebuild the form's
        documents from its items.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        "Return the parameters most recently stored in the form."

        return self.parameters

    def get_documents(self):

        "Return the documents built from the stored parameters."

        return self.documents

    def get_selectors(self):

        """
        Return a dictionary mapping selector names to the parts of the
        form's documents selected by the stored parameters.
        """

        return FieldProcessor.get_selectors(
            self, self.parameters.items(), self.documents)


if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo#1/cage#2", "Some value"),
            ("/actions#1/update#1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo#1/name", "The Zoo ???"),
            ("/zoo#1/cage#1/name", "reptiles"),
            ("/zoo#1/cage#1/capacity", "5"),
            ("/zoo#1/cage#1/animal#1/name", "Monty"),
            ("/zoo#1/cage#1/animal#1/species#1/name", "Python"),
            ("/zoo#1/cage#1/animal#1/property#2/name", "texture"),
            ("/zoo#1/cage#1/animal#1/property#2/value", "scaled"),
            ("/zoo#1/cage#1/animal#1/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#1/property#3/value", "5m"),
            ("/zoo#1/cage#1/animal#2/name", "Vincent"),
            ("/zoo#1/cage#1/animal#2/species#1/name", "Lizard"),
            ("/zoo#1/cage#1/animal#2/property#2/name", "colour"),
            ("/zoo#1/cage#1/animal#2/property#2/value", "variable"),
            ("/zoo#1/cage#1/animal#2/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#2/property#3/value", "1m"),
            ("/zoo#1/cage#2/name", "mammals"),
            ("/zoo#1/cage#2/capacity", "25"),
            ("/zoo#1/cage#2/animal#1/name", "Simon"),
            ("/zoo#1/cage#2/animal#1/species#1/name", "Giraffe"),
            ("/zoo#1/cage#2/animal#2/name", "Leonard"),
            ("/zoo#1/cage#2/animal#2/species#1/name", "Lion"),
            ("/zoo#1/cage#2/animal#2/property#2/name", "danger"),
            ("/zoo#1/cage#2/animal#2/property#2/value", "high"),
            ("/zoo#1/funding#3/type", "private"),
            ("/zoo#1/funding#3/contributor#1/name", "Animal Corporation"),
            ("/zoo#1/funding#3/contributor#1/amount", "543210.987"),
            ("/zoo#1/funding#3/contributor#1/industry##type", "animals")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t,))

    t = time.time()

    # Look up the document before opening the output file so that an
    # unknown instance name does not leave an empty file behind, and
    # make sure that the file is closed even if serialisation fails.

    document = documents[args["instance-name"]]
    stream = open(args["plain-output"], "wb")
    try:
        document.toStream(stream=stream, encoding="utf-8")
    finally:
        stream.close()
    print("Prettyprinting time %s" % (time.time() - t,))

    print("Selectors %s" % (repr(fields.get_selectors(items, documents)),))

# vim: tabstop=4 expandtab shiftwidth=4