#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

    /name1$n1/name2
    /name1$n1/name2$n2/name3
    /name1$n1/name2$n2/name3$n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

    /zoo$1/name
    /zoo$1/cage$1/name
    /zoo$1/cage$2/name
    /zoo$1/funding$3/contributor$1/name

Where multiple values can be collected for a given field, the following notation
is employed:

    /package$1/categories$1/category$$value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

    _action_add_animal=/zoo$1/cage$2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    """
    An error raised when a field path cannot be reconciled with the elements
    already present in an instance document.
    """

    pass

class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        byte string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value. For multivalued
        fields, every element of the supplied value is used instead.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance documents and
        is updated in place: a document is created using 'new_instance' for
        each model name encountered which has no entry. The 'fields' object is
        an iterable of (field name, value) pairs.

        Fields containing the selector indicator are ignored, as are fields
        from which no model name can be obtained. Path components which match
        none of the documented forms are skipped without any error.

        A FieldsError is raised if a path component names an element where an
        element of a different name already exists. A ValueError is raised by
        the integer conversion if an element index is not numeric.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # A plain name: store the value in an attribute and stop.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        single_value = value[0]
                    else:
                        single_value = value

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._to_unicode(single_value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    node = self._enter_checked(node, field, name, index)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one new element per value, each
                    # carrying the value in the named attribute.

                    if self.values_are_lists:
                        values = value
                    else:
                        values = [value]

                    name, attribute_name = t[0], t[2]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        subnode.setAttributeNS(EMPTY_NAMESPACE, attribute_name, self._to_unicode(subvalue))

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'.

        Each selector name is mapped to a list to which the selected node is
        appended. Fields without the selector indicator are ignored, as are
        selectors whose paths refer to a model having no document (or None) in
        'documents'.

        A FieldsError is raised if a path component names an element where an
        element of a different name already exists. A ValueError is raised by
        the integer conversion if an element index is not numeric.
        """

        for field, value in fields:

            # Process selectors only.
            # Everything after the first indicator is the path, although the
            # indicator really should not exist in the path.

            selector_name, indicator, path = field.partition(Constants.selector_indicator)
            if not indicator:
                continue

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            node = documents.get(model_name)
            if node is None:
                continue

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # Select attribute.
                    # NOTE: The result of getAttributeNodeNS is stored as it
                    # NOTE: is, whatever it yields for an absent attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # NOTE: Controversial creation of potentially non-existent
                    # NOTE: nodes.

                    node = self._enter_checked(node, field, name, index)

            selectors.setdefault(selector_name, []).append(node)

    def _to_unicode(self, value):

        """
        Return 'value' decoded using the configured encoding if it is a byte
        string. Any other kind of value is returned unchanged.
        """

        if isinstance(value, bytes):
            return value.decode(self.encoding)
        return value

    def _enter_checked(self, node, field, name, index):

        """
        Enter the element with the given 'name' at the zero-based 'index'
        within 'node', as done by '_enter_element', but report any conflict
        using a FieldsError whose message mentions the given 'field'.
        """

        try:
            return self._enter_element(node, name, index)
        except FieldsError as exc:

            # The first argument of the original error is the name of the
            # element actually found at the position.

            raise FieldsError("In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (
                field, name, index, exc.args[0]))

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name'.
        Return the new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the element entered. Raise a FieldsError, whose arguments are
        the name found, the name requested, the child elements and the index,
        if the element at the position has a different name.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        existing = elements[index]

        # NOTE: Any element called "placeholder" is treated as an unused
        # NOTE: position and is replaced, so that name is effectively reserved.

        if existing.localName == "placeholder":
            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, existing)
            return new_node

        if existing.localName != name:
            raise FieldsError(existing.localName, name, elements, index)

        return existing

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model.

        Return (None, None) if 'field' has no path separator or if its first
        path component is not a name and index pair.
        """

        # Get the components of the field name, shown here using the notation
        # from the module documentation.
        # Example: /name1$n1/name2$n2/name3
        # Expected: ['', 'name1$n1', 'name2$n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The part before the first path separator is discarded.
        # Expected: 'name1', ['name1$n1', 'name2$n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'.

        Elements named "placeholder" are appended until enough child elements
        exist. Nothing is done if enough are already present.
        """

        # NOTE: The number of placeholders is dictated by the index found in
        # NOTE: the field name and is not limited here, so field names from
        # NOTE: untrusted sources can cause arbitrarily many to be created.

        elements = document.xpath("*")
        for i in range(len(elements), index + 1):
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

    # An alias for the older method name.

    new_document = new_instance

# NOTE: Legacy name exposure.

Fields = FieldProcessor

class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form data container with the general 'args' and 'kw'
        parameters, which are passed on to the FieldProcessor initialiser.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}

    def set_parameters(self, parameters):

        """
        Set the request 'parameters' (or fields) in the container, replacing
        the stored documents with those made from the parameters.
        """

        self.parameters = parameters

        # NOTE: Documents are created through new_instance below, which also
        # NOTE: records them in the previous documents dictionary before that
        # NOTE: dictionary is replaced here.

        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        """
        Get the request parameters (or fields) from the container. Note that
        these parameters comprise the raw form field values submitted in a
        request rather than the structured form data.

        Return a dictionary mapping parameter names to values.
        """

        return self.parameters

    def get_documents(self):

        """
        Get the form data documents from the container, returning a dictionary
        mapping document names to DOM-style document objects.
        """

        return self.documents

    def get_document(self, name):

        """
        Get the form data document with the given 'name' from the container,
        returning a DOM-style document object if such a document exists, or None
        if no such document can be found.
        """

        return self.documents.get(name)

    def get_selectors(self):

        """
        Get the form data selectors from the container, returning a dictionary
        mapping selector names to collections of selected elements.

        Unlike the method being overridden, this takes no arguments and uses
        the stored parameters and documents.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents)

    def new_instance(self, name):

        """
        Make a new document with the given 'name', storing it in the container
        and returning the document.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

    # An alias for the older method name.

    new_document = new_instance

    def set_document(self, name, doc):

        """
        Store in the container under the given 'name' the supplied document
        'doc'.
        """

        self.documents[name] = doc

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo$1/cage$2", "Some value"),
            ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo$1/name", "The Zoo ???"),
            ("/zoo$1/cage$1/name", "reptiles"),
            ("/zoo$1/cage$1/capacity", "5"),
            ("/zoo$1/cage$1/animal$1/name", "Monty"),
            ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),
            ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),
            ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),
            ("/zoo$1/cage$1/animal$1/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),
            ("/zoo$1/cage$1/animal$2/name", "Vincent"),
            ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),
            ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),
            ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),
            ("/zoo$1/cage$1/animal$2/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),
            ("/zoo$1/cage$2/name", "mammals"),
            ("/zoo$1/cage$2/capacity", "25"),
            ("/zoo$1/cage$2/animal$1/name", "Simon"),
            ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),
            ("/zoo$1/cage$2/animal$2/name", "Leonard"),
            ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),
            ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),
            ("/zoo$1/cage$2/animal$2/property$2/value", "high"),
            ("/zoo$1/funding$3/type", "private"),
            ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),
            ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),
            ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    # Find the requested document before opening the output file, and make
    # sure that the file is closed once the document has been written.

    t = time.time()
    document = documents[args["instance-name"]]
    with open(args["plain-output"], "wb") as stream:
        document.toStream(stream=stream, encoding="utf-8")
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4