#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

    /name1$n1/name2
    /name1$n1/name2$n2/name3
    /name1$n1/name2$n2/name3$n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

    /zoo$1/name
    /zoo$1/cage$1/name
    /zoo$1/cage$2/name
    /zoo$1/funding$3/contributor$1/name

Where multiple values can be collected for a given field, the following notation
is employed:

    /package$1/categories$1/category$$value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

    _action_add_animal=/zoo$1/cage$2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE


class FieldsError(Exception):

    """
    Raised when a field refers to an element position which is already occupied
    by an element with a different name.
    """

    pass


class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def _to_unicode(self, value):

        """
        Return 'value' decoded using the configured encoding if it is a byte
        string; any other kind of value is returned unchanged.
        """

        # NOTE: 'bytes' is an alias of 'str' on Python 2.6+, so this matches
        # NOTE: plain (non-Unicode) strings there and real bytes on Python 3.

        if isinstance(value, bytes):
            return value.decode(self.encoding)
        return value

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary is updated in place: a new instance is
        created (using 'new_instance') for each model name not yet present.

        A FieldsError is raised where a field names an element at a position
        already occupied by an element with a different name. A ValueError is
        propagated where an element index is not an integer.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # A plain name denotes an attribute on the current node.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    # Convert the value to Unicode if necessary.

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._to_unicode(value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    try:
                        node = self._enter_element(node, name, index)
                    except FieldsError as exc:
                        raise FieldsError(
                            "In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (
                            field, name, index, exc.args[0]))

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one child element per value, each
                    # carrying the value in the named attribute.

                    if self.values_are_lists:
                        values = value
                    else:
                        values = [value]

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)

                        # Convert the value to Unicode if necessary.

                        subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], self._to_unicode(subvalue))

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'.

        Each selector name maps to a list of selected nodes. An entry (possibly
        an empty list) is made for every selector whose path refers to a model
        with a document in 'documents', even where the path selects nothing.

        A ValueError is propagated where an element index is not an integer.
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            if model_name not in documents or documents[model_name] is None:
                continue

            node = documents[model_name]

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)

                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # Where a node cannot be found, do not create a selector.

                    node = self._find_element(node, name, index)
                    if node is None:
                        break

            # Always register the selector name; only record a node where the
            # traversal actually found one.

            selected = selectors.setdefault(selector_name, [])
            if node is not None:
                selected.append(node)

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name', returning the
        new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the entered element. A FieldsError is raised if the element
        found at 'index' is neither a placeholder nor an element called 'name';
        the first argument of the exception is the name actually found.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        existing = elements[index]

        # Placeholders are swapped for a real element upon first use.

        if existing.localName == "placeholder":
            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, existing)
            return new_node

        if existing.localName != name:
            raise FieldsError(existing.localName, name, elements, index)

        return existing

    def _find_element(self, node, name, index):

        """
        From 'node' find the element with the given 'name' at the
        given 'index' position amongst the child elements. Return
        None if no such element exists.
        """

        elements = node.xpath("*")
        try:
            found = elements[index]
        except IndexError:
            return None

        if found.localName != name:
            return None
        return found

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model.

        Return (None, None) if 'field' does not have a path whose first
        component is a name and index pair.
        """

        # Get the components of the field name.
        # (The examples use the separators shown in the module documentation.)
        # Example:  /name1$n1/name2$n2/name3
        # Expected: ['', 'name1$n1', 'name2$n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading empty component is dropped.
        # Expected: 'name1', ['name1$n1', 'name2$n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'.
        """

        # Pad with placeholder elements until position 'index' exists.

        missing = index + 1 - len(document.xpath("*"))
        for i in range(missing):
            placeholder = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(placeholder)

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

    # An alias for the older method name.

    new_document = new_instance


# NOTE: Legacy name exposure.

Fields = FieldProcessor


class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form data container with the general 'args' and 'kw'
        parameters.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}

    def set_parameters(self, parameters):

        """
        Set the request 'parameters' (or fields) in the container, rebuilding
        the container's documents from them.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        """
        Get the request parameters (or fields) from the container. Note that
        these parameters comprise the raw form field values submitted in a
        request rather than the structured form data.

        Return a dictionary mapping parameter names to values.
        """

        return self.parameters

    def get_documents(self):

        """
        Get the form data documents from the container, returning a dictionary
        mapping document names to DOM-style document objects.
        """

        return self.documents

    def get_document(self, name):

        """
        Get the form data document with the given 'name' from the container,
        returning a DOM-style document object if such a document exists, or None
        if no such document can be found.
        """

        return self.documents.get(name)

    def get_selectors(self):

        """
        Get the form data selectors from the container, returning a dictionary
        mapping selector names to collections of selected elements.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents)

    def new_instance(self, name):

        """
        Make a new document with the given 'name', storing it in the container
        and returning the document.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

    # An alias for the older method name.

    new_document = new_instance

    def set_document(self, name, doc):

        """
        Store in the container under the given 'name' the supplied document
        'doc'.
        """

        self.documents[name] = doc


if __name__ == "__main__":

    items = [
        ("_action_update", "Some value"),
        ("_action_delete=/zoo$1/cage$2", "Some value"),
        ("_action_nasty=/zoo$1/cage$3", "Some value"),
        ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.
        ("/zoo$1/name", "The Zoo ???"),
        ("/zoo$1/cage$1/name", "reptiles"),
        ("/zoo$1/cage$1/capacity", "5"),
        ("/zoo$1/cage$1/animal$1/name", "Monty"),
        ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),
        ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),
        ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),
        ("/zoo$1/cage$1/animal$1/property$3/name", "length"),
        ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),
        ("/zoo$1/cage$1/animal$2/name", "Vincent"),
        ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),
        ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),
        ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),
        ("/zoo$1/cage$1/animal$2/property$3/name", "length"),
        ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),
        ("/zoo$1/cage$2/name", "mammals"),
        ("/zoo$1/cage$2/capacity", "25"),
        ("/zoo$1/cage$2/animal$1/name", "Simon"),
        ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),
        ("/zoo$1/cage$2/animal$2/name", "Leonard"),
        ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),
        ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),
        ("/zoo$1/cage$2/animal$2/property$2/value", "high"),
        ("/zoo$1/funding$3/type", "private"),
        ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),
        ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),
        ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")
        ]

    import time
    import sys, cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    t = time.time()

    # Look up the document before opening the output file, so that an unknown
    # instance name fails without truncating the file; then make sure that the
    # file is closed once serialisation has finished.

    document = documents[args["instance-name"]]
    with open(args["plain-output"], "wb") as output:
        document.toStream(stream=output, encoding="utf-8")
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4