#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

    /name1$n1/name2
    /name1$n1/name2$n2/name3
    /name1$n1/name2$n2/name3$n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

    /zoo$1/name
    /zoo$1/cage$1/name
    /zoo$1/cage$2/name
    /zoo$1/funding$3/contributor$1/name

Where multiple values can be collected for a given field, the following notation
is employed:

    /package$1/categories$1/category$$value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

    _action_add_animal=/zoo$1/cage$2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    """
    An error raised when a field cannot be applied to an instance document,
    such as when the element found at a given position has a different name
    from the one given in the field.
    """

    pass

class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance documents
        and is updated in place: a new instance is created (using the
        'new_instance' method) for each model name not already present.
        The 'fields' are (field name, value) pairs; selector fields are
        ignored here.

        A FieldsError is raised if an element named in a field conflicts
        with an existing element at the same position.
        """

        for field, value in fields:

            # Ignore selectors.

            if field.find(Constants.selector_indicator) != -1:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # A plain name denotes an attribute and ends the path.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._clean_value(value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_element_for_field(node, name, index, field)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one element is appended for each
                    # value, with the value stored in the named attribute.

                    if not self.values_are_lists:
                        values = [value]
                    else:
                        values = value

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], self._clean_value(subvalue))

    def complete_selectors(self, selectors, fields, documents, create):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'. If 'create' is set to a
        true value, selected elements will be created if not already
        present; otherwise, ignore such selectors.

        Each selector name is mapped to a list of selected nodes; the
        list is created (possibly remaining empty) for every selector
        whose model has a document in 'documents'.
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            if model_name not in documents or documents[model_name] is None:
                continue

            node = documents[model_name]

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # If create is set, create selected elements.

                    if create:
                        node = self._enter_element_for_field(node, name, index, field)

                    # Where a node cannot be found, do not create a selector.

                    else:
                        node = self._find_element(node, name, index)
                        if node is None:
                            break

            selected = selectors.setdefault(selector_name, [])
            if node is not None:
                selected.append(node)

    def _clean_value(self, value):

        """
        Return the given 'value' as Unicode text with CR characters
        removed. Values in byte string form are decoded using the
        encoding of this processor; other values are assumed to be
        Unicode objects already.
        """

        # On Python 2, 'bytes' is the plain string type, so this decodes
        # exactly those values which are not Unicode objects.

        if isinstance(value, bytes):
            value = value.decode(self.encoding)
        return value.replace("\r", "")

    def _enter_element_for_field(self, node, name, index, field):

        """
        Enter the element with the given 'name' at the given 'index'
        within 'node', as done by '_enter_element', but report any
        FieldsError in terms of the 'field' being processed.
        """

        try:
            return self._enter_element(node, name, index)
        except FieldsError as exc:
            raise FieldsError("In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (
                field, name, index, exc.args[0]))

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name'.
        Return the new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the entered element. Raise a FieldsError, whose first
        argument is the name actually found, if the element at 'index'
        is not a placeholder and has a name other than 'name'.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        if elements[index].localName == "placeholder":

            # Replace the placeholder with a real element.

            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, elements[index])
        else:
            new_node = elements[index]
            if new_node.localName != name:
                raise FieldsError(new_node.localName, name, elements, index)

        # Enter the newly-created (or existing) element.

        return new_node

    def _find_element(self, node, name, index):

        """
        From 'node' find the element with the given 'name' at the
        given 'index' position amongst the child elements. Return
        None if no such element exists.
        """

        elements = node.xpath("*")
        try:
            new_node = elements[index]
        except IndexError:
            return None
        if new_node.localName != name:
            return None
        return new_node

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model. Return (None, None) if 'field' does not
        have the expected form.
        """

        # Get the components of the field name.
        # The examples below assume the separators shown in the module
        # documentation ("/" between components, "$" before indexes).
        # Example:  /name1$n1/name2$n2/name3
        # Expected: ['', 'name1$n1', 'name2$n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading empty component is not returned.
        # Expected: 'name1', ['name1$n1', 'name2$n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. Elements
        named "placeholder" are appended to fill any gap.
        """

        missing = index + 1 - len(document.xpath("*"))
        for _ in range(missing):
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list of (field name, value) pairs.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents, create=0):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'. If 'create' is set
        to a true value, selected elements will be created if not
        already present.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents, create)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

    # An alias for the older method name.

    new_document = new_instance

# NOTE: Legacy name exposure.

Fields = FieldProcessor

class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form data container with the general 'args' and 'kw'
        parameters.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}

    def set_parameters(self, parameters):

        """
        Set the request 'parameters' (or fields) in the container, and
        rebuild the form data documents from them.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        """
        Get the request parameters (or fields) from the container. Note that
        these parameters comprise the raw form field values submitted in a
        request rather than the structured form data.

        Return a dictionary mapping parameter names to values.
        """

        return self.parameters

    def get_documents(self):

        """
        Get the form data documents from the container, returning a dictionary
        mapping document names to DOM-style document objects.
        """

        return self.documents

    def get_document(self, name):

        """
        Get the form data document with the given 'name' from the container,
        returning a DOM-style document object if such a document exists, or None
        if no such document can be found.
        """

        return self.documents.get(name)

    def get_selectors(self, create=0):

        """
        Get the form data selectors from the container, returning a dictionary
        mapping selector names to collections of selected elements. If 'create'
        is set to a true value (unlike the default), the selected elements will
        be created in the form data document if not already present.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents, create)

    def get_selector(self, name, create=0):

        """
        Get the form data selectors for the given 'name'. Only those request
        parameters whose names start with 'name' followed by the selector
        indicator are considered, and the result is the dictionary produced
        by FieldProcessor.get_selectors for those parameters, mapping selector
        names to collections of selected elements. If 'create' is set to a
        true value (unlike the default), the selected elements will be created
        in the form data document if not already present.
        """

        # Filter the stored request parameters; the list built here only
        # holds the matching (name, value) pairs.

        prefix = name + Constants.selector_indicator
        parameters = [
            (parameter_name, value)
            for parameter_name, value in self.parameters.items()
            if parameter_name.startswith(prefix)
            ]
        return FieldProcessor.get_selectors(self, parameters, self.documents, create)

    def new_instance(self, name):

        """
        Make a new document with the given 'name', storing it in the container
        and returning the document.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

    # An alias for the older method name.

    new_document = new_instance

    def set_document(self, name, doc):

        """
        Store in the container under the given 'name' the supplied document
        'doc'.
        """

        self.documents[name] = doc

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo$1/cage$2", "Some value"),
            ("_action_nasty=/zoo$1/cage$3", "Some value"),
            ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo$1/name", "The Zoo ???"),
            ("/zoo$1/cage$1/name", "reptiles"),
            ("/zoo$1/cage$1/capacity", "5"),
            ("/zoo$1/cage$1/animal$1/name", "Monty"),
            ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),
            ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),
            ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),
            ("/zoo$1/cage$1/animal$1/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),
            ("/zoo$1/cage$1/animal$2/name", "Vincent"),
            ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),
            ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),
            ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),
            ("/zoo$1/cage$1/animal$2/property$3/name", "length"),
            ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),
            ("/zoo$1/cage$2/name", "mammals"),
            ("/zoo$1/cage$2/capacity", "25"),
            ("/zoo$1/cage$2/animal$1/name", "Simon"),
            ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),
            ("/zoo$1/cage$2/animal$2/name", "Leonard"),
            ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),
            ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),
            ("/zoo$1/cage$2/animal$2/property$2/value", "high"),
            ("/zoo$1/funding$3/type", "private"),
            ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),
            ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),
            ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    # Write the requested document, making sure the output file is closed
    # even if serialisation fails.

    t = time.time()
    stream = open(args["plain-output"], "wb")
    try:
        documents[args["instance-name"]].toStream(stream=stream, encoding="utf-8")
    finally:
        stream.close()
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4