#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Interpretation of field collections from sources such as HTTP request parameter
dictionaries.

Copyright (C) 2005, 2006, 2007 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
details.

You should have received a copy of the GNU Lesser General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.

--------

Classes which process field collections, producing instance documents. Each
field entry consists of a field name mapped to a string value, where the field
name may have the following formats:

    /name1$n1/name2
    /name1$n1/name2$n2/name3
    /name1$n1/name2$n2/name3$n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements (starting from 1)
in the entire element list, whose elements may have different names. For
example:

    /zoo$1/name
    /zoo$1/cage$1/name
    /zoo$1/cage$2/name
    /zoo$1/funding$3/contributor$1/name

Where multiple values can be collected for a given field, the following notation
is employed:

    /package$1/categories$1/category$$value

Some fields may contain the "=" string. This string is reserved and all text
following it is meant to specify a path into a particular document. For example:

    _action_add_animal=/zoo$1/cage$2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    """
    An error raised when an element required by a field cannot be entered
    because a differently named element already occupies its position.
    """

    pass

class FieldProcessor:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary is modified in place: a new instance is
        created (using 'new_instance') for each model name mentioned in
        'fields' which does not yet have an entry. Each item in 'fields' is a
        (field name, value) pair.

        A FieldsError is raised if an element named in a field collides with
        an existing element of a different name at the same position.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # A plain name denotes an attribute on the current node.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._prepare_value(value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_field_element(node, name, index, field)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one new element per value, each
                    # carrying the value in the attribute named by t[2].

                    if not self.values_are_lists:
                        values = [value]
                    else:
                        values = value

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], self._prepare_value(subvalue))

    def complete_selectors(self, selectors, fields, documents, create):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'. If 'create' is set to a
        true value, selected elements will be created if not already
        present; otherwise, ignore such selectors.

        Each entry in 'selectors' is a list of selected nodes. A FieldsError
        is raised if 'create' is set and an element to be created collides
        with an existing element of a different name.
        """

        for field, _ in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element, skipping selectors which refer to
            # missing (or empty) documents.

            node = documents.get(model_name)
            if node is None:
                continue

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # If create is set, create selected elements.

                    if create:
                        node = self._enter_field_element(node, name, index, field)

                    # Where a node cannot be found, do not create a selector.

                    else:
                        node = self._find_element(node, name, index)
                        if node is None:
                            break

            # The selector entry is always created, but only found nodes are
            # recorded in it.

            if selector_name not in selectors:
                selectors[selector_name] = []
            if node is not None:
                selectors[selector_name].append(node)

    def _prepare_value(self, value):

        """
        Return 'value' as text suitable for an attribute: byte strings are
        decoded using the configured encoding, and CR characters are removed.
        """

        if isinstance(value, bytes):
            value = value.decode(self.encoding)
        return value.replace("\r", "")

    def _enter_field_element(self, node, name, index, field):

        """
        Enter (creating if necessary) the element with the given 'name' at the
        given 'index' within 'node', reporting any failure in terms of the
        'field' being processed.
        """

        try:
            return self._enter_element(node, name, index)
        except FieldsError as exc:
            raise FieldsError(
                "In field '%s', name '%s' and index '%s' could not be added, since '%s' was found." % (
                field, name, index, exc.args[0]))

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name'.
        Return the new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the entered element. Raise a FieldsError, whose first argument
        is the name of the element actually found, if an element with a
        different name already occupies the position.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        if elements[index].localName == "placeholder":

            # Swap the placeholder for the real element.

            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, elements[index])
        else:
            new_node = elements[index]
            if new_node.localName != name:
                raise FieldsError(new_node.localName, name, elements, index)

        # Enter the newly-created (or existing) element.

        return new_node

    def _find_element(self, node, name, index):

        """
        From 'node' find the element with the given 'name' at the
        given 'index' position amongst the child elements. Return
        None if no such element exists.
        """

        elements = node.xpath("*")
        try:
            new_node = elements[index]
        except IndexError:
            return None
        if new_node.localName != name:
            return None
        return new_node

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model.

        Return (None, None) if 'field' does not contain a path separator or
        if its top-level component is not a name and index pair.
        """

        # Get the components of the field name.
        # (Examples are shown using the separators from the module
        # documentation; the actual separators are taken from Constants.)
        # Example:  /name1$n1/name2$n2/name3
        # Expected: ['', 'name1$n1', 'name2$n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading (empty) component is dropped from the result.
        # Expected: 'name1', ['name1$n1', 'name2$n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'.

        Elements named "placeholder" are appended for this purpose.
        """

        missing = index + 1 - len(document.xpath("*"))
        for _ in range(missing):
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list (a sequence of field name and
        value pairs).
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents, create=0):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'. If 'create' is set
        to a true value, selected elements will be created if not
        already present.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents, create)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

    # An alias for the older method name.

    new_document = new_instance

# NOTE: Legacy name exposure.

Fields = FieldProcessor

class Form(FieldProcessor):

    "A collection of documents processed from form fields."

    def __init__(self, *args, **kw):

        """
        Initialise the form data container with the general 'args' and 'kw'
        parameters.
        """

        FieldProcessor.__init__(self, *args, **kw)
        self.parameters = {}
        self.documents = {}

        # Activity-related attributes.

        self.current_activity = None
        self.current_document = None

    def set_parameters(self, parameters):

        """
        Set the request 'parameters' (or fields) in the container, rebuilding
        the form data documents from them.
        """

        self.parameters = parameters
        self.documents = self.make_documents(self.parameters.items())

    def get_parameters(self):

        """
        Get the request parameters (or fields) from the container. Note that
        these parameters comprise the raw form field values submitted in a
        request rather than the structured form data.

        Return a dictionary mapping parameter names to values.
        """

        return self.parameters

    def get_documents(self):

        """
        Get the form data documents from the container, returning a dictionary
        mapping document names to DOM-style document objects.
        """

        return self.documents

    def get_document(self, name):

        """
        Get the form data document with the given 'name' from the container,
        returning a DOM-style document object if such a document exists, or None
        if no such document can be found.
        """

        return self.documents.get(name)

    def get_selectors(self, create=0):

        """
        Get the form data selectors from the container, returning a dictionary
        mapping selector names to collections of selected elements. If 'create'
        is set to a true value (unlike the default), the selected elements will
        be created in the form data document if not already present.
        """

        return FieldProcessor.get_selectors(self, self.parameters.items(), self.documents, create)

    def get_selector(self, name, create=0):

        """
        Get the form data selectors for the given 'name', returning a
        dictionary (as produced by 'get_selectors') built only from those
        parameters whose names start with 'name' followed by the selector
        indicator. If 'create' is set to a true value (unlike the default),
        the selected elements will be created in the form data document if
        not already present.
        """

        # Filter the stored request parameters (not a local collection) so
        # that only the fields belonging to the named selector are processed.

        prefix = name + Constants.selector_indicator
        parameters = [
            (parameter_name, value)
            for parameter_name, value in self.parameters.items()
            if parameter_name.startswith(prefix)
            ]
        return FieldProcessor.get_selectors(self, parameters, self.documents, create)

    def new_instance(self, name):

        """
        Make a new document with the given 'name', storing it in the container
        and returning the document.
        """

        doc = FieldProcessor.new_instance(self, name)
        self.documents[name] = doc
        return doc

    # An alias for the older method name.

    new_document = new_instance

    def set_document(self, name, doc):

        """
        Store in the container under the given 'name' the supplied document
        'doc'.
        """

        self.documents[name] = doc

    # Support for activities and the main/default document.

    def set_activity(self, name):

        "Set the current activity to the given 'name'."

        self.current_activity = name

    def get_activity(self):

        "Return the current activity (None if none has been set)."

        return self.current_activity

    # NOTE(review): The two definitions below reuse the names 'set_document'
    # NOTE(review): and 'get_document' and, being defined later in the class
    # NOTE(review): body, replace the name-based versions defined above.
    # NOTE(review): Confirm which interface callers expect before removing or
    # NOTE(review): renaming either pair.

    def set_document(self, doc):

        "Set the current (main/default) document to 'doc'."

        self.current_document = doc

    def get_document(self):

        "Return the current (main/default) document (None if none has been set)."

        return self.current_document

if __name__ == "__main__":

    items = [
        ("_action_update", "Some value"),
        ("_action_delete=/zoo$1/cage$2", "Some value"),
        ("_action_nasty=/zoo$1/cage$3", "Some value"),
        ("/actions$1/update$1/selected", "Some value"), # Not actually used in output documents or input.
        ("/zoo$1/name", "The Zoo ???"),
        ("/zoo$1/cage$1/name", "reptiles"),
        ("/zoo$1/cage$1/capacity", "5"),
        ("/zoo$1/cage$1/animal$1/name", "Monty"),
        ("/zoo$1/cage$1/animal$1/species$1/name", "Python"),
        ("/zoo$1/cage$1/animal$1/property$2/name", "texture"),
        ("/zoo$1/cage$1/animal$1/property$2/value", "scaled"),
        ("/zoo$1/cage$1/animal$1/property$3/name", "length"),
        ("/zoo$1/cage$1/animal$1/property$3/value", "5m"),
        ("/zoo$1/cage$1/animal$2/name", "Vincent"),
        ("/zoo$1/cage$1/animal$2/species$1/name", "Lizard"),
        ("/zoo$1/cage$1/animal$2/property$2/name", "colour"),
        ("/zoo$1/cage$1/animal$2/property$2/value", "variable"),
        ("/zoo$1/cage$1/animal$2/property$3/name", "length"),
        ("/zoo$1/cage$1/animal$2/property$3/value", "1m"),
        ("/zoo$1/cage$2/name", "mammals"),
        ("/zoo$1/cage$2/capacity", "25"),
        ("/zoo$1/cage$2/animal$1/name", "Simon"),
        ("/zoo$1/cage$2/animal$1/species$1/name", "Giraffe"),
        ("/zoo$1/cage$2/animal$2/name", "Leonard"),
        ("/zoo$1/cage$2/animal$2/species$1/name", "Lion"),
        ("/zoo$1/cage$2/animal$2/property$2/name", "danger"),
        ("/zoo$1/cage$2/animal$2/property$2/value", "high"),
        ("/zoo$1/funding$3/type", "private"),
        ("/zoo$1/funding$3/contributor$1/name", "Animal Corporation"),
        ("/zoo$1/funding$3/contributor$1/amount", "543210.987"),
        ("/zoo$1/funding$3/contributor$1/industry$$type", "animals")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = FieldProcessor("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    # Write the chosen instance, making sure that the output file is closed.

    t = time.time()
    with open(args["plain-output"], "wb") as stream:
        documents[args["instance-name"]].toStream(stream=stream, encoding="utf-8")
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4