#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-

"""
Classes which process field collections, producing instance
documents. Each field entry consists of a field name mapped
to a string value, where the field name may have the following
formats:

    /name1#n1/name2
    /name1#n1/name2#n2/name3
    /name1#n1/name2#n2/name3#n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements
(starting from 1) in the entire element list, whose elements
may have different names. For example:

    /zoo#1/name
    /zoo#1/cage#1/name
    /zoo#1/cage#2/name
    /zoo#1/funding#3/contributor#1/name

Where multiple values can be collected for a given field, the
following notation is employed:

    /package#1/categories#1/category##value

Some fields may contain the "=" string. This string is
reserved and all text following it is meant to specify a path
into a particular document. For example:

    _action_add_animal=/zoo#1/cage#2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

# The byte string type: 'str' on Python 2 and 'bytes' on Python 3. It is
# obtained by encoding an empty string so that no version-specific type
# name has to be referenced.

_BYTE_STRING_TYPE = type("".encode("ascii"))

class FieldsError(Exception):

    """
    Raised when a field path names an element which conflicts with an
    element already present at the same position in the document.
    """

    pass

class Fields:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        byte string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def _to_unicode(self, value):

        """
        Return 'value' decoded using the configured encoding if it is
        a byte string; return any other kind of value unchanged.
        """

        if isinstance(value, _BYTE_STRING_TYPE):
            return value.decode(self.encoding)
        return value

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance
        documents and is updated in place: a new instance document is
        created for each model name not already present.

        An index which is not an integer causes a ValueError; an
        element name conflicting with an existing element at the same
        position causes a FieldsError.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # A plain name denotes an attribute on the current
                    # element and terminates the path.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._to_unicode(value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_element(node, name, index)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one new element is appended
                    # for each value, with the value stored in the
                    # attribute named after the double separator.

                    if self.values_are_lists:
                        values = value
                    else:
                        values = [value]

                    name = t[0]
                    for subvalue in values:
                        subnode = self._append_element(node, name)
                        subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], self._to_unicode(subvalue))

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'.

        Each selector name maps to a list of nodes, one per matching
        field. Whatever node has been reached when traversal of the
        path stops is the node recorded for the selector.
        """

        for field, value in fields:

            # Process selectors only.

            selector_components = field.split(Constants.selector_indicator)
            if len(selector_components) < 2:
                continue

            # Get the selector name and path.
            # Note that the joining of the components uses the separator,
            # but the separator really should not exist in the path.

            selector_name = selector_components[0]
            path = Constants.selector_indicator.join(selector_components[1:])

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            if model_name not in documents or documents[model_name] is None:
                continue

            node = documents[model_name]

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # Select attribute.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # NOTE: Controversial creation of potentially non-existent
                    # NOTE: nodes.

                    node = self._enter_element(node, name, index)

            selectors.setdefault(selector_name, []).append(node)

    def _append_element(self, node, name):

        """
        Within 'node' append an element with the given 'name',
        returning the new element.
        """

        new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
        node.appendChild(new_node)
        return new_node

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        A FieldsError is raised if an element other than a placeholder
        already occupies the position and has a different name.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        existing = elements[index]

        if existing.localName == "placeholder":

            # Replace the placeholder with the real element.

            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, existing)
        else:
            new_node = existing
            if new_node.localName != name:
                raise FieldsError(new_node.localName, name, elements, index)

        # Enter the newly-created (or previously existing) element.

        return new_node

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model. Where 'field' does not have the expected
        form, a pair of None values is returned instead.
        """

        # Get the components of the field name.
        # Example:  /name1#n1/name2#n2/name3
        # Expected: ['', 'name1#n1', 'name2#n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # Expected: 'name1', ['name1#n1', 'name2#n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. Elements
        named "placeholder" are appended to fill any gap.
        """

        i = len(document.xpath("*"))
        while i <= index:
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)
            i += 1

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo#1/cage#2", "Some value"),
            ("/actions#1/update#1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo#1/name", "The Zoo ???"),
            ("/zoo#1/cage#1/name", "reptiles"),
            ("/zoo#1/cage#1/capacity", "5"),
            ("/zoo#1/cage#1/animal#1/name", "Monty"),
            ("/zoo#1/cage#1/animal#1/species#1/name", "Python"),
            ("/zoo#1/cage#1/animal#1/property#2/name", "texture"),
            ("/zoo#1/cage#1/animal#1/property#2/value", "scaled"),
            ("/zoo#1/cage#1/animal#1/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#1/property#3/value", "5m"),
            ("/zoo#1/cage#1/animal#2/name", "Vincent"),
            ("/zoo#1/cage#1/animal#2/species#1/name", "Lizard"),
            ("/zoo#1/cage#1/animal#2/property#2/name", "colour"),
            ("/zoo#1/cage#1/animal#2/property#2/value", "variable"),
            ("/zoo#1/cage#1/animal#2/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#2/property#3/value", "1m"),
            ("/zoo#1/cage#2/name", "mammals"),
            ("/zoo#1/cage#2/capacity", "25"),
            ("/zoo#1/cage#2/animal#1/name", "Simon"),
            ("/zoo#1/cage#2/animal#1/species#1/name", "Giraffe"),
            ("/zoo#1/cage#2/animal#2/name", "Leonard"),
            ("/zoo#1/cage#2/animal#2/species#1/name", "Lion"),
            ("/zoo#1/cage#2/animal#2/property#2/name", "danger"),
            ("/zoo#1/cage#2/animal#2/property#2/value", "high"),
            ("/zoo#1/funding#3/type", "private"),
            ("/zoo#1/funding#3/contributor#1/name", "Animal Corporation"),
            ("/zoo#1/funding#3/contributor#1/amount", "543210.987"),
            ("/zoo#1/funding#3/contributor#1/industry##type", "animals")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = Fields("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    # Write the requested instance document, making sure that the output
    # file is closed even if serialisation fails.

    t = time.time()
    stream = open(args["plain-output"], "wb")
    try:
        libxml2dom.toStream(documents[args["instance-name"]], stream=stream, encoding="utf-8")
    finally:
        stream.close()
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4