#!/usr/bin/env python

"""
Classes which process field collections, producing instance
documents. Each field entry consists of a field name mapped
to a string value, where the field name may have the following
formats:

    /name1#n1/name2
    /name1#n1/name2#n2/name3
    /name1#n1/name2#n2/name3#n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements
(starting from 1) in the entire element list, whose elements
may have different names. For example:

    /zoo#1/name
    /zoo#1/cage#1/name
    /zoo#1/cage#2/name
    /zoo#1/funding#3/contributor#1/name

Where multiple values can be collected for a given field, the
following notation is employed:

    /package#1/categories#1/category##value

Some fields may contain the "=" string. This string is
reserved and all text following it is meant to specify a path
into a particular document. For example:

    _action_add_animal=/zoo#1/cage#2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    """
    Raised when a field path names an element which conflicts with the
    element already present at the same position. The exception arguments
    are the existing element's local name, the requested name, the list of
    child elements and the zero-based index.
    """

    pass

class Fields:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def _to_unicode(self, value):

        """
        Return 'value' decoded using the configured encoding if it is a
        plain (byte) string; any other kind of value, such as a Unicode
        object, is returned unchanged.
        """

        if isinstance(value, str):
            return unicode(value, encoding=self.encoding)
        return value

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' using the 'fields' items list.

        The 'documents' dictionary maps model names to instance documents
        and is updated in place: a new instance is made (see 'new_instance')
        for each model name not already present. The 'fields' collection is
        a sequence of (field name, value) pairs.

        Selector fields and fields from which no model name can be obtained
        are ignored. A FieldsError is raised where a path conflicts with an
        element already in a document; a ValueError is raised by the index
        conversion where an index is not an integer.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # A bare name is the attribute which receives the value.
                    # Convert from lists if necessary.

                    if self.values_are_lists:
                        value = value[0]

                    node.setAttributeNS(EMPTY_NAMESPACE, t[0], self._to_unicode(value))
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position()
                    # function) to zero-based indexing.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break
                    node = self._enter_element(node, name, index)

                elif len(t) == 3 and t[1] == "":

                    # Multivalued fields: one child element per value, each
                    # holding its value in the attribute named by t[2].

                    if not self.values_are_lists:
                        values = [value]
                    else:
                        values = value

                    name = t[0]
                    for index, subvalue in enumerate(values):
                        subnode = self._enter_element(node, name, index)
                        subnode.setAttributeNS(EMPTY_NAMESPACE, t[2], self._to_unicode(subvalue))

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        parts of the specified 'documents'.

        Each selector name is mapped to a list to which one entry is
        appended for every selector field whose path refers to a model
        present in 'documents' (with a value other than None).
        """

        for field, value in fields:

            # Process selectors only. The text before the first indicator is
            # the selector name; everything after it is the path (which
            # really should not contain the indicator itself).

            selector_components = field.split(Constants.selector_indicator, 1)
            if len(selector_components) < 2:
                continue

            selector_name, path = selector_components

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element, skipping unknown or empty models.

            node = documents.get(model_name)
            if node is None:
                continue

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                t = component.split(Constants.pair_separator)
                if len(t) == 1:

                    # Select attribute.
                    # NOTE(review): the lookup result is recorded as-is, even
                    # NOTE(review): if the attribute is absent - confirm that
                    # NOTE(review): callers expect this.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0])
                    break

                elif len(t) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.
                    # NOTE(review): an index below 1 stops the traversal but
                    # NOTE(review): the node reached so far is still recorded.

                    name, index = t[0], int(t[1]) - 1
                    if index < 0:
                        break

                    # NOTE: Controversial creation of potentially non-existent
                    # NOTE: nodes.

                    node = self._enter_element(node, name, index)

            selectors.setdefault(selector_name, []).append(node)

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the element entered. Raise FieldsError if an element with
        a different name already occupies the position.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        current = elements[index]

        if current.localName == "placeholder":

            # Replace the filler element with the real one.

            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, current)
            return new_node

        if current.localName != name:
            raise FieldsError(current.localName, name, elements, index)

        return current

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model.

        Return (None, None) if 'field' has no path separator or if its
        top-level component is not a single name and index pair.
        """

        # Get the components of the field name.
        # Example:  /name1#n1/name2#n2/name3
        # Expected: ['', 'name1#n1', 'name2#n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading empty component is dropped from the result.
        # Expected: 'name1', ['name1#n1', 'name2#n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'.

        Elements named "placeholder" are appended until enough child
        elements exist.
        """

        # NOTE(review): relies on 'document.ownerDocument' being usable for
        # NOTE(review): whatever node is passed in, including a document
        # NOTE(review): node itself - confirm against libxml2dom.

        for i in range(len(document.xpath("*")), index + 1):
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list of (field name, value) pairs.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

if __name__ == "__main__":

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo#1/cage#2", "Some value"),
            ("/actions#1/update#1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo#1/name", "The Zoo ???"),
            ("/zoo#1/cage#1/name", "reptiles"),
            ("/zoo#1/cage#1/capacity", "5"),
            ("/zoo#1/cage#1/animal#1/name", "Monty"),
            ("/zoo#1/cage#1/animal#1/species#1/name", "Python"),
            ("/zoo#1/cage#1/animal#1/property#2/name", "texture"),
            ("/zoo#1/cage#1/animal#1/property#2/value", "scaled"),
            ("/zoo#1/cage#1/animal#1/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#1/property#3/value", "5m"),
            ("/zoo#1/cage#1/animal#2/name", "Vincent"),
            ("/zoo#1/cage#1/animal#2/species#1/name", "Lizard"),
            ("/zoo#1/cage#1/animal#2/property#2/name", "colour"),
            ("/zoo#1/cage#1/animal#2/property#2/value", "variable"),
            ("/zoo#1/cage#1/animal#2/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#2/property#3/value", "1m"),
            ("/zoo#1/cage#2/name", "mammals"),
            ("/zoo#1/cage#2/capacity", "25"),
            ("/zoo#1/cage#2/animal#1/name", "Simon"),
            ("/zoo#1/cage#2/animal#1/species#1/name", "Giraffe"),
            ("/zoo#1/cage#2/animal#2/name", "Leonard"),
            ("/zoo#1/cage#2/animal#2/species#1/name", "Lion"),
            ("/zoo#1/cage#2/animal#2/property#2/name", "danger"),
            ("/zoo#1/cage#2/animal#2/property#2/value", "high"),
            ("/zoo#1/funding#3/type", "private"),
            ("/zoo#1/funding#3/contributor#1/name", "Animal Corporation"),
            ("/zoo#1/funding#3/contributor#1/amount", "543210.987"),
            ("/zoo#1/funding#3/contributor#1/industry##type", "animals")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = Fields("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    t = time.time()

    # Look up the document before opening the output file so that an unknown
    # instance name does not leave an empty file behind, then make sure the
    # file is closed whatever happens during serialisation.

    document = documents[args["instance-name"]]
    stream = open(args["plain-output"], "wb")
    try:
        libxml2dom.toStream(document, stream=stream, encoding="utf-8")
    finally:
        stream.close()
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4