#!/usr/bin/env python

"""
Classes which process field collections, producing instance
documents. Each field entry consists of a field name mapped
to a string value, where the field name may have the following
formats:

    /name1#n1/name2
    /name1#n1/name2#n2/name3
    /name1#n1/name2#n2/name3#n3/name4
    ...

The indexes n1, n2, n3, ... indicate the position of elements
(starting from 1) in the entire element list, whose elements
may have different names. For example:

    /zoo#1/name
    /zoo#1/cage#1/name
    /zoo#1/cage#2/name
    /zoo#1/funding#3/contributor#1/name

Some fields may contain the "=" string. This string is
reserved and all text following it is meant to specify a path
into a particular document. For example:

    _action_add_animal=/zoo#1/cage#2
"""

import Constants
import libxml2dom
from xml.dom import EMPTY_NAMESPACE

class FieldsError(Exception):

    """
    Raised when an element already present at a requested position
    has a different name from the one given in a field path.
    """

    pass

class Fields:

    """
    A class which converts fields in the documented form to XML
    instance documents.
    """

    def __init__(self, encoding="utf-8", values_are_lists=0):

        """
        Initialise the fields processor with the given 'encoding',
        which is optional and which only applies to field data in
        Python byte string form (and not Unicode objects).

        If the optional 'values_are_lists' parameter is set to true
        then each actual field value will be obtained by taking the
        first element from each supplied field value.
        """

        self.encoding = encoding
        self.values_are_lists = values_are_lists

    def complete_documents(self, documents, fields):

        """
        Complete the given 'documents' dictionary (mapping model names
        to instance documents) using the 'fields' items list, a
        sequence of (field name, value) pairs. Documents are created
        using 'new_instance' for models not yet present in 'documents'.

        Fields containing the selector indicator are ignored, as are
        fields whose names do not yield a model name.
        """

        for field, value in fields:

            # Ignore selectors.

            if Constants.selector_indicator in field:
                continue

            model_name, components = self._get_model_name_and_components(field)
            if model_name is None:
                continue

            # Convert from lists if necessary.

            if self.values_are_lists:
                value = value[0]

            # Convert byte string values to Unicode if necessary.

            if isinstance(value, bytes):
                value = value.decode(self.encoding)

            # Get a new instance document if none has been made for the
            # model.

            if model_name not in documents:
                documents[model_name] = self.new_instance(model_name)
            node = documents[model_name]

            # Traverse the components within the instance.

            for component in components:
                parts = component.split(Constants.pair_separator)

                if len(parts) == 1:

                    # A component without an index names the attribute
                    # which receives the value; the path ends here.

                    node.setAttributeNS(EMPTY_NAMESPACE, parts[0], value)
                    break

                elif len(parts) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = parts[0], int(parts[1]) - 1

                    # Abandon the field if the position is not positive.

                    if index < 0:
                        break
                    node = self._enter_element(node, name, index)

    def complete_selectors(self, selectors, fields, documents):

        """
        Fill in the given 'selectors' dictionary using the given
        'fields' so that it contains mappings from selector names to
        lists of parts of the specified 'documents'.

        Only fields containing the selector indicator are processed.
        Selectors referring to models which have no document (or a
        None entry) in 'documents' are skipped.
        """

        for field, _value in fields:

            # Process selectors only.
            # The selector name precedes the first indicator; everything
            # after it is the path, even though the indicator really
            # should not appear within the path itself.

            selector_components = field.split(Constants.selector_indicator, 1)
            if len(selector_components) < 2:
                continue

            selector_name, path = selector_components

            model_name, components = self._get_model_name_and_components(path)
            if model_name is None:
                continue

            # Go to the instance element.

            if model_name not in documents or documents[model_name] is None:
                continue

            node = documents[model_name]

            # Traverse the path to find the part of the document to be
            # selected.

            for component in components:
                parts = component.split(Constants.pair_separator)

                if len(parts) == 1:

                    # Select attribute. Whatever getAttributeNodeNS
                    # returns is what gets recorded below.

                    node = node.getAttributeNodeNS(EMPTY_NAMESPACE, parts[0])
                    break

                elif len(parts) == 2:

                    # Convert from one-based indexing (the position() function)
                    # to zero-based indexing.

                    name, index = parts[0], int(parts[1]) - 1

                    # Stop at the node reached so far if the position is
                    # not positive.

                    if index < 0:
                        break

                    # NOTE: Controversial creation of potentially non-existent
                    # NOTE: nodes.

                    node = self._enter_element(node, name, index)

            # Record the node reached for this selector.

            selectors.setdefault(selector_name, []).append(node)

    def _enter_element(self, node, name, index):

        """
        From 'node' enter the element with the given 'name' at the
        given 'index' position amongst the child elements. Create
        missing child elements if necessary.

        Return the element entered. Raise FieldsError if an element
        other than a placeholder already occupies the position and has
        a different name.
        """

        self._ensure_elements(node, index)

        elements = node.xpath("*")
        existing = elements[index]

        if existing.localName == "placeholder":

            # Swap the placeholder for a real element of the wanted name.

            new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name)
            node.replaceChild(new_node, existing)
            return new_node

        if existing.localName != name:
            raise FieldsError(existing.localName, name, elements, index)

        return existing

    def _get_model_name_and_components(self, field):

        """
        From 'field', return the model name and components which
        describe the path within the instance document associated
        with that model. Return (None, None) if the field does not
        have the documented form.
        """

        # Get the components of the field name.
        # Example:  /name1#n1/name2#n2/name3
        # Expected: ['', 'name1#n1', 'name2#n2', 'name3']

        components = field.split(Constants.path_separator)
        if len(components) < 2:
            return None, None

        # Extract the model name from the top-level element
        # specification.
        # Expected: ['name1', 'n1']

        model_name_and_index = components[1].split(Constants.pair_separator)
        if len(model_name_and_index) != 2:
            return None, None

        # The leading empty component is dropped from the result.
        # Expected: 'name1', ['name1#n1', 'name2#n2', 'name3']

        return model_name_and_index[0], components[1:]

    def _ensure_elements(self, document, index):

        """
        In the given 'document', extend the child elements list
        so that a node can be stored at the given 'index'. The
        elements appended are named "placeholder".
        """

        existing_count = len(document.xpath("*"))
        for _ in range(existing_count, index + 1):
            new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder")
            document.appendChild(new_node)

    def make_documents(self, fields):

        """
        Make a dictionary mapping model names to new documents prepared
        from the given 'fields' items list. The values are the objects
        produced by 'new_instance'.
        """

        documents = {}
        self.complete_documents(documents, fields)
        return documents

    def get_selectors(self, fields, documents):

        """
        Get a dictionary containing a mapping of selector names to
        selected parts of the given 'documents'.
        """

        selectors = {}
        self.complete_selectors(selectors, fields, documents)
        return selectors

    def new_instance(self, name):

        "Return an instance root of the given 'name' in a new document."

        return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None)

if __name__ == "__main__":

    # NOTE(review): the "???" in the zoo name presumably stood for
    # NOTE(review): non-ASCII ISO-8859-1 characters lost in transit - confirm.

    items = [
            ("_action_update", "Some value"),
            ("_action_delete=/zoo#1/cage#2", "Some value"),
            ("/actions#1/update#1/selected", "Some value"), # Not actually used in output documents or input.
            ("/zoo#1/name", "The Zoo ???"),
            ("/zoo#1/cage#1/name", "reptiles"),
            ("/zoo#1/cage#1/capacity", "5"),
            ("/zoo#1/cage#1/animal#1/name", "Monty"),
            ("/zoo#1/cage#1/animal#1/species#1/name", "Python"),
            ("/zoo#1/cage#1/animal#1/property#2/name", "texture"),
            ("/zoo#1/cage#1/animal#1/property#2/value", "scaled"),
            ("/zoo#1/cage#1/animal#1/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#1/property#3/value", "5m"),
            ("/zoo#1/cage#1/animal#2/name", "Vincent"),
            ("/zoo#1/cage#1/animal#2/species#1/name", "Lizard"),
            ("/zoo#1/cage#1/animal#2/property#2/name", "colour"),
            ("/zoo#1/cage#1/animal#2/property#2/value", "variable"),
            ("/zoo#1/cage#1/animal#2/property#3/name", "length"),
            ("/zoo#1/cage#1/animal#2/property#3/value", "1m"),
            ("/zoo#1/cage#2/name", "mammals"),
            ("/zoo#1/cage#2/capacity", "25"),
            ("/zoo#1/cage#2/animal#1/name", "Simon"),
            ("/zoo#1/cage#2/animal#1/species#1/name", "Giraffe"),
            ("/zoo#1/cage#2/animal#2/name", "Leonard"),
            ("/zoo#1/cage#2/animal#2/species#1/name", "Lion"),
            ("/zoo#1/cage#2/animal#2/property#2/name", "danger"),
            ("/zoo#1/cage#2/animal#2/property#2/value", "high"),
            ("/zoo#1/funding#3/type", "private"),
            ("/zoo#1/funding#3/contributor#1/name", "Animal Corporation"),
            ("/zoo#1/funding#3/contributor#1/amount", "543210.987")
        ]

    import time
    import sys
    import cmdsyntax

    # Find the documents.

    syntax = cmdsyntax.Syntax("""
        --plain-output=OUTPUT_FILE
        --instance-name=NAME
        """)

    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print(syntax.syntax)
        sys.exit(1)

    # Create an object to interpret the test data.

    fields = Fields("iso-8859-1")

    t = time.time()
    documents = fields.make_documents(items)
    print("Building time %s" % (time.time() - t))

    # Write the requested instance, making sure that the output file is
    # closed even if serialisation fails.

    t = time.time()
    stream = open(args["plain-output"], "wb")
    try:
        libxml2dom.toStream(documents[args["instance-name"]], stream=stream, encoding="utf-8")
    finally:
        stream.close()
    print("Prettyprinting time %s" % (time.time() - t))

    print("Selectors %s" % repr(fields.get_selectors(items, documents)))

# vim: tabstop=4 expandtab shiftwidth=4