1 #!/usr/bin/env python 2 3 """ 4 NOTE: Add support for multiple values from fields, perhaps using 5 NOTE: the following notation: 6 NOTE: 7 NOTE: /package#1/categories#1/category#n/value 8 9 Classes which process field collections, producing instance 10 documents. Each field entry consists of a field name mapped 11 to a string value, where the field name may have the following 12 formats: 13 14 /name1#n1/name2 15 /name1#n1/name2#n2/name3 16 /name1#n1/name2#n2/name3#n3/name4 17 ... 18 19 The indexes n1, n2, n3, ... indicate the position of elements 20 (starting from 1) in the entire element list, whose elements 21 may have different names. For example: 22 23 /zoo#1/name 24 /zoo#1/cage#1/name 25 /zoo#1/cage#2/name 26 /zoo#1/funding#3/contributor#1/name 27 28 Some fields may contain the "=" string. This string is 29 reserved and all text following it is meant to specify a path 30 into a particular document. For example: 31 32 _action_add_animal=/zoo#1/cage#2 33 """ 34 35 import Constants 36 import libxml2dom 37 from xml.dom import EMPTY_NAMESPACE 38 39 class FieldsError(Exception): 40 pass 41 42 class Fields: 43 44 """ 45 A class which converts fields in the documented form to XML 46 instance documents. 47 """ 48 49 def __init__(self, encoding="utf-8", values_are_lists=0): 50 51 """ 52 Initialise the fields processor with the given 'encoding', 53 which is optional and which only applies to field data in 54 Python string form (and not Unicode objects). 55 56 If the optional 'values_are_lists' parameter is set to true 57 then each actual field value will be obtained by taking the 58 first element from each supplied field value. 59 """ 60 61 self.encoding = encoding 62 self.values_are_lists = values_are_lists 63 64 def complete_documents(self, documents, fields): 65 66 """ 67 Complete the given 'documents' using the 'fields' items list. 68 """ 69 70 for field, value in fields: 71 72 # Ignore selectors. 73 74 if field.find(Constants.selector_indicator) != -1: 75 continue 76 77 model_name, components = self._get_model_name_and_components(field) 78 if model_name is None: 79 continue 80 81 # Get a new instance document if none has been made for the 82 # model. 83 84 if not documents.has_key(model_name): 85 documents[model_name] = self.new_instance(model_name) 86 node = documents[model_name] 87 88 # Traverse the components within the instance. 89 90 for component in components: 91 t = component.split(Constants.pair_separator) 92 if len(t) == 1: 93 94 # Convert from lists if necessary. 95 96 if self.values_are_lists: 97 value = value[0] 98 99 # Convert the value to Unicode if necessary. 100 101 if type(value) == type(""): 102 value = unicode(value, encoding=self.encoding) 103 104 node.setAttributeNS(EMPTY_NAMESPACE, t[0], value) 105 break 106 107 elif len(t) == 2: 108 109 # Convert from one-based indexing (the position() function) 110 # to zero-based indexing. 111 112 name, index = t[0], int(t[1]) - 1 113 if index < 0: 114 break 115 node = self._enter_element(node, name, index) 116 117 def complete_selectors(self, selectors, fields, documents): 118 119 """ 120 Fill in the given 'selectors' dictionary using the given 121 'fields' so that it contains mappings from selector names to 122 parts of the specified 'documents'. 123 """ 124 125 for field, value in fields: 126 127 # Process selectors only. 128 129 selector_components = field.split(Constants.selector_indicator) 130 if len(selector_components) < 2: 131 continue 132 133 # Get the selector name and path. 134 # Note that the joining of the components uses the separator, 135 # but the separator really should not exist in the path. 136 137 selector_name = selector_components[0] 138 path = Constants.selector_indicator.join(selector_components[1:]) 139 140 model_name, components = self._get_model_name_and_components(path) 141 if model_name is None: 142 continue 143 144 # Go to the instance element. 145 146 if not documents.has_key(model_name) or documents[model_name] is None: 147 continue 148 149 node = documents[model_name] 150 151 # Traverse the path to find the part of the document to be 152 # selected. 153 154 for component in components: 155 t = component.split(Constants.pair_separator) 156 if len(t) == 1: 157 158 # Select attribute. 159 160 node = node.getAttributeNodeNS(EMPTY_NAMESPACE, t[0]) 161 break 162 163 elif len(t) == 2: 164 165 # Convert from one-based indexing (the position() function) 166 # to zero-based indexing. 167 168 name, index = t[0], int(t[1]) - 1 169 if index < 0: 170 break 171 172 # NOTE: Controversial creation of potentially non-existent 173 # NOTE: nodes. 174 175 node = self._enter_element(node, name, index) 176 177 if not selectors.has_key(selector_name): 178 selectors[selector_name] = [] 179 selectors[selector_name].append(node) 180 181 def _enter_element(self, node, name, index): 182 183 """ 184 From 'node' enter the element with the given 'name' at the 185 given 'index' position amongst the child elements. Create 186 missing child elements if necessary. 187 """ 188 189 self._ensure_elements(node, index) 190 191 elements = node.xpath("*") 192 if elements[index].localName == "placeholder": 193 new_node = node.ownerDocument.createElementNS(EMPTY_NAMESPACE, name) 194 node.replaceChild(new_node, elements[index]) 195 else: 196 new_node = elements[index] 197 if new_node.localName != name: 198 raise FieldsError, (new_node.localName, name, elements, index) 199 200 # Enter the newly-created element. 201 202 return new_node 203 204 def _get_model_name_and_components(self, field): 205 206 """ 207 From 'field', return the model name and components which 208 describe the path within the instance document associated 209 with that model. 210 """ 211 212 # Get the components of the field name. 213 # Example: /name1#n1/name2#n2/name3 214 # Expected: ['', 'name1#n1', 'name2#n2', 'name3'] 215 216 components = field.split(Constants.path_separator) 217 if len(components) < 2: 218 return None, None 219 220 # Extract the model name from the top-level element 221 # specification. 222 # Expected: ['name1', 'n1'] 223 224 model_name_and_index = components[1].split(Constants.pair_separator) 225 if len(model_name_and_index) != 2: 226 return None, None 227 228 # Expected: 'name1', ['', 'name1#n1', 'name2#n2', 'name3'] 229 230 return model_name_and_index[0], components[1:] 231 232 def _ensure_elements(self, document, index): 233 234 """ 235 In the given 'document', extend the child elements list 236 so that a node can be stored at the given 'index'. 237 """ 238 239 elements = document.xpath("*") 240 i = len(elements) 241 while i <= index: 242 new_node = document.ownerDocument.createElementNS(EMPTY_NAMESPACE, "placeholder") 243 document.appendChild(new_node) 244 i += 1 245 246 def make_documents(self, fields): 247 248 """ 249 Make a dictionary mapping model names to new documents prepared 250 from the given 'fields' dictionary. 251 """ 252 253 documents = {} 254 self.complete_documents(documents, fields) 255 256 # Fix the dictionary to return the actual document root. 257 258 for model_name, instance_root in documents.items(): 259 documents[model_name] = instance_root 260 return documents 261 262 def get_selectors(self, fields, documents): 263 264 """ 265 Get a dictionary containing a mapping of selector names to 266 selected parts of the given 'documents'. 267 """ 268 269 selectors = {} 270 self.complete_selectors(selectors, fields, documents) 271 return selectors 272 273 def new_instance(self, name): 274 275 "Return an instance root of the given 'name' in a new document." 276 277 return libxml2dom.createDocument(EMPTY_NAMESPACE, name, None) 278 279 if __name__ == "__main__": 280 281 items = [ 282 ("_action_update", "Some value"), 283 ("_action_delete=/zoo#1/cage#2", "Some value"), 284 ("/actions#1/update#1/selected", "Some value"), # Not actually used in output documents or input. 285 ("/zoo#1/name", "The Zoo ???"), 286 ("/zoo#1/cage#1/name", "reptiles"), 287 ("/zoo#1/cage#1/capacity", "5"), 288 ("/zoo#1/cage#1/animal#1/name", "Monty"), 289 ("/zoo#1/cage#1/animal#1/species#1/name", "Python"), 290 ("/zoo#1/cage#1/animal#1/property#2/name", "texture"), 291 ("/zoo#1/cage#1/animal#1/property#2/value", "scaled"), 292 ("/zoo#1/cage#1/animal#1/property#3/name", "length"), 293 ("/zoo#1/cage#1/animal#1/property#3/value", "5m"), 294 ("/zoo#1/cage#1/animal#2/name", "Vincent"), 295 ("/zoo#1/cage#1/animal#2/species#1/name", "Lizard"), 296 ("/zoo#1/cage#1/animal#2/property#2/name", "colour"), 297 ("/zoo#1/cage#1/animal#2/property#2/value", "variable"), 298 ("/zoo#1/cage#1/animal#2/property#3/name", "length"), 299 ("/zoo#1/cage#1/animal#2/property#3/value", "1m"), 300 ("/zoo#1/cage#2/name", "mammals"), 301 ("/zoo#1/cage#2/capacity", "25"), 302 ("/zoo#1/cage#2/animal#1/name", "Simon"), 303 ("/zoo#1/cage#2/animal#1/species#1/name", "Giraffe"), 304 ("/zoo#1/cage#2/animal#2/name", "Leonard"), 305 ("/zoo#1/cage#2/animal#2/species#1/name", "Lion"), 306 ("/zoo#1/cage#2/animal#2/property#2/name", "danger"), 307 ("/zoo#1/cage#2/animal#2/property#2/value", "high"), 308 ("/zoo#1/funding#3/type", "private"), 309 ("/zoo#1/funding#3/contributor#1/name", "Animal Corporation"), 310 ("/zoo#1/funding#3/contributor#1/amount", "543210.987") 311 ] 312 313 import time 314 import sys, cmdsyntax 315 316 # Find the documents. 317 318 syntax = cmdsyntax.Syntax(""" 319 --plain-output=OUTPUT_FILE 320 --instance-name=NAME 321 """) 322 323 syntax_matches = syntax.get_args(sys.argv[1:]) 324 325 try: 326 args = syntax_matches[0] 327 except IndexError: 328 print syntax.syntax 329 sys.exit(1) 330 331 # Create an object to interpret the test data. 332 333 fields = Fields("iso-8859-1") 334 335 t = time.time() 336 documents = fields.make_documents(items) 337 print "Building time", time.time() - t 338 339 t = time.time() 340 libxml2dom.toStream(documents[args["instance-name"]], stream=open(args["plain-output"], "wb"), encoding="utf-8") 341 print "Prettyprinting time", time.time() - t 342 343 print "Selectors", repr(fields.get_selectors(items, documents)) 344 345 # vim: tabstop=4 expandtab shiftwidth=4