#!/usr/bin/env python

"""
Encoder functions, producing representations of program objects.

Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from common import first

# Output encoding and decoding for the summary files.

def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage, returning the names joined by
    ", " or, where the joined result is empty, the placeholder "{}".
    """

    return ", ".join(attrnames) or "{}"

def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries, returning
    "constrained" for a true status and "deduced" otherwise.
    """

    return "constrained" if constrained else "deduced"

def encode_usage(usage):

    """
    Encode attribute details from 'usage', being a collection of (attribute
    name, invocation, assignment) tuples.

    Each name is given a "!" suffix if invoked, otherwise a "=" suffix if
    assigned, otherwise no suffix. The encoded names are joined by ", " with
    "{}" being returned where the joined result is empty.
    """

    all_attrnames = []
    for attrname, invocation, assignment in usage:
        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""
        all_attrnames.append("%s%s" % (attrname, suffix))
    return ", ".join(all_attrnames) or "{}"

def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of distinct
    (attribute name, invocation, assignment) tuples.

    The string is split on ", " and any trailing "!" or "=" characters are
    removed from each name, with the flags indicating which suffix was found.
    """

    all_attrnames = set()
    for attrname_str in s.split(", "):
        all_attrnames.add((
            attrname_str.rstrip("!="),
            attrname_str.endswith("!"),
            attrname_str.endswith("="),
            ))

    return tuple(sorted(all_attrnames))

def encode_access_location(t):

    """
    Encode the access location 't', being a (path, name, attrname, version)
    tuple, with "{}" being used in place of an absent name.
    """

    path, name, attrname, version = t
    return "%s %s %s:%d" % (path, name or "{}", attrname, version)

def encode_location(t):

    """
    Encode the general location 't', being a (path, name, attrname, version)
    tuple, in a concise form.

    With a name and version, "path name:version" is produced; with only a
    name, "path name" is produced; otherwise, "path :attrname" is produced.
    """

    path, name, attrname, version = t
    if name is not None and version is not None:
        return "%s %s:%d" % (path, name, version)
    elif name is not None:
        return "%s %s" % (path, name)
    else:
        return "%s :%s" % (path, attrname)

def encode_modifiers(modifiers):

    """
    Encode assignment details from 'modifiers', concatenating the encoded form
    of each modifier term.
    """

    return "".join(map(encode_modifier_term, modifiers))

def encode_modifier_term(t):

    """
    Encode modifier 't', being an (assignment, invocation) tuple representing
    assignment status, as "=" for assignment, otherwise "!" for invocation,
    otherwise "_".
    """

    assignment, invocation = t
    if assignment:
        return "="
    elif invocation:
        return "!"
    else:
        return "_"

def decode_modifier_term(s):

    """
    Decode modifier term 's' representing assignment status, returning an
    (assignment, invocation) tuple of boolean values.
    """

    return (s == "=", s == "!")



# Test generation functions.

def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types. The result is a list containing the result
    of calling get_kind on each reference.
    """

    return [ref.get_kind() for ref in all_types]

def test_label_for_kind(kind):

    """
    Return the label used for 'kind' in test details: "instance" for the
    "<instance>" kind and "type" for anything else.
    """

    return "instance" if kind == "<instance>" else "type"

def test_label_for_type(ref):

    "Return the label used for 'ref' in test details."

    return test_label_for_kind(ref.get_kind())



# Instruction representation encoding.

def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    Tuple arguments are encoded recursively. Plain values are used directly,
    with "{}" replacing any false value. An instruction without arguments is
    encoded as the operation alone.
    """

    op = instruction[0]
    args = instruction[1:]

    if not args:
        return op

    a = []
    for arg in args:
        if isinstance(arg, tuple):
            a.append(encode_instruction(arg))
        else:
            a.append(arg or "{}")

    return "%s(%s)" % (op, ", ".join(a))



# Output program encoding.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

static_ops = (
    "__load_static",
    )

reference_acting_ops = attribute_ops + checked_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops

def encode_access_instruction(instruction, subs):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    if not args:
        argstr = ""

    else:
        # Encode the arguments. Only the first argument is encoded with
        # knowledge of the operation; the others are encoded without it.

        a = []
        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

        # Modify certain arguments.

        # Convert attribute name arguments to position symbols.

        if op in attribute_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)

        # Convert attribute name arguments to position and code symbols.

        elif op in checked_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Convert type name arguments to position and code symbols. The
        # original, unencoded argument is used to form the type attribute.

        elif op in typename_ops:
            arg = encode_type_attribute(args[1])
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Obtain addresses of type arguments.

        elif op in type_ops:
            a[1] = "&%s" % a[1]

        # Obtain addresses of static objects.

        elif op in static_ops:
            a[0] = "&%s" % a[0]
            a[1] = "&%s" % a[1]

        argstr = "(%s)" % ", ".join(map(str, a))

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)
        op = subs[op]
    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op):

    """
    Encode 'arg' using 'subs' to define substitutions, returning a tuple
    containing the encoded form of 'arg' along with a collection of any
    substituted values.

    The 'op' parameter indicates the operation for which 'arg' is being
    encoded, or a false value if the operation is not to be considered.
    """

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs)

        # Convert attribute results to references where required.

        if op and op in reference_acting_ops and arg[0] in attribute_producing_ops:
            return "%s.value" % encoded, substituted
        else:
            return encoded, substituted

    # Special values only need replacing, not encoding.

    elif arg in subs:
        return subs[arg], set([arg])

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and arg != "<accessor>":
        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def encode_bound_reference(path):

    "Encode 'path' as a bound method name."

    return "__bound_%s" % encode_path(path)

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_initialiser_pointer(path):

    "Encode 'path' as a reference to an initialiser function structure."

    return encode_path("%s.__init__" % path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence. A single instruction is returned
    unchanged; otherwise, the instructions are separated by commas and
    newlines and enclosed in parentheses.
    """

    if len(instructions) == 1:
        return instructions[0]
    else:
        return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%d" % n

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', being the
    length of a string value and zero for any other kind of value.
    """

    if isinstance(value, basestring):
        return len(value)
    else:
        return 0

def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, being the
    name of the value's class followed by "value".
    """

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integers and floating point numbers are returned in their plain string
    form. Any other value is converted to a string and returned as a
    double-quoted string literal in which quotes, backslashes and common
    control characters are backslash-escaped, and in which all characters
    outside the printable ASCII range are written as octal escapes.
    """

    if isinstance(value, (int, float)):
        return str(value)

    l = []

    # Encode characters including non-ASCII ones.

    for c in str(value):
        if c == '"': l.append('\\"')

        # Backslashes must be escaped so that they are not interpreted as
        # introducing escape sequences in the output literal.

        elif c == '\\': l.append('\\\\')
        elif c == '\n': l.append('\\n')
        elif c == '\t': l.append('\\t')
        elif c == '\r': l.append('\\r')
        elif 0x20 <= ord(c) < 0x7f: l.append(c)

        # Fixed-width octal escapes are used instead of hexadecimal escapes
        # because a hexadecimal escape absorbs any hexadecimal digits that
        # follow it. Under Python 2, str(value) yields byte values, for which
        # three octal digits are always sufficient.

        else: l.append("\\%03o" % ord(c))

    return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%d" % n

# Track all encoded paths, detecting and avoiding conflicts.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" are each
    replaced by "__" and, for paths containing ".", each "." is replaced by
    "_". Where the result has already been recorded in 'all_encoded_paths' for
    a different path, longer runs of underscores are tried in place of "."
    until an unused or matching encoding is found. New encodings of dotted
    paths are recorded in 'all_encoded_paths'.
    """

    if path in reserved_words:
        return "__%s" % path
    else:
        part_encoded = path.replace("#", "__").replace("$", "__")

    # Paths without dots are neither recorded nor tested for conflicts.

    if "." not in path:
        return part_encoded

    encoded = part_encoded.replace(".", "_")

    # Test for a conflict with the encoding of a different path, re-encoding
    # if necessary.

    previous = all_encoded_paths.get(encoded)
    replacement = "_"

    while previous:
        if path == previous:
            return encoded
        replacement += "_"
        encoded = part_encoded.replace(".", replacement)
        previous = all_encoded_paths.get(encoded)

    # Store any new or re-encoded path.

    all_encoded_paths[encoded] = path
    return encoded

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    Where 'kind' has no entry in 'structure_size_prefixes', the kind itself is
    used as the prefix. Where 'path' is omitted or false, no name suffix is
    added.
    """

    prefix = structure_size_prefixes.get(kind, kind)
    suffix = "_%s" % encode_path(path) if path else ""
    return "__%ssize%s" % (prefix, suffix)

def encode_symbol(symbol_type, path=None):

    """
    Encode a symbol with the given 'symbol_type' and optional 'path'. Where
    'path' is omitted or false, no name suffix is added.
    """

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%s%s" % (symbol_type, suffix)

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned. The 'kind' must be present in
    'table_name_prefixes'.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path' by prefixing it with '#'."

    return "#%s" % path

def decode_type_attribute(s):

    "Decode the special type attribute 's' by removing its first character."

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name, being one starting with '#'."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i"
    }

# A mapping from kinds to table name prefixes.
table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance"
    }



# Output language reserved words. Names appearing here cannot be used directly
# as identifiers in the output program.

reserved_words = [
    "break", "char", "const", "continue",
    "default", "double", "else",
    "float", "for",
    "if", "int", "long",
    "NULL",
    "return", "struct",
    "typedef",
    "void", "while",

    # Further C keywords that are valid identifiers in Python programs and
    # would otherwise be emitted unchanged.

    "auto", "case", "do",
    "enum", "extern", "goto",
    "inline", "register", "restrict",
    "short", "signed", "sizeof", "static", "switch",
    "union", "unsigned", "volatile",
    ]

# vim: tabstop=4 expandtab shiftwidth=4