#!/usr/bin/env python

"""
Encoder functions, producing representations of program objects.

Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from common import first, InstructionSequence

# Output encoding and decoding for the summary files.

def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage.

    Return the names joined using ", " or, where the joined result is empty,
    the placeholder "{}".
    """

    return ", ".join(attrnames) or "{}"

def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries.

    Return "constrained" for a true status and "deduced" otherwise.
    """

    return "constrained" if constrained else "deduced"

def encode_usage(usage):

    """
    Encode attribute details from 'usage', this being a collection of
    (attribute name, invocation, assignment) tuples.

    Each name is suffixed with "!" for an invocation, "=" for an assignment
    (invocation taking precedence) or nothing at all. The encoded names are
    joined using ", " with "{}" being returned for an empty result.
    """

    all_attrnames = []

    for attrname, invocation, assignment in usage:
        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""
        all_attrnames.append("%s%s" % (attrname, suffix))

    return ", ".join(all_attrnames) or "{}"

def decode_usage(s):

    """
    Decode attribute details from 's'.

    Return a sorted tuple of distinct (attribute name, invocation, assignment)
    tuples, one for each ", "-separated term in 's'. All trailing "!" and "="
    characters are removed from each term to obtain the attribute name.
    """

    # NOTE(review): the "{}" placeholder produced by encode_usage for empty
    # NOTE(review): usage is not treated specially here and is decoded as an
    # NOTE(review): attribute name - confirm whether callers filter it out.

    all_attrnames = set()

    for attrname_str in s.split(", "):
        all_attrnames.add((
            attrname_str.rstrip("!="),
            attrname_str.endswith("!"),
            attrname_str.endswith("="),
            ))

    return tuple(sorted(all_attrnames))

def encode_access_location(t):

    """
    Encode the access location 't', this being a (path, name, attribute name,
    version) tuple. An absent name is encoded as "{}".
    """

    path, name, attrname, version = t
    return "%s %s %s:%d" % (path, name or "{}", attrname, version)

def encode_location(t):

    """
    Encode the general location 't' in a concise form.

    Where a name and version are given, "path name:version" is produced; where
    only a name is given, "path name" is produced; otherwise, the attribute
    name is used to produce "path :attrname".
    """

    path, name, attrname, version = t

    if name is not None and version is not None:
        return "%s %s:%d" % (path, name, version)
    elif name is not None:
        return "%s %s" % (path, name)
    else:
        return "%s :%s" % (path, attrname)

def encode_modifiers(modifiers):

    """
    Encode assignment and invocation details from 'modifiers', concatenating
    the encoded form of each modifier term.
    """

    return "".join([encode_modifier_term(t) for t in modifiers])

def encode_modifier_term(t):

    """
    Encode modifier 't' representing an assignment or an invocation.

    The modifier is an (assignment, invocation) tuple: an assignment yields
    "=", an invocation with an argument count yields "(count)", and anything
    else yields "_".
    """

    assignment, invocation = t

    if assignment:
        return "="
    elif invocation is not None:
        return "(%d)" % invocation
    else:
        return "_"

def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of (assignment,
    invocation) tuples.

    A ValueError is raised by the string search if an opening parenthesis has
    no closing counterpart.
    """

    i = 0
    end = len(s)

    modifiers = []

    while i < end:

        # Assignments occupy a single character.

        if s[i] == "=":
            modifiers.append((True, None))
            i += 1

        # Invocations provide a parenthesised number.

        elif s[i] == "(":
            j = s.index(")", i)
            modifiers.append((False, int(s[i+1:j])))
            i = j + 1

        # Any other character indicates neither assignment nor invocation.

        else:
            modifiers.append((False, None))
            i += 1

    return modifiers



# Test generation functions.

def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types. A list is returned, with one kind for each
    reference in iteration order.
    """

    return [ref.get_kind() for ref in all_types]

def test_label_for_kind(kind):

    """
    Return the label used for 'kind' in test details: "instance" for the
    "<instance>" kind and "type" for all others.
    """

    return "instance" if kind == "<instance>" else "type"

def test_label_for_type(ref):

    "Return the label used for 'ref' in test details."

    return test_label_for_kind(ref.get_kind())



# Instruction representation encoding.
def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    Tuple arguments are encoded recursively; other arguments are used as given
    or, where they are false values, replaced by "{}". An instruction without
    arguments is encoded as the operation alone.
    """

    op = instruction[0]
    args = instruction[1:]

    if not args:
        return op

    a = []
    for arg in args:
        if isinstance(arg, tuple):
            a.append(encode_instruction(arg))
        else:
            a.append(arg or "{}")

    return "%s(%s)" % (op, ", ".join(a))



# Output program encoding.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

static_ops = (
    "__load_static",
    )

reference_acting_ops = attribute_ops + checked_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops

def encode_access_instruction(instruction, subs):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Always define the encoded argument list so that later tests involving
    # the arguments remain valid for instructions without arguments.

    a = []

    if not args:
        argstr = ""

    else:
        # Encode the arguments. Only the first argument is encoded with
        # knowledge of the operation.

        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

        # Modify certain arguments.

        # Convert attribute name arguments to position symbols.

        if op in attribute_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)

        # Convert attribute name arguments to position and code symbols.

        elif op in checked_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Convert type name arguments to position and code symbols. The
        # original, unencoded argument is used to make the type attribute.

        elif op in typename_ops:
            arg = encode_type_attribute(args[1])
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Obtain addresses of type arguments.

        elif op in type_ops:
            a[1] = "&%s" % a[1]

        # Obtain addresses of static objects.

        elif op in static_ops:
            a[0] = "&%s" % a[0]
            a[1] = "&%s" % a[1]

        argstr = "(%s)" % ", ".join(map(str, a))

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and a and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Unsubstituted values without arguments are encoded as addresses.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op):

    """
    Encode 'arg' using 'subs' to define substitutions, returning a tuple
    containing the encoded form of 'arg' along with a collection of any
    substituted values.

    The 'op' parameter, if not a false value, indicates the operation for
    which 'arg' is being encoded.
    """

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs)

        # Convert attribute results to references where required.

        if op and op in reference_acting_ops and arg[0] in attribute_producing_ops:
            return "%s.value" % encoded, substituted
        else:
            return encoded, substituted

    # Special values only need replacing, not encoding.

    elif arg in subs:
        return subs[arg], set([arg])

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and arg != "<accessor>":
        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence.

    A single instruction is returned unchanged; any other number of
    instructions is joined using commas and newlines within parentheses.
    """

    if len(instructions) == 1:
        return instructions[0]

    return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%d" % n

# Python 2 provides basestring; fall back to str where it is unavailable.

try:
    _string_types = basestring
except NameError:
    _string_types = str

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', this being
    the length of a string value or zero for any other kind of value.
    """

    if isinstance(value, _string_types):
        return len(value)

    return 0

def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being
    the name of the value's class followed by "value".
    """

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integers and floating point numbers are converted directly to strings.
    Other values are converted to strings and double-quoted, with quotes,
    backslashes, newlines, tabs and carriage returns being escaped, and with
    characters outside the range 0x20 to 0x7f being written as hexadecimal
    escape sequences.
    """

    if isinstance(value, (int, float)):
        return str(value)

    escapes = {
        '"' : '\\"',
        '\n' : '\\n',
        '\t' : '\\t',
        '\r' : '\\r',
        '\\' : '\\\\',
        }

    l = []

    # Encode characters including non-ASCII ones.

    for c in str(value):
        if c in escapes:
            l.append(escapes[c])
        elif 0x20 <= ord(c) < 0x80:
            l.append(c)
        else:
            l.append("\\x%02x" % ord(c))

    return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%d" % n



# Track all encoded paths, detecting and avoiding conflicts.
all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" are replaced
    by "__" and, where 'path' contains "." characters, these are replaced by
    underscores, with the result being recorded in 'all_encoded_paths'. Where
    the result has already been recorded for a different path, additional
    underscores are used for each "." until an unused or matching encoding is
    found.
    """

    if path in reserved_words:
        return "__%s" % path

    part_encoded = path.replace("#", "__").replace("$", "__")

    # Paths without separators are not recorded or tested for conflicts.

    if "." not in path:
        return part_encoded

    replacement = "_"
    encoded = part_encoded.replace(".", replacement)

    # Test for a conflict with the encoding of a different path, re-encoding
    # if necessary.

    previous = all_encoded_paths.get(encoded)

    while previous:
        if path == previous:
            return encoded
        replacement += "_"
        encoded = part_encoded.replace(".", replacement)
        previous = all_encoded_paths.get(encoded)

    # Store any new or re-encoded path.

    all_encoded_paths[encoded] = path
    return encoded

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    Kinds without an entry in 'structure_size_prefixes' are used directly as
    the prefix. Without a 'path', no name suffix is added.
    """

    prefix = structure_size_prefixes.get(kind, kind)
    suffix = "_%s" % encode_path(path) if path else ""
    return "__%ssize%s" % (prefix, suffix)

def encode_symbol(symbol_type, path=None):

    """
    Encode a symbol with the given 'symbol_type' and optional 'path'. Without
    a 'path', only the symbol type appears in the result.
    """

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%s%s" % (symbol_type, suffix)

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned.

    A KeyError is raised for any 'kind' absent from 'table_name_prefixes'.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path' by prefixing it with '#'."

    return "#%s" % path

def decode_type_attribute(s):

    """
    Decode the special type attribute 's' by discarding its first character.
    """

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name, this starting with '#'."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i"
    }

# A mapping from kinds to table name prefixes.

table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance"
    }



# Output language reserved words.

reserved_words = [
    "break", "char", "const", "continue",
    "default", "double", "else",
    "float", "for",
    "if", "int", "long",
    "NULL",
    "return", "struct",
    "typedef",
    "void", "while",
    ]

# vim: tabstop=4 expandtab shiftwidth=4