#!/usr/bin/env python

"""
Encoder functions, producing representations of program objects.

Copyright (C) 2016, 2017, 2018, 2023 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from base64 import b64encode
from hashlib import sha1

from common import first, InstructionSequence



# Value digest computation.

def digest(values):

    """
    Return a digest string computed from the string forms of the given
    'values', encoded using base64 with "+" replaced by "__", "/" replaced by
    "_", and trailing "=" padding removed.
    """

    # NOTE(review): str(value) is fed directly to the hash and str methods are
    # applied to the b64encode result, which relies on Python 2 byte string
    # semantics.

    m = sha1()
    for value in values:
        m.update(str(value))
    return b64encode(m.digest()).replace("+", "__").replace("/", "_").rstrip("=")



# Output encoding and decoding for the summary files.

def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage as a comma-separated string, with
    "{}" being produced for an empty collection.
    """

    return ", ".join(attrnames) or "{}"

def encode_constrained(constrained):

    "Encode the 'constrained' status for program summaries."

    return "constrained" if constrained else "deduced"

def encode_usage(usage):

    """
    Encode attribute details from 'usage', this being a collection of
    (attribute name, invocation, assignment) tuples. Invoked attributes are
    given a "!" suffix, assigned attributes a "=" suffix, with invocation taking
    precedence. An empty collection is encoded as "{}".
    """

    all_attrnames = []
    for attrname, invocation, assignment in usage:
        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""
        all_attrnames.append("%s%s" % (attrname, suffix))
    return ", ".join(all_attrnames) or "{}"

def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of
    (attribute name, invocation, assignment) tuples. The "{}" marker produced
    by encode_usage for empty usage yields an empty tuple.
    """

    # The empty usage marker does not describe any attribute.

    if s == "{}":
        return ()

    all_attrnames = set()
    for attrname_str in s.split(", "):
        all_attrnames.add((attrname_str.rstrip("!="), attrname_str.endswith("!"), attrname_str.endswith("=")))

    return tuple(sorted(all_attrnames))

def encode_access_location(t):

    """
    Encode the access location 't' using its path, name, attrnames and
    access_number attributes, with "{}" substituted for an absent name or
    attribute names.
    """

    return "%s:%s:%s:%d" % (t.path, t.name or "{}", t.attrnames or "{}", t.access_number)

def decode_access_location(s):

    """
    Decode the access location 's', returning a tuple of the form
    (path, name, attrnames, access_number).
    """

    # NOTE(review): all elements, including the access number, are returned as
    # strings, and any "{}" markers are left as they are.

    path, name, attrnames, access_number = s.split(":")
    return path, name, attrnames, access_number

def encode_alias_location(t, invocation=False):

    """
    Encode the alias location 't', appending ":=" plus the version and ":#"
    plus the access number where these are not None, and appending "!" if
    'invocation' is set.
    """

    version = ":=%d" % t.version if t.version is not None else ""
    access_number = ":#%d" % t.access_number if t.access_number is not None else ""
    marker = "!" if invocation else ""

    return "%s:%s:%s%s%s%s" % (t.path, t.name or "{}", t.attrnames or "{}",
        version, access_number, marker)

def decode_alias_location(s):

    """
    Decode the alias location 's', returning a tuple of the form
    (path, name, attrnames, version, access_number, invocation) where the
    version and access number are integers or None if not specified.
    """

    path, name, rest = s.split(":", 2)
    attrnames = version = access_number = None
    invocation = rest.endswith("!")

    # Remove the invocation marker before parsing the remaining fields so that
    # it cannot end up in the attribute names or in the version number.

    rest = rest.rstrip("!")

    t = rest.split(":#")
    if len(t) > 1:
        rest = t[0]
        access_number = int(t[1])

    t = rest.split(":=")
    if len(t) > 1:
        attrnames = t[0]
        version = int(t[1])
    else:
        attrnames = rest

    return path, name, attrnames, version, access_number, invocation

def encode_location(t):

    "Encode the general location 't' in a concise form."

    if t.name is not None and t.version is not None:
        return "%s:%s:%d" % (t.path, t.name, t.version)
    elif t.name is not None:
        return "%s:%s" % (t.path, t.name)
    else:
        return "%s::%s" % (t.path, t.attrnames)

def encode_modifiers(modifiers):

    "Encode assignment and invocation details from 'modifiers'."

    return "".join([encode_modifier_term(t) for t in modifiers])

def encode_modifier_term(t):

    """
    Encode modifier 't' representing an assignment or an invocation. An
    assignment is encoded as "=", an invocation as "(<arguments>;<keywords>)"
    with comma-separated keywords, and anything else as "_".
    """

    assignment, invocation = t
    if assignment:
        return "="
    elif invocation is not None:
        arguments, keywords = invocation
        return "(%d;%s)" % (arguments, ",".join(keywords))
    else:
        return "_"

def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of
    (assignment, invocation) tuples where each invocation is either None or a
    tuple of the form (arguments, keywords).
    """

    i = 0
    end = len(s)

    modifiers = []

    while i < end:
        if s[i] == "=":
            modifiers.append((True, None))
            i += 1
        elif s[i] == "(":

            # The argument count appears between "(" and ";".

            j = s.index(";", i)
            arguments = int(s[i+1:j])

            # Any keywords appear between ";" and ")".

            i = j
            j = s.index(")", i)
            keywords = s[i+1:j]
            keywords = keywords.split(",") if keywords else []
            modifiers.append((False, (arguments, keywords)))
            i = j + 1
        else:
            modifiers.append((False, None))
            i += 1

    return modifiers



# Test generation functions.

def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types.
    """

    return [ref.get_kind() for ref in all_types]

def test_label_for_kind(kind):

    "Return the label used for 'kind' in test details."

    return "instance" if kind == "<instance>" else "type"

def test_label_for_type(ref):

    "Return the label used for 'ref' in test details."

    return test_label_for_kind(ref.get_kind())



# Instruction representation encoding.

def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.
    """

    op = instruction[0]
    args = instruction[1:]

    # Operations without arguments are represented by their names alone.

    if not args:
        return op

    a = []
    for arg in args:
        if isinstance(arg, tuple):
            a.append(encode_instruction(arg))
        else:
            a.append(arg or "{}")

    return "%s(%s)" % (op, ", ".join(a))



# Output program encoding.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

attribute_ref_lookup_ops = (
    "__get_object_attr_ref", "__get_class_attr_ref",
    "__check_and_get_object_attr_ref",
    )

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",
    )

accessor_values = (
    "<accessor>",
    )

accessor_ops = (
    "<accessor>", "<set_accessor>",
    )

attribute_ref_values = (
    "<attr_ref>",
    )

attribute_ref_ops = (
    "<attr_ref>", "<set_attr_ref>",
    )

context_values = (
    "<context>",
    )

context_ops = (
    "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",
    )

context_op_functions = (
    "<test_context_revert>", "<test_context_static>",
    )

reference_acting_ops = attribute_ref_lookup_ops + attribute_loading_ops + type_ops + typename_ops
attribute_producing_ops = attribute_loading_ops

attribute_producing_variables = (
    "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"
    )

def encode_access_instruction(instruction, subs, accessor_index, context_index,
                              attribute_ref_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'accessor_index' parameter defines the position in local accessor
    storage for the referenced accessor or affected by an accessor operation.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    The 'attribute_ref_index' parameter defines the position in local attribute
    reference storage for a referenced attribute.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments.

    a = []
    if args:

        # Only the first argument is encoded with knowledge of the operation.

        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs,
                converting_op, accessor_index, context_index, attribute_ref_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

    # Modify certain arguments.

    # Convert type name arguments.

    if op in typename_ops:
        a[1] = encode_path(encode_type_attribute(args[1]))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add accessor storage information to certain operations.

    if op in accessor_ops:
        a.insert(0, accessor_index)

    # Add attribute reference storage information to certain operations.

    if op in attribute_ref_ops:
        a.insert(0, attribute_ref_index)

    # Add context storage information to certain operations.

    if op in context_ops:
        a.insert(0, context_index)

    # Add the local context array to certain operations.

    if op in context_op_functions:
        a.append("__tmp_contexts")

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        # NOTE(review): for "<set_accessor>", the first element of the argument
        # list is the accessor index inserted above, so this test only succeeds
        # if that index is itself an instruction sequence - confirm that this is
        # intended.

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Plain values without arguments are referenced by address.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op, accessor_index, context_index, attribute_ref_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and with 'accessor_index',
    'context_index' and 'attribute_ref_index' indicating any affected accessor,
    context and attribute reference storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    # Nested instructions are encoded recursively.

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs,
            accessor_index, context_index, attribute_ref_index)
        return attribute_to_reference(op, arg[0], encoded, substituted)

    # Special values only need replacing, not encoding.

    elif arg in subs:

        # Handle values modified by storage details.

        # NOTE(review): accessor values are parameterised using the context
        # index and not the accessor index - confirm that this is intended.

        if arg in accessor_values or arg in context_values:
            encoded = "%s(%s)" % (subs.get(arg), context_index)
        elif arg in attribute_ref_values:
            encoded = "%s(%s)" % (subs.get(arg), attribute_ref_index)
        else:
            encoded = subs.get(arg)

        substituted = set([arg])
        return attribute_to_reference(op, arg, encoded, substituted)

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and \
         arg not in attribute_producing_variables:

        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def attribute_to_reference(op, arg, encoded, substituted):

    """
    Return a tuple containing the 'encoded' form of 'arg', wrapped using
    __VALUE if 'op' acts upon references and 'arg' produces an attribute,
    together with the given 'substituted' collection.
    """

    # Convert attribute results to references where required.

    if op and op in reference_acting_ops and (
        arg in attribute_producing_ops or
        arg in attribute_producing_variables):

        return "__VALUE(%s)" % encoded, substituted
    else:
        return encoded, substituted

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence, with a single instruction being
    returned as it is and with other collections being joined using commas and
    newlines within parentheses.
    """

    if len(instructions) == 1:
        return instructions[0]
    else:
        return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%s" % n

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', this being
    the length of any string value and zero for all other values.
    """

    if isinstance(value, basestring):
        return len(value)
    else:
        return 0

def encode_literal_constant_member(value):

    "Encode the member name for the 'value' in the final program."

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program. Integers and floating point
    numbers are encoded using their string representations. Other values are
    encoded as double-quoted strings with escape sequences employed for quotes,
    backslashes, certain control characters, and characters outside the
    printable ASCII range.
    """

    if isinstance(value, (int, float)):
        return str(value)
    else:
        l = []

        # Encode characters including non-ASCII ones.

        for c in str(value):
            if c == '"':
                l.append('\\"')
            elif c == '\n':
                l.append('\\n')
            elif c == '\t':
                l.append('\\t')
            elif c == '\r':
                l.append('\\r')
            elif c == '\\':
                l.append('\\\\')
            elif 0x20 <= ord(c) < 0x80:
                l.append(c)
            else:
                l.append("\\x%02x" % ord(c))

        return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%s" % n

def encode_trailing_area(path):

    """
    Encode any reference to trailing data members for instances of the type
    given by 'path'.
    """

    return "__TRAILING_%s" % encode_path(path)



# Track all encoded paths, detecting and avoiding conflicts.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" are replaced
    by "__" and each "." is replaced by "_", with longer replacements being
    tried if the result has already been recorded for a different path. Encoded
    forms of paths containing "." are recorded in 'all_encoded_paths'.
    """

    if path in reserved_words:
        return "__%s" % path

    part_encoded = path.replace("#", "__").replace("$", "__")

    # Paths without any separator cannot be affected by conflicts arising from
    # the replacement of the separator.

    if "." not in path:
        return part_encoded

    encoded = part_encoded.replace(".", "_")

    # Test for a conflict with the encoding of a different path, re-encoding
    # if necessary.

    previous = all_encoded_paths.get(encoded)
    replacement = "_"

    while previous:
        if path == previous:
            return encoded
        replacement += "_"
        encoded = part_encoded.replace(".", replacement)
        previous = all_encoded_paths.get(encoded)

    # Store any new or re-encoded path.

    all_encoded_paths[encoded] = path
    return encoded

def encode_code(name):

    "Encode 'name' as an attribute code indicator."

    return "__ATTRCODE(%s)" % encode_path(name)

def encode_pcode(name):

    "Encode 'name' as a parameter code indicator."

    return "__PARAMCODE(%s)" % encode_path(name)

def encode_pos(name):

    "Encode 'name' as an attribute position indicator."

    return "__ATTRPOS(%s)" % encode_path(name)

def encode_ppos(name):

    "Encode 'name' as a parameter position indicator."

    return "__PARAMPOS(%s)" % encode_path(name)

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.
    """

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%ssize%s" % (structure_size_prefixes.get(kind, kind), suffix)

def encode_symbol(symbol_type, path=None):

    "Encode a symbol with the given 'symbol_type' and optional 'path'."

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%s%s" % (symbol_type, suffix)

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path'."

    return "#%s" % path

def decode_type_attribute(s):

    "Decode the special type attribute 's'."

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i"
    }

# A mapping from kinds to table name prefixes.

table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance"
    }



# Output language reserved words.

reserved_words = [
    "break", "char", "const", "continue",
    "default", "double", "else",
    "float", "for",
    "if", "int", "long",
    "NULL",
    "return", "struct",
    "typedef",
    "void", "while",
    ]

# vim: tabstop=4 expandtab shiftwidth=4