#!/usr/bin/env python

"""
Encoder functions, producing representations of program objects.

Copyright (C) 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from common import first, InstructionSequence



# Value digest computation.

from base64 import b64encode
from hashlib import sha1

def digest(values):

    """
    Return a digest for the given 'values': the SHA-1 hash of the string forms
    of the values, base64-encoded, with "+" replaced by "__", "/" replaced by
    "_", and any trailing "=" padding removed.
    """

    m = sha1()
    for value in values:
        m.update(str(value))
    return b64encode(m.digest()).replace("+", "__").replace("/", "_").rstrip("=")



# Output encoding and decoding for the summary files.

def encode_attrnames(attrnames):

    "Encode the 'attrnames' representing usage."

    return ", ".join(attrnames) or "{}"

def encode_constrained(constrained):

    "Encode the 'constrained' status for program summaries."

    return "constrained" if constrained else "deduced"

def encode_usage(usage):

    """
    Encode attribute details from 'usage', being a collection of (attribute
    name, invocation, assignment) tuples. Invoked names are suffixed with "!",
    assigned names with "=". Empty usage is encoded as "{}".
    """

    all_attrnames = []
    for attrname, invocation, assignment in usage:
        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""
        all_attrnames.append("%s%s" % (attrname, suffix))
    return ", ".join(all_attrnames) or "{}"

def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of (attribute
    name, invocation, assignment) tuples.
    """

    # Empty usage is encoded as "{}" by encode_usage and must not be treated as
    # the name of an attribute.

    if s == "{}":
        return ()

    all_attrnames = set()
    for attrname_str in s.split(", "):
        all_attrnames.add((attrname_str.rstrip("!="), attrname_str.endswith("!"), attrname_str.endswith("=")))

    return tuple(sorted(all_attrnames))

def encode_access_location(t):

    "Encode the access location 't'."

    return "%s:%s:%s:%d" % (t.path, t.name or "{}", t.attrnames or "{}", t.access_number)

def decode_access_location(s):

    """
    Decode the access location 's', returning the path, name, attribute names
    and access number as strings exactly as they appear in 's'.
    """

    path, name, attrnames, access_number = s.split(":")
    return path, name, attrnames, access_number

def encode_alias_location(t, invocation=False):

    """
    Encode the alias location 't', appending "!" if 'invocation' is set to a
    true value.
    """

    version = (":=%d" % t.version) if t.version is not None else ""
    access_number = (":#%d" % t.access_number) if t.access_number is not None else ""

    return "%s:%s:%s%s%s%s" % (t.path, t.name or "{}", t.attrnames or "{}",
        version, access_number, "!" if invocation else "")

def decode_alias_location(s):

    """
    Decode the alias location 's', returning a tuple of the form (path, name,
    attribute names, version, access number, invocation).
    """

    path, name, rest = s.split(":", 2)
    attrnames = version = access_number = None
    invocation = rest.endswith("!")

    # Remove any trailing access number.

    t = rest.rstrip("!").split(":#")
    if len(t) > 1:
        rest = t[0]
        access_number = int(t[1])

    # Separate any version from the attribute names.

    t = rest.split(":=")
    if len(t) > 1:
        attrnames = t[0]
        version = int(t[1])
    else:
        attrnames = rest

    return path, name, attrnames, version, access_number, invocation

def encode_location(t):

    "Encode the general location 't' in a concise form."

    if t.name is not None and t.version is not None:
        return "%s:%s:%d" % (t.path, t.name, t.version)
    elif t.name is not None:
        return "%s:%s" % (t.path, t.name)
    else:
        return "%s::%s" % (t.path, t.attrnames)

def encode_modifiers(modifiers):

    "Encode assignment and invocation details from 'modifiers'."

    return "".join([encode_modifier_term(t) for t in modifiers])

def encode_modifier_term(t):

    "Encode modifier 't' representing an assignment or an invocation."

    assignment, invocation = t
    if assignment:
        return "="
    elif invocation is not None:
        arguments, keywords = invocation
        return "(%d;%s)" % (arguments, ",".join(keywords))
    else:
        return "_"

def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of (assignment,
    invocation) tuples where any invocation is an (arguments, keywords) tuple.
    """

    i = 0
    end = len(s)

    modifiers = []

    while i < end:

        # Assignments.

        if s[i] == "=":
            modifiers.append((True, None))
            i += 1

        # Invocations of the form "(<arguments>;<keyword>,...)".

        elif s[i] == "(":
            j = s.index(";", i)
            arguments = int(s[i+1:j])
            i = j
            j = s.index(")", i)
            keywords = s[i+1:j]
            keywords = keywords.split(",") if keywords else []
            modifiers.append((False, (arguments, keywords)))
            i = j + 1

        # Plain accesses.

        else:
            modifiers.append((False, None))
            i += 1

    return modifiers



# Test generation functions.

def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types.
    """

    return [ref.get_kind() for ref in all_types]

def test_label_for_kind(kind):

    "Return the label used for 'kind' in test details."

    return "instance" if kind == "<instance>" else "type"

def test_label_for_type(ref):

    "Return the label used for 'ref' in test details."

    return test_label_for_kind(ref.get_kind())



# Instruction representation encoding.

def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.
    """

    op = instruction[0]
    args = instruction[1:]

    if args:
        a = []
        for arg in args:
            if isinstance(arg, tuple):
                a.append(encode_instruction(arg))
            else:
                a.append(arg or "{}")
        argstr = "(%s)" % ", ".join(a)
        return "%s%s" % (op, argstr)
    else:
        return op



# Output program encoding.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

attribute_ops = attribute_loading_ops + (
    "__store_via_class", "__store_via_object",
    )

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",
    )

accessor_values = (
    "<accessor>",
    )

accessor_ops = (
    "<accessor>", "<set_accessor>",
    )

context_values = (
    "<context>",
    )

context_ops = (
    "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",
    )

context_op_functions = (
    "<test_context_revert>", "<test_context_static>",
    )

reference_acting_ops = attribute_ops + checked_ops + type_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops

attribute_producing_variables = (
    "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"
    )

def encode_access_instruction(instruction, subs, accessor_index, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'accessor_index' parameter defines the position in local accessor
    storage for the referenced accessor or affected by an accessor operation.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments.

    a = []
    if args:

        # Only the first argument is encoded with knowledge of the operation.

        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, accessor_index, context_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

    # Modify certain arguments.

    # Convert type name arguments.

    if op in typename_ops:
        a[1] = encode_path(encode_type_attribute(args[1]))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add accessor storage information to certain operations.

    if op in accessor_ops:
        a.insert(0, accessor_index)

    # Add context storage information to certain operations.

    if op in context_ops:
        a.insert(0, context_index)

    # Add the local context array to certain operations.

    if op in context_op_functions:
        a.append("__tmp_contexts")

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Without a substitution or arguments, the element is encoded as the
    # address of the object it names.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op, accessor_index, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and with 'accessor_index' and
    'context_index' indicating any affected accessor and context storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, accessor_index, context_index)
        return attribute_to_reference(op, arg[0], encoded, substituted)

    # Special values only need replacing, not encoding.

    elif arg in subs:

        # Handle values modified by storage details: accessors are found in
        # accessor storage and contexts in context storage, as is the case for
        # the accessor and context operations in encode_access_instruction.

        if arg in accessor_values:
            encoded = "%s(%s)" % (subs.get(arg), accessor_index)
        elif arg in context_values:
            encoded = "%s(%s)" % (subs.get(arg), context_index)
        else:
            encoded = subs.get(arg)

        substituted = set([arg])
        return attribute_to_reference(op, arg, encoded, substituted)

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and \
        arg not in attribute_producing_variables:

        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def attribute_to_reference(op, arg, encoded, substituted):

    """
    Return a tuple containing 'encoded' and 'substituted', wrapping 'encoded'
    using __VALUE where 'op' acts upon references and 'arg' is an operation or
    variable producing an attribute.
    """

    # Convert attribute results to references where required.

    if op and op in reference_acting_ops and (
        arg in attribute_producing_ops or
        arg in attribute_producing_variables):

        return "__VALUE(%s)" % encoded, substituted
    else:
        return encoded, substituted

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    "Encode 'instructions' as a sequence."

    if len(instructions) == 1:
        return instructions[0]
    else:
        return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%s" % n

def encode_literal_constant_size(value):

    "Encode a size for the literal constant with the given 'value'."

    if isinstance(value, basestring):
        return len(value)
    else:
        return 0

def encode_literal_constant_member(value):

    "Encode the member name for the 'value' in the final program."

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    "Encode the given 'value' in the final program."

    if isinstance(value, (int, float)):
        return str(value)
    else:
        l = []

        # Encode characters including non-ASCII ones.

        for c in str(value):
            if c == '"':
                l.append('\\"')
            elif c == '\n':
                l.append('\\n')
            elif c == '\t':
                l.append('\\t')
            elif c == '\r':
                l.append('\\r')
            elif c == '\\':
                l.append('\\\\')
            elif 0x20 <= ord(c) < 0x80:
                l.append(c)
            else:
                l.append("\\x%02x" % ord(c))

        return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%s" % n

def encode_trailing_area(path):

    """
    Encode any reference to trailing data members for instances of the type
    given by 'path'.
    """

    return "__TRAILING_%s" % encode_path(path)



# Track all encoded paths, detecting and avoiding conflicts.

all_encoded_paths = {}

def encode_path(path):

    "Encode 'path' as an output program object, translating special symbols."

    if path in reserved_words:
        return "__%s" % path

    part_encoded = path.replace("#", "__").replace("$", "__")

    # Paths without separators cannot conflict and are not recorded.

    if "." not in path:
        return part_encoded

    encoded = part_encoded.replace(".", "_")

    # Test for a conflict with the encoding of a different path, re-encoding
    # if necessary.

    previous = all_encoded_paths.get(encoded)
    replacement = "_"

    while previous:
        if path == previous:
            return encoded

        # Lengthen the separator replacement until an unused encoding, or the
        # one already recorded for this path, is found.

        replacement += "_"
        encoded = part_encoded.replace(".", replacement)
        previous = all_encoded_paths.get(encoded)

    # Store any new or re-encoded path.

    all_encoded_paths[encoded] = path
    return encoded

def encode_code(name):

    "Encode 'name' as an attribute code indicator."

    return "__ATTRCODE(%s)" % encode_path(name)

def encode_pcode(name):

    "Encode 'name' as a parameter code indicator."

    return "__PARAMCODE(%s)" % encode_path(name)

def encode_pos(name):

    "Encode 'name' as an attribute position indicator."

    return "__ATTRPOS(%s)" % encode_path(name)

def encode_ppos(name):

    "Encode 'name' as a parameter position indicator."

    return "__PARAMPOS(%s)" % encode_path(name)

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.
    """

    suffix = ("_%s" % encode_path(path)) if path else ""
    return "__%ssize%s" % (structure_size_prefixes.get(kind, kind), suffix)

def encode_symbol(symbol_type, path=None):

    "Encode a symbol with the given 'symbol_type' and optional 'path'."

    suffix = ("_%s" % encode_path(path)) if path else ""
    return "__%s%s" % (symbol_type, suffix)

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path'."

    return "#%s" % path

def decode_type_attribute(s):

    "Decode the special type attribute 's'."

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i"
    }

# A mapping from kinds to table name prefixes.

table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance"
    }



# Output language reserved words.

reserved_words = [
    "break", "char", "const", "continue",
    "default", "double", "else",
    "float", "for",
    "if", "int", "long",
    "NULL",
    "return", "struct",
    "typedef",
    "void", "while",
    ]

# vim: tabstop=4 expandtab shiftwidth=4