#!/usr/bin/env python

"""
Encoder functions, producing representations of program objects.

Copyright (C) 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from common import first, InstructionSequence



# Value digest computation.

from base64 import b64encode
from hashlib import sha1

def digest(values):

    """
    Return a digest string computed over the string forms of 'values'.

    The SHA-1 digest is base64-encoded, after which "+" is replaced by "__",
    "/" by "_", and any trailing "=" padding is removed.
    """

    # NOTE(review): the string handling here relies on Python 2 byte strings.

    m = sha1()
    for value in values:
        m.update(str(value))
    return b64encode(m.digest()).replace("+", "__").replace("/", "_").rstrip("=")



# Output encoding and decoding for the summary files.

def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage, joining them with ", " and
    yielding "{}" where no names are given.
    """

    return ", ".join(attrnames) or "{}"

def encode_constrained(constrained):

    "Encode the 'constrained' status for program summaries."

    return "constrained" if constrained else "deduced"

def encode_usage(usage):

    """
    Encode attribute details from 'usage', a collection of (attribute name,
    invocation, assignment) tuples. Invoked names are suffixed with "!",
    assigned names with "=", and an empty collection is encoded as "{}".
    """

    all_attrnames = []
    for attrname, invocation, assignment in usage:
        suffix = "!" if invocation else ("=" if assignment else "")
        all_attrnames.append("%s%s" % (attrname, suffix))
    return ", ".join(all_attrnames) or "{}"

def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of distinct
    (attribute name, invocation, assignment) tuples.
    """

    # NOTE(review): the "{}" placeholder produced by encode_usage for empty
    # usage is not treated specially here and decodes as an attribute named
    # "{}" - confirm whether callers rely on this.

    all_attrnames = set()
    for attrname_str in s.split(", "):
        all_attrnames.add((attrname_str.rstrip("!="), attrname_str.endswith("!"), attrname_str.endswith("=")))

    return tuple(sorted(all_attrnames))

def encode_access_location(t):

    """
    Encode the access location 't' using its path, name, attrnames and
    access_number attributes, with "{}" standing in for a missing name or
    missing attribute names.
    """

    return "%s:%s:%s:%d" % (t.path, t.name or "{}", t.attrnames or "{}", t.access_number)

def decode_access_location(s):

    """
    Decode the access location 's', returning a (path, name, attrnames,
    access number) tuple.
    """

    # All elements are returned as the strings found in 's': placeholders are
    # not translated and the access number is not converted to an integer.

    path, name, attrnames, access_number = s.split(":")
    return path, name, attrnames, access_number

def encode_alias_location(t, invocation=False):

    """
    Encode the alias location 't', appending ":=" plus any version, ":#" plus
    any access number, and "!" if 'invocation' is set.
    """

    version = ":=%d" % t.version if t.version is not None else ""
    access_number = ":#%d" % t.access_number if t.access_number is not None else ""

    return "%s:%s:%s%s%s%s" % (t.path, t.name or "{}", t.attrnames or "{}",
                               version, access_number,
                               "!" if invocation else "")

def decode_alias_location(s):

    """
    Decode the alias location 's', returning a (path, name, attrnames,
    version, access number, invocation) tuple. The version and access number
    are integers, or None where absent from 's'.
    """

    path, name, rest = s.split(":", 2)
    version = access_number = None

    # Remove the invocation marker before any further splitting so that it
    # cannot remain attached to the attribute names or to the version.

    invocation = rest.endswith("!")
    rest = rest.rstrip("!")

    t = rest.split(":#")
    if len(t) > 1:
        rest = t[0]; access_number = int(t[1])

    t = rest.split(":=")
    if len(t) > 1:
        attrnames = t[0]; version = int(t[1])
    else:
        attrnames = rest

    return path, name, attrnames, version, access_number, invocation

def encode_location(t):

    "Encode the general location 't' in a concise form."

    if t.name is not None and t.version is not None:
        return "%s:%s:%d" % (t.path, t.name, t.version)
    elif t.name is not None:
        return "%s:%s" % (t.path, t.name)
    else:
        return "%s::%s" % (t.path, t.attrnames)

def encode_modifiers(modifiers):

    "Encode assignment and invocation details from 'modifiers'."

    return "".join(map(encode_modifier_term, modifiers))

def encode_modifier_term(t):

    """
    Encode modifier 't' representing an assignment or an invocation. An
    assignment yields "=", an invocation yields "(<arguments>;<keywords>)" with
    comma-separated keywords, and anything else yields "_".
    """

    assignment, invocation = t
    if assignment:
        return "="
    elif invocation is not None:
        arguments, keywords = invocation
        return "(%d;%s)" % (arguments, ",".join(keywords))
    else:
        return "_"

def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of (assignment,
    invocation) tuples where any invocation is an (arguments, keywords) tuple.
    """

    i = 0
    end = len(s)

    modifiers = []

    while i < end:
        if s[i] == "=":
            modifiers.append((True, None))
            i += 1
        elif s[i] == "(":

            # The argument count lies between "(" and ";".

            j = s.index(";", i)
            arguments = int(s[i+1:j])

            # The keywords lie between ";" and ")".

            i = j
            j = s.index(")", i)
            keywords = s[i+1:j]
            keywords = keywords.split(",") if keywords else []
            modifiers.append((False, (arguments, keywords)))
            i = j + 1
        else:
            modifiers.append((False, None))
            i += 1

    return modifiers



# Test generation functions.

def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types.
    """

    return [ref.get_kind() for ref in all_types]

def test_label_for_kind(kind):

    "Return the label used for 'kind' in test details."

    return "instance" if kind == "<instance>" else "type"

def test_label_for_type(ref):

    "Return the label used for 'ref' in test details."

    return test_label_for_kind(ref.get_kind())



# Instruction representation encoding.

def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.
    """

    op = instruction[0]
    args = instruction[1:]

    if args:
        a = []
        for arg in args:

            # Nested instructions are tuples; empty plain values become "{}".

            if isinstance(arg, tuple):
                a.append(encode_instruction(arg))
            else:
                a.append(arg or "{}")
        argstr = "(%s)" % ", ".join(a)
        return "%s%s" % (op, argstr)
    else:
        return op



# Output program encoding.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

attribute_ops = attribute_loading_ops + (
    "__store_via_class", "__store_via_object",
    )

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",
    )

context_values = (
    "<context>",
    )

context_ops = (
    "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",
    )

context_op_functions = (
    "<test_context_revert>", "<test_context_static>",
    )

reference_acting_ops = attribute_ops + checked_ops + type_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops

attribute_producing_variables = (
    "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"
    )

def encode_access_instruction(instruction, subs, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments.

    a = []
    if args:

        # Only the first argument is encoded with knowledge of the operation.

        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

        # Modify certain arguments.

        # Convert type name arguments.

        if op in typename_ops:
            a[1] = encode_path(encode_type_attribute(args[1]))

        # Obtain addresses of type arguments.

        elif op in type_ops:
            a[1] = "&%s" % a[1]

        # Obtain addresses of static objects.

        elif op in static_ops:
            a[-1] = "&%s" % a[-1]

    # Add context storage information to certain operations.

    if op in context_ops:
        a.insert(0, context_index)

    # Add the local context array to certain operations.

    if op in context_op_functions:
        a.append("__tmp_contexts")

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # An unsubstituted element without arguments is encoded as the address of
    # the encoded path.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and 'context_index' to indicate any
    affected context storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, context_index)
        return attribute_to_reference(op, arg[0], encoded, substituted)

    # Special values only need replacing, not encoding.

    elif arg in subs:

        # Handle values modified by storage details.

        if arg in context_values:
            encoded = "%s(%s)" % (subs.get(arg), context_index)
        else:
            encoded = subs.get(arg)

        substituted = set([arg])
        return attribute_to_reference(op, arg, encoded, substituted)

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and \
         arg not in attribute_producing_variables:

        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def attribute_to_reference(op, arg, encoded, substituted):

    """
    Return a tuple of 'encoded' and 'substituted', wrapping 'encoded' in
    "__VALUE(...)" where 'op' acts on references and 'arg' is an operation or
    variable producing an attribute.
    """

    # Convert attribute results to references where required.

    if op and op in reference_acting_ops and (
        arg in attribute_producing_ops or
        arg in attribute_producing_variables):

        return "__VALUE(%s)" % encoded, substituted
    else:
        return encoded, substituted

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence. A single instruction is returned
    as it is; otherwise, the instructions are joined with commas inside
    parentheses.
    """

    if len(instructions) == 1:
        return instructions[0]
    else:
        return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%s" % n

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', this being
    the length of a string value and zero for any other value.
    """

    if isinstance(value, basestring):
        return len(value)
    else:
        return 0

def encode_literal_constant_member(value):

    "Encode the member name for the 'value' in the final program."

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program. Integers and floats are
    encoded using their string form; other values are encoded as double-quoted
    strings with escape sequences.
    """

    if isinstance(value, (int, float)):
        return str(value)
    else:
        l = []

        # Encode characters including non-ASCII ones.

        for c in str(value):
            if c == '"': l.append('\\"')
            elif c == '\n': l.append('\\n')
            elif c == '\t': l.append('\\t')
            elif c == '\r': l.append('\\r')
            elif c == '\\': l.append('\\\\')
            elif 0x20 <= ord(c) < 0x80: l.append(c)
            else: l.append("\\x%02x" % ord(c))

        return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%s" % n



# Track all encoded paths, detecting and avoiding conflicts.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" are replaced
    by "__" and each "." by "_", with longer runs of underscores replacing "."
    where the result would conflict with the encoding of a different path.
    Encodings of paths containing "." are recorded in 'all_encoded_paths'.
    """

    if path in reserved_words:
        return "__%s" % path
    else:
        part_encoded = path.replace("#", "__").replace("$", "__")

        if "." not in path:
            return part_encoded

        encoded = part_encoded.replace(".", "_")

        # Test for a conflict with the encoding of a different path, re-encoding
        # if necessary.

        previous = all_encoded_paths.get(encoded)
        replacement = "_"

        while previous:
            if path == previous:
                return encoded
            replacement += "_"
            encoded = part_encoded.replace(".", replacement)
            previous = all_encoded_paths.get(encoded)

        # Store any new or re-encoded path.

        all_encoded_paths[encoded] = path
        return encoded

def encode_code(name):

    "Encode 'name' as an attribute code indicator."

    return "__ATTRCODE(%s)" % encode_path(name)

def encode_pcode(name):

    "Encode 'name' as a parameter code indicator."

    return "__PARAMCODE(%s)" % encode_path(name)

def encode_pos(name):

    "Encode 'name' as an attribute position indicator."

    return "__ATTRPOS(%s)" % encode_path(name)

def encode_ppos(name):

    "Encode 'name' as a parameter position indicator."

    return "__PARAMPOS(%s)" % encode_path(name)

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.
    """

    # Kinds without a registered prefix are used directly in the reference.

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%ssize%s" % (structure_size_prefixes.get(kind, kind), suffix)

def encode_symbol(symbol_type, path=None):

    "Encode a symbol with the given 'symbol_type' and optional 'path'."

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%s%s" % (symbol_type, suffix)

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path'."

    return "#%s" % path

def decode_type_attribute(s):

    "Decode the special type attribute 's'."

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

structure_size_prefixes = {
    "<class>": "c",
    "<module>": "m",
    "<instance>": "i",
}

# Table name prefixes, looked up by the kind of object concerned.

table_name_prefixes = {
    "<class>": "Class",
    "<function>": "Function",
    "<module>": "Module",
    "<instance>": "Instance",
}



# Words reserved by the output language, which cannot be used directly as names
# in the output program.

reserved_words = [
    "break",
    "char",
    "const",
    "continue",
    "default",
    "double",
    "else",
    "float",
    "for",
    "if",
    "int",
    "long",
    "NULL",
    "return",
    "struct",
    "typedef",
    "void",
    "while",
]

# vim: tabstop=4 expandtab shiftwidth=4