#!/usr/bin/env python

"""
Encoder functions, producing representations of program objects.

Copyright (C) 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from common import first, InstructionSequence



# Value digest computation.

from base64 import b64encode
from hashlib import sha1

def digest(values):

    """
    Return a digest string computed from the given 'values'.

    Each value is converted using str and fed, in order, to a SHA-1 hash. The
    hash is base64-encoded with "+" replaced by "__", "/" replaced by "_", and
    any trailing "=" padding removed.
    """

    m = sha1()
    for value in values:
        m.update(str(value))
    return b64encode(m.digest()).replace("+", "__").replace("/", "_").rstrip("=")



# Output encoding and decoding for the summary files.

def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage, joining them with ", " and
    yielding "{}" where the joined result is empty.
    """

    return ", ".join(attrnames) or "{}"

def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries, returning
    "constrained" for a true value and "deduced" otherwise.
    """

    return "constrained" if constrained else "deduced"

def encode_usage(usage):

    """
    Encode attribute details from 'usage', a collection of (attribute name,
    invocation, assignment) tuples.

    Each name is suffixed with "!" if invoked, otherwise with "=" if assigned.
    The encoded names are joined with ", ", with "{}" being returned if the
    joined result is empty.
    """

    all_attrnames = []
    for attrname, invocation, assignment in usage:
        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""
        all_attrnames.append("%s%s" % (attrname, suffix))
    return ", ".join(all_attrnames) or "{}"

def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of distinct
    (attribute name, invocation, assignment) tuples.

    The string is split on ", ". Trailing "!" and "=" characters are removed
    from each name, with the flags indicating which of them ended the name.
    """

    all_attrnames = set()
    for attrname_str in s.split(", "):
        all_attrnames.add((attrname_str.rstrip("!="), attrname_str.endswith("!"), attrname_str.endswith("=")))

    return tuple(sorted(all_attrnames))

def encode_access_location(t):

    """
    Encode the access location 't' using its path, name, attrnames and
    access_number attributes, with "{}" replacing an empty name or attrnames.
    """

    return "%s:%s:%s:%d" % (t.path, t.name or "{}", t.attrnames or "{}", t.access_number)

def encode_alias_location(t, invocation=False):

    """
    Encode the alias location 't'.

    The result has the form path:name:attrnames, followed by ":=<version>" if
    the version is not None, ":#<access number>" if the access number is not
    None, and "!" if 'invocation' is true. An empty name or attrnames is
    written as "{}".
    """

    return "%s:%s:%s%s%s%s" % (t.path, t.name or "{}", t.attrnames or "{}",
        ":=%d" % t.version if t.version is not None else "",
        ":#%d" % t.access_number if t.access_number is not None else "",
        "!" if invocation else "")

def decode_alias_location(s):

    """
    Decode the alias location 's', returning a tuple of the form (path, name,
    attrnames, version, access number, invocation).

    The version and access number are None where absent from 's'. Names and
    attrnames are returned as found in the string, including any "{}" text.
    """

    path, name, rest = s.split(":", 2)
    version = access_number = None
    invocation = rest.endswith("!")

    # Always continue with the text stripped of the invocation marker.
    # Previously, the marker was only removed when an access number was
    # present, leaving "!" attached to the attrnames or to the version (which
    # then failed integer conversion).

    t = rest.rstrip("!").split(":#")
    rest = t[0]
    if len(t) > 1:
        access_number = int(t[1])

    t = rest.split(":=")
    attrnames = t[0]
    if len(t) > 1:
        version = int(t[1])

    return path, name, attrnames, version, access_number, invocation

def encode_location(t):

    """
    Encode the general location 't' in a concise form: path:name:version where
    both name and version are present, path:name where only the name is
    present, and path::attrnames otherwise.
    """

    if t.name is not None and t.version is not None:
        return "%s:%s:%d" % (t.path, t.name, t.version)
    elif t.name is not None:
        return "%s:%s" % (t.path, t.name)
    else:
        return "%s::%s" % (t.path, t.attrnames)

def encode_modifiers(modifiers):

    """
    Encode assignment and invocation details from 'modifiers', concatenating
    the encoded form of each modifier term.
    """

    return "".join([encode_modifier_term(t) for t in modifiers])

def encode_modifier_term(t):

    """
    Encode modifier 't' representing an assignment or an invocation.

    The term is an (assignment, invocation) pair. An assignment yields "=", an
    invocation given as (arguments, keywords) yields "(<arguments>;<keywords>)"
    with comma-separated keywords, and anything else yields "_".
    """

    assignment, invocation = t
    if assignment:
        return "="
    elif invocation is not None:
        arguments, keywords = invocation
        return "(%d;%s)" % (arguments, ",".join(keywords))
    else:
        return "_"

def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of (assignment,
    invocation) pairs as accepted by encode_modifier_term.
    """

    i = 0
    end = len(s)

    modifiers = []

    while i < end:

        # Assignments occupy a single character.

        if s[i] == "=":
            modifiers.append((True, None))
            i += 1

        # Invocations have the form "(<arguments>;<keywords>)".

        elif s[i] == "(":
            j = s.index(";", i)
            arguments = int(s[i+1:j])
            i = j
            j = s.index(")", i)
            keywords = s[i+1:j]
            keywords = keywords.split(",") if keywords else []
            modifiers.append((False, (arguments, keywords)))
            i = j + 1

        # Any other character indicates neither kind of modifier.

        else:
            modifiers.append((False, None))
            i += 1

    return modifiers



# Test generation functions.

def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types. The result is a list of the values returned
    by the get_kind method of each reference.
    """

    return [ref.get_kind() for ref in all_types]

def test_label_for_kind(kind):

    """
    Return the label used for 'kind' in test details: "instance" for the
    "<instance>" kind and "type" for any other kind.
    """

    return "instance" if kind == "<instance>" else "type"

def test_label_for_type(ref):

    "Return the label used for 'ref' in test details."

    return test_label_for_kind(ref.get_kind())



# Instruction representation encoding.

def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    Tuple arguments are encoded recursively. Other arguments are used as they
    are, with "{}" replacing empty values. An instruction without arguments is
    returned as the operation alone.
    """

    op = instruction[0]
    args = instruction[1:]

    if args:
        a = []
        for arg in args:
            if isinstance(arg, tuple):
                a.append(encode_instruction(arg))
            else:
                a.append(arg or "{}")
        argstr = "(%s)" % ", ".join(a)
        return "%s%s" % (op, argstr)
    else:
        return op



# Output program encoding.

# Operations grouped by the treatment they receive when encoded.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

attribute_ops = attribute_loading_ops + (
    "__store_via_class", "__store_via_object",
    )

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

# Operations whose second argument is replaced by an encoded type attribute.

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

# Operations whose second argument has its address taken.

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

# Operations whose last argument has its address taken.

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",
    )

# Substitutable values that are encoded with the context index as argument.

context_values = (
    "<context>",
    )

# Operations receiving the context index as an initial argument.

context_ops = (
    "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",
    )

# Operations receiving the local context array as a final argument.

context_op_functions = (
    "<test_context_revert>", "<test_context_static>",
    )

# Operations whose first argument is converted to a reference where needed,
# together with the operations and variables whose results need converting.

reference_acting_ops = attribute_ops + checked_ops + type_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops

attribute_producing_variables = (
    "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"
    )

def encode_access_instruction(instruction, subs, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments. Only the first argument is encoded with knowledge
    # of the operation, this controlling any conversion to a reference.

    a = []
    if args:
        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

    # Modify certain arguments.

    # Convert type name arguments.

    if op in typename_ops:
        a[1] = encode_path(encode_type_attribute(args[1]))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add context storage information to certain operations.

    if op in context_ops:
        a.insert(0, context_index)

    # Add the local context array to certain operations.

    if op in context_op_functions:
        a.append("__tmp_contexts")

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Unsubstituted elements without arguments are encoded as addresses.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and 'context_index' to indicate any
    affected context storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    # Nested instructions are encoded recursively.

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, context_index)
        return attribute_to_reference(op, arg[0], encoded, substituted)

    # Special values only need replacing, not encoding.

    elif arg in subs:

        # Handle values modified by storage details.

        if arg in context_values:
            encoded = "%s(%s)" % (subs.get(arg), context_index)
        else:
            encoded = subs.get(arg)

        substituted = set([arg])
        return attribute_to_reference(op, arg, encoded, substituted)

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and \
         arg not in attribute_producing_variables:

        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def attribute_to_reference(op, arg, encoded, substituted):

    """
    Convert attribute results to references where required.

    Where 'op' is a reference-acting operation and 'arg' is an
    attribute-producing operation or variable, return 'encoded' wrapped in
    "__VALUE(...)". Otherwise, return 'encoded' unchanged. In both cases, the
    'substituted' collection is returned as the second element of the result.
    """

    if op and op in reference_acting_ops and (
        arg in attribute_producing_ops or
        arg in attribute_producing_variables):

        return "__VALUE(%s)" % encoded, substituted
    else:
        return encoded, substituted

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence. A single instruction is returned as it
    is, whereas any other number of instructions is joined using commas and
    newlines within parentheses.
    """

    if len(instructions) == 1:
        return instructions[0]
    else:
        return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%s" % n

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', this being
    the length of string values and zero for all other values.
    """

    if isinstance(value, basestring):
        return len(value)
    else:
        return 0

def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being the
    name of the value's class followed by "value".
    """

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integers and floats are encoded using str. Other values are converted
    using str and encoded as a double-quoted string literal with the quote,
    backslash, newline, tab and carriage return characters escaped, and with
    characters outside the range 0x20 to 0x7f written as octal escapes.
    """

    if isinstance(value, (int, float)):
        return str(value)
    else:
        l = []

        # Encode characters including non-ASCII ones.

        for c in str(value):
            if c == '"': l.append('\\"')
            elif c == '\n': l.append('\\n')
            elif c == '\t': l.append('\\t')
            elif c == '\r': l.append('\\r')
            elif c == '\\': l.append('\\\\')
            elif 0x20 <= ord(c) < 0x80: l.append(c)

            # Use fixed-width octal escapes: these end after three digits,
            # whereas a hexadecimal escape would absorb any hexadecimal digit
            # characters following it in the literal.
            # NOTE(review): assumes character values no greater than 0xff, as
            # is the case for byte strings - confirm for other string types.

            else: l.append("\\%03o" % ord(c))

        return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%s" % n



# Track all encoded paths, detecting and avoiding conflicts.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". In other paths, "#" and "$" are
    replaced by "__" and "." is replaced by "_". Where the encoded form of a
    dotted path has already been recorded for a different path, further
    underscores are used in place of each "." until an unused encoding (or the
    one previously recorded for 'path') is found. Encodings of dotted paths are
    recorded in 'all_encoded_paths'.
    """

    if path in reserved_words:
        return "__%s" % path
    else:
        part_encoded = path.replace("#", "__").replace("$", "__")

        # Paths without separators are neither recorded nor tested.

        if "." not in path:
            return part_encoded

        encoded = part_encoded.replace(".", "_")

        # Test for a conflict with the encoding of a different path, re-encoding
        # if necessary.

        previous = all_encoded_paths.get(encoded)
        replacement = "_"

        while previous:
            if path == previous:
                return encoded
            replacement += "_"
            encoded = part_encoded.replace(".", replacement)
            previous = all_encoded_paths.get(encoded)

        # Store any new or re-encoded path.

        all_encoded_paths[encoded] = path
        return encoded

def encode_code(name):

    "Encode 'name' as an attribute code indicator."

    return "__ATTRCODE(%s)" % encode_path(name)

def encode_pcode(name):

    "Encode 'name' as a parameter code indicator."

    return "__PARAMCODE(%s)" % encode_path(name)

def encode_pos(name):

    "Encode 'name' as an attribute position indicator."

    return "__ATTRPOS(%s)" % encode_path(name)

def encode_ppos(name):

    "Encode 'name' as a parameter position indicator."

    return "__PARAMPOS(%s)" % encode_path(name)

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    The 'kind' is translated using the structure size prefixes where possible,
    being used directly otherwise. Any 'path' is encoded and appended after an
    underscore.
    """

    prefix = structure_size_prefixes.get(kind, kind)
    suffix = "_%s" % encode_path(path) if path else ""
    return "__%ssize%s" % (prefix, suffix)

def encode_symbol(symbol_type, path=None):

    """
    Encode a symbol with the given 'symbol_type' and optional 'path', the
    latter being encoded and appended after an underscore if specified.
    """

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%s%s" % (symbol_type, suffix)

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned.

    A KeyError is raised for a 'kind' absent from the table name prefixes.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path' by prefixing it with '#'."

    return "#%s" % path

def decode_type_attribute(s):

    "Decode the special type attribute 's' by removing its first character."

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name, this starting with '#'."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

structure_size_prefixes = {
    "<class>": "c",
    "<module>": "m",
    "<instance>": "i",
}

# Table name prefixes, keyed by object kind. These are looked up by direct
# indexing in encode_tablename, so a kind absent from this mapping cannot be
# given a table name.

table_name_prefixes = {
    "<class>": "Class",
    "<function>": "Function",
    "<module>": "Module",
    "<instance>": "Instance",
}



# Words reserved in the output language. A path equal to one of these is
# prefixed with "__" by encode_path instead of being emitted as it is.
# NOTE(review): the entries look like C keywords, yet others such as "switch",
# "static" and "unsigned" are not listed - confirm whether this is deliberate.

reserved_words = [
    "break",
    "char",
    "const",
    "continue",
    "default",
    "double",
    "else",
    "float",
    "for",
    "if",
    "int",
    "long",
    "NULL",
    "return",
    "struct",
    "typedef",
    "void",
    "while",
]

# vim: tabstop=4 expandtab shiftwidth=4