#!/usr/bin/env python

"""
Encoder functions, producing representations of program objects.

Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from common import first, InstructionSequence



# Value digest computation.

from base64 import b64encode
from hashlib import sha1

def digest(values):

    """
    Return a textual digest of 'values'.

    The string form of each value is fed in order into a SHA-1 hash, and the
    resulting digest is base64-encoded with "+" replaced by "__", "/" replaced
    by "_" and any trailing "=" padding removed.
    """

    # NOTE(review): passing str(value) to the hash and calling str.replace on
    # the encoded digest relies on byte strings as provided by Python 2.

    m = sha1()
    for value in values:
        m.update(str(value))
    return b64encode(m.digest()).replace("+", "__").replace("/", "_").rstrip("=")



# Output encoding and decoding for the summary files.

def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage, joining them with ", " and
    yielding "{}" where the joined result is empty.
    """

    return ", ".join(attrnames) or "{}"

def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries, returning
    "constrained" for a true value and "deduced" otherwise.
    """

    return "constrained" if constrained else "deduced"

def encode_usage(usage):

    """
    Encode attribute details from 'usage', a collection of (attrname,
    invocation, assignment) tuples.

    Each name is suffixed with "!" for an invocation or "=" for an assignment,
    with invocation taking precedence. The names are joined with ", ", and
    "{}" is returned where the joined result is empty.
    """

    all_attrnames = []
    for attrname, invocation, assignment in usage:
        suffix = "!" if invocation else ("=" if assignment else "")
        all_attrnames.append("%s%s" % (attrname, suffix))
    return ", ".join(all_attrnames) or "{}"

def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of distinct
    (attrname, invocation, assignment) tuples.
    """

    # NOTE(review): the "{}" placeholder produced by encode_usage for empty
    # usage is not treated specially here and decodes as an attribute named
    # "{}" - confirm that callers filter it out beforehand.

    all_attrnames = set(
        (attrname_str.rstrip("!="), attrname_str.endswith("!"), attrname_str.endswith("="))
        for attrname_str in s.split(", ")
        )

    return tuple(sorted(all_attrnames))

def encode_access_location(t):

    """
    Encode the access location 't' using its path, name, attrnames and
    access_number attributes, with "{}" standing in for an absent name or
    attrnames value.
    """

    return "%s:%s:%s:%d" % (t.path, t.name or "{}", t.attrnames or "{}", t.access_number)

def encode_alias_location(t, invocation=False):

    """
    Encode the alias location 't', appending ":=<version>" and
    ":#<access_number>" only where those attributes are not None, and a final
    "!" where 'invocation' is true.
    """

    version = ":=%d" % t.version if t.version is not None else ""
    access_number = ":#%d" % t.access_number if t.access_number is not None else ""

    return "%s:%s:%s%s%s%s" % (t.path, t.name or "{}", t.attrnames or "{}",
        version, access_number, "!" if invocation else "")

def encode_location(t):

    """
    Encode the general location 't' in a concise form: path, name and version
    where both name and version are set, path and name where only the name is
    set, or path and attrnames (separated by "::") otherwise.
    """

    if t.name is not None and t.version is not None:
        return "%s:%s:%d" % (t.path, t.name, t.version)
    elif t.name is not None:
        return "%s:%s" % (t.path, t.name)
    else:
        return "%s::%s" % (t.path, t.attrnames)

def encode_modifiers(modifiers):

    """
    Encode assignment and invocation details from 'modifiers', concatenating
    the encoded form of each term.
    """

    return "".join([encode_modifier_term(t) for t in modifiers])

def encode_modifier_term(t):

    """
    Encode modifier 't' representing an assignment or an invocation.

    The term is an (assignment, invocation) pair: an assignment yields "=", an
    invocation given as (arguments, keywords) yields "(<arguments>;<keywords>)"
    with the keywords comma-separated, and anything else yields "_".
    """

    assignment, invocation = t
    if assignment:
        return "="
    elif invocation is not None:
        arguments, keywords = invocation
        return "(%d;%s)" % (arguments, ",".join(keywords))
    else:
        return "_"

def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of (assignment,
    invocation) pairs as accepted by encode_modifier_term.

    A ValueError is raised by the string searches if an invocation term lacks
    its ";" or ")" delimiter.
    """

    i = 0
    end = len(s)

    modifiers = []

    while i < end:

        # Assignment term.

        if s[i] == "=":
            modifiers.append((True, None))
            i += 1

        # Invocation term: "(" arguments ";" keywords ")".

        elif s[i] == "(":
            j = s.index(";", i)
            arguments = int(s[i+1:j])
            i = j
            j = s.index(")", i)
            keywords = s[i+1:j]
            keywords = keywords.split(",") if keywords else []
            modifiers.append((False, (arguments, keywords)))
            i = j + 1

        # Any other character denotes a term that is neither of the above.

        else:
            modifiers.append((False, None))
            i += 1

    return modifiers



# Test generation functions.

def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types.
    """

    return [ref.get_kind() for ref in all_types]

def test_label_for_kind(kind):

    """
    Return the label used for 'kind' in test details: "instance" for the
    "<instance>" kind and "type" for anything else.
    """

    return "instance" if kind == "<instance>" else "type"

def test_label_for_type(ref):

    "Return the label used for 'ref' in test details."

    return test_label_for_kind(ref.get_kind())



# Instruction representation encoding.

def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    Arguments that are tuples are encoded recursively; false plain values are
    shown as "{}". An instruction without arguments yields the operation
    itself.
    """

    op = instruction[0]
    args = instruction[1:]

    if args:
        a = []
        for arg in args:
            if isinstance(arg, tuple):
                a.append(encode_instruction(arg))
            else:
                a.append(arg or "{}")
        argstr = "(%s)" % ", ".join(a)
        return "%s%s" % (op, argstr)
    else:
        return op



# Output program encoding.

# Operation groups used when encoding access instructions.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",
    )

context_values = (
    "<context>",
    )

context_ops = (
    "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",
    )

context_op_functions = (
    "<test_context_revert>", "<test_context_static>",
    )

reference_acting_ops = attribute_ops + checked_ops + type_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops

attribute_producing_variables = (
    "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"
    )

def encode_access_instruction(instruction, subs, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments. Only the first argument is encoded with knowledge
    # of the operation, permitting conversions specific to that operation.

    a = []
    if args:
        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

    # Modify certain arguments.

    # Convert type name arguments.

    if op in typename_ops:
        a[1] = encode_path(encode_type_attribute(args[1]))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add context storage information to certain operations.

    if op in context_ops:
        a.insert(0, context_index)

    # Add the local context array to certain operations.

    if op in context_op_functions:
        a.append("__tmp_contexts")

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Unsubstituted elements without arguments are encoded as addresses.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and 'context_index' to indicate any
    affected context storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    # Nested instructions are encoded recursively.

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, context_index)
        return attribute_to_reference(op, arg[0], encoded, substituted)

    # Special values only need replacing, not encoding.

    elif arg in subs:

        # Handle values modified by storage details.

        if arg in context_values:
            encoded = "%s(%s)" % (subs.get(arg), context_index)
        else:
            encoded = subs.get(arg)

        substituted = set([arg])
        return attribute_to_reference(op, arg, encoded, substituted)

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and \
        arg not in attribute_producing_variables:

        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def attribute_to_reference(op, arg, encoded, substituted):

    """
    Return 'encoded' and 'substituted' as a tuple, wrapping 'encoded' in
    __VALUE(...) where 'op' acts upon references and 'arg' is an operation or
    variable producing an attribute.
    """

    # Convert attribute results to references where required.

    if op and op in reference_acting_ops and (
        arg in attribute_producing_ops or
        arg in attribute_producing_variables):

        return "__VALUE(%s)" % encoded, substituted
    else:
        return encoded, substituted

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence: a single instruction is returned
    unchanged, whereas anything else is joined using commas and newlines
    within parentheses.
    """

    if len(instructions) == 1:
        return instructions[0]
    else:
        return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%s" % n

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', this being
    the length of a string value and zero for any other kind of value.
    """

    if isinstance(value, basestring):
        return len(value)
    else:
        return 0

def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being the
    name of the value's class followed by "value".
    """

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integers and floats are given in their plain string form. Other values are
    converted to strings and emitted as double-quoted literals with quotes,
    backslashes, newlines, tabs and carriage returns escaped, and with
    characters outside the range 0x20 to 0x7f given as octal escapes.
    """

    if isinstance(value, (int, float)):
        return str(value)
    else:
        l = []

        # Encode characters including non-ASCII ones.

        for c in str(value):
            if c == '"': l.append('\\"')
            elif c == '\n': l.append('\\n')
            elif c == '\t': l.append('\\t')
            elif c == '\r': l.append('\\r')
            elif c == '\\': l.append('\\\\')
            elif 0x20 <= ord(c) < 0x80: l.append(c)

            # Use three-digit octal escapes: a hexadecimal escape in the output
            # language consumes all following hexadecimal digits, so "\xe9"
            # followed by a character such as "1" would be misread as a single
            # escape, whereas an octal escape ends after three digits.

            else: l.append("\\%03o" % ord(c))

        return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%s" % n



# Track all encoded paths, detecting and avoiding conflicts.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" are replaced
    by "__" and each "." by "_". Where the result has already been recorded for
    a different dotted path, further underscores are used in place of each "."
    until an unused encoding (or the one recorded for this path) is found.
    """

    if path in reserved_words:
        return "__%s" % path
    else:
        part_encoded = path.replace("#", "__").replace("$", "__")

        # Paths without dots are neither tested for conflicts nor recorded.

        if "." not in path:
            return part_encoded

        encoded = part_encoded.replace(".", "_")

        # Test for a conflict with the encoding of a different path, re-encoding
        # if necessary.

        previous = all_encoded_paths.get(encoded)
        replacement = "_"

        while previous:
            if path == previous:
                return encoded
            replacement += "_"
            encoded = part_encoded.replace(".", replacement)
            previous = all_encoded_paths.get(encoded)

        # Store any new or re-encoded path.

        all_encoded_paths[encoded] = path
        return encoded

def encode_code(name):

    "Encode 'name' as an attribute code indicator."

    return "__ATTRCODE(%s)" % encode_path(name)

def encode_pcode(name):

    "Encode 'name' as a parameter code indicator."

    return "__PARAMCODE(%s)" % encode_path(name)

def encode_pos(name):

    "Encode 'name' as an attribute position indicator."

    return "__ATTRPOS(%s)" % encode_path(name)

def encode_ppos(name):

    "Encode 'name' as a parameter position indicator."

    return "__PARAMPOS(%s)" % encode_path(name)

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    Kinds without a registered prefix are used directly as the prefix.
    """

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%ssize%s" % (structure_size_prefixes.get(kind, kind), suffix)

def encode_symbol(symbol_type, path=None):

    "Encode a symbol with the given 'symbol_type' and optional 'path'."

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%s%s" % (symbol_type, suffix)

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned.

    A KeyError is raised for any 'kind' lacking a table name prefix.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path' by prefixing it with '#'."

    return "#%s" % path

def decode_type_attribute(s):

    "Decode the special type attribute 's' by removing its first character."

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name, this starting with '#'."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i",
    }

# A mapping from kinds to table name prefixes.

table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance",
    }



# Output language reserved words.

# Names matching these words cannot be used directly as identifiers in the
# output program. The list covers the C keywords plus NULL, including keywords
# that are valid identifiers in the input language (such as "register",
# "static" and "switch").

reserved_words = [
    "auto",
    "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern",
    "float", "for",
    "goto",
    "if", "inline", "int", "long",
    "NULL",
    "register", "restrict", "return",
    "short", "signed", "sizeof", "static", "struct", "switch",
    "typedef",
    "union", "unsigned",
    "void", "volatile", "while",
    ]

# vim: tabstop=4 expandtab shiftwidth=4