#!/usr/bin/env python

"""
Encoder functions, producing representations of program objects.

Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from common import first, InstructionSequence



# Value digest computation.

from base64 import b64encode
from hashlib import sha1

def digest(values):

    """
    Return a textual digest of 'values'. Each value is converted using str and
    fed to a SHA-1 hash; the base64 form of the hash is then returned with "+"
    replaced by "__", "/" replaced by "_", and trailing "=" padding removed.
    """

    hasher = sha1()
    for value in values:
        hasher.update(str(value))

    encoded = b64encode(hasher.digest())
    encoded = encoded.replace("+", "__")
    encoded = encoded.replace("/", "_")
    return encoded.rstrip("=")



# Output encoding and decoding for the summary files.

def encode_attrnames(attrnames):

    """
    Return the 'attrnames' representing usage joined by ", ", or "{}" where the
    joined result is empty.
    """

    joined = ", ".join(attrnames)
    if joined:
        return joined
    return "{}"

def encode_constrained(constrained):

    """
    Return "constrained" for a true 'constrained' status in program summaries,
    "deduced" otherwise.
    """

    if constrained:
        return "constrained"
    return "deduced"

def encode_usage(usage):

    """
    Encode the (attrname, invocation, assignment) entries in 'usage'. Each name
    is followed by "!" for an invocation, otherwise by "=" for an assignment,
    and the entries are joined by ", ". An empty result is given as "{}".
    """

    entries = []
    for attrname, invocation, assignment in usage:

        # Invocation takes precedence over assignment.

        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""

        entries.append("%s%s" % (attrname, suffix))

    encoded = ", ".join(entries)
    if encoded:
        return encoded
    return "{}"

def decode_usage(s):

    """
    Decode the attribute details in 's', returning a sorted tuple of distinct
    (attrname, invocation, assignment) entries.
    """

    entries = set()
    for text in s.split(", "):
        invocation = text.endswith("!")
        assignment = text.endswith("=")
        entries.add((text.rstrip("!="), invocation, assignment))

    return tuple(sorted(entries))

def encode_access_location(t):

    """
    Encode the access location 't', a (path, name, attrname, version) tuple,
    using "{}" in place of an absent name.
    """

    path, name, attrname, version = t
    shown_name = name or "{}"
    return "%s %s %s:%d" % (path, shown_name, attrname, version)

def encode_location(t):

    """
    Encode the general location 't', a (path, name, attrname, version) tuple,
    in a concise form: the name with any version is preferred, with the
    attribute name only being used where no name is given.
    """

    path, name, attrname, version = t

    if name is None:
        return "%s :%s" % (path, attrname)

    if version is None:
        return "%s %s" % (path, name)

    return "%s %s:%d" % (path, name, version)

def encode_modifiers(modifiers):

    """
    Encode the assignment and invocation details from 'modifiers' as a single
    string with one term per modifier.
    """

    return "".join([encode_modifier_term(t) for t in modifiers])

def encode_modifier_term(t):

    """
    Encode modifier 't', an (assignment, invocation) pair: "=" denotes an
    assignment, "(<arguments>;<keywords>)" an invocation, and "_" neither.
    """

    assignment, invocation = t

    if assignment:
        return "="

    if invocation is None:
        return "_"

    arguments, keywords = invocation
    return "(%d;%s)" % (arguments, ",".join(keywords))

def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of (assignment,
    invocation) pairs in the form accepted by encode_modifier_term.
    """

    decoded = []
    pos = 0
    length = len(s)

    while pos < length:
        symbol = s[pos]

        # An invocation term: "(" arguments ";" keywords ")".

        if symbol == "(":
            separator = s.index(";", pos)
            arguments = int(s[pos+1:separator])
            closing = s.index(")", separator)
            keyword_text = s[separator+1:closing]

            if keyword_text:
                keywords = keyword_text.split(",")
            else:
                keywords = []

            decoded.append((False, (arguments, keywords)))
            pos = closing + 1

        # An assignment term.

        elif symbol == "=":
            decoded.append((True, None))
            pos += 1

        # Any other character denotes neither assignment nor invocation.

        else:
            decoded.append((False, None))
            pos += 1

    return decoded



# Test generation functions.
147 148 def get_kinds(all_types): 149 150 """ 151 Return object kind details for 'all_types', being a collection of 152 references for program types. 153 """ 154 155 return map(lambda ref: ref.get_kind(), all_types) 156 157 def test_label_for_kind(kind): 158 159 "Return the label used for 'kind' in test details." 160 161 return kind == "<instance>" and "instance" or "type" 162 163 def test_label_for_type(ref): 164 165 "Return the label used for 'ref' in test details." 166 167 return test_label_for_kind(ref.get_kind()) 168 169 170 171 # Instruction representation encoding. 172 173 def encode_instruction(instruction): 174 175 """ 176 Encode the 'instruction' - a sequence starting with an operation and 177 followed by arguments, each of which may be an instruction sequence or a 178 plain value - to produce a function call string representation. 179 """ 180 181 op = instruction[0] 182 args = instruction[1:] 183 184 if args: 185 a = [] 186 for arg in args: 187 if isinstance(arg, tuple): 188 a.append(encode_instruction(arg)) 189 else: 190 a.append(arg or "{}") 191 argstr = "(%s)" % ", ".join(a) 192 return "%s%s" % (op, argstr) 193 else: 194 return op 195 196 197 198 # Output program encoding. 

# Operation names grouped by the treatment they receive when instructions are
# encoded by the functions below.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

# Operations whose second argument is replaced by an encoded type attribute.

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

# Operations whose second argument is prefixed with "&".

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

# Operations whose last argument is prefixed with "&".

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",
    )

# Special values whose substitutions are parameterised by the context index.

context_values = (
    "<context>",
    )

# Operations receiving the context index as an additional first argument.

context_ops = (
    "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",
    )

# Operations receiving "__tmp_contexts" as an additional last argument.

context_op_functions = (
    "<test_context_revert>", "<test_context_static>",
    )

# Operations whose first argument is given in a converted form: either
# prefixed with "&" or wrapped using "__VALUE(...)".

reference_acting_ops = attribute_ops + checked_ops + type_ops + typename_ops

# Operations and variables whose encoded results are wrapped using
# "__VALUE(...)" when supplied to the above operations.

attribute_producing_ops = attribute_loading_ops + checked_loading_ops

attribute_producing_variables = (
    "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"
    )

def encode_access_instruction(instruction, subs, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments. Only the first argument is encoded with knowledge
    # of the operation, since only that argument may need converting.

    a = []
    if args:
        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

    # Modify certain arguments.

    # Convert type name arguments.

    if op in typename_ops:
        a[1] = encode_path(encode_type_attribute(args[1]))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add context storage information to certain operations.

    if op in context_ops:
        a.insert(0, context_index)

    # Add the local context array to certain operations.

    if op in context_op_functions:
        a.append("__tmp_contexts")

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all. Membership is tested using "in" since the has_key
    # method of mappings is deprecated.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # An unsubstituted element without arguments is encoded as the address of
    # the object having the given path.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and 'context_index' to indicate any
    affected context storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    # Nested instructions are encoded recursively.

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, context_index)
        return attribute_to_reference(op, arg[0], encoded, substituted)

    # Special values only need replacing, not encoding.

    elif arg in subs:

        # Handle values modified by storage details.

        if arg in context_values:
            encoded = "%s(%s)" % (subs.get(arg), context_index)
        else:
            encoded = subs.get(arg)

        substituted = set([arg])
        return attribute_to_reference(op, arg, encoded, substituted)

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and \
        arg not in attribute_producing_variables:

        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def attribute_to_reference(op, arg, encoded, substituted):

    """
    Convert attribute results to references where required: if 'op' is one of
    the reference-acting operations and 'arg' is an attribute-producing
    operation or variable, return the 'encoded' form wrapped using
    "__VALUE(...)"; otherwise, return 'encoded' unchanged. In both cases, the
    'substituted' collection is returned as the second element of the result.
    """

    if op and op in reference_acting_ops and (
        arg in attribute_producing_ops or
        arg in attribute_producing_variables):

        return "__VALUE(%s)" % encoded, substituted
    else:
        return encoded, substituted

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence. A single instruction is returned as it
    is, whereas any other number of instructions is returned as a
    parenthesised, comma-separated sequence with one instruction per line.
    """

    if len(instructions) == 1:
        return instructions[0]
    else:
        return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%s" % n

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value': the length
    of a string value, zero for any other kind of value.
    """

    if isinstance(value, basestring):
        return len(value)
    else:
        return 0

def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being
    the name of the value's class followed by "value".
    """

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program. Integers and floats are
    encoded using str. Any other value is converted using str and encoded as a
    double-quoted string in which double quotes, backslashes, newlines, tabs
    and carriage returns are escaped, and in which characters with codes
    outside the range from 0x20 up to (but excluding) 0x80 are written as
    hexadecimal escapes.
    """

    if isinstance(value, (int, float)):
        return str(value)
    else:
        l = []

        # Encode characters including non-ASCII ones.

        for c in str(value):
            if c == '"': l.append('\\"')
            elif c == '\n': l.append('\\n')
            elif c == '\t': l.append('\\t')
            elif c == '\r': l.append('\\r')
            elif c == '\\': l.append('\\\\')
            elif 0x20 <= ord(c) < 0x80: l.append(c)
            else: l.append("\\x%02x" % ord(c))

        return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%s" % n



# Track all encoded paths, detecting and avoiding conflicts.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" are replaced
    by "__" and, where 'path' contains ".", each "." is replaced by "_". Should
    the result already be recorded for a different path, longer replacements
    for "." are tried until an unused encoding (or the encoding previously
    recorded for 'path') is found. Encodings of paths containing "." are
    recorded in the module-level all_encoded_paths mapping.
    """

    if path in reserved_words:
        return "__%s" % path
    else:
        part_encoded = path.replace("#", "__").replace("$", "__")

        # Paths without "." are not recorded or tested for conflicts.

        if "." not in path:
            return part_encoded

        encoded = part_encoded.replace(".", "_")

        # Test for a conflict with the encoding of a different path, re-encoding
        # if necessary.

        previous = all_encoded_paths.get(encoded)
        replacement = "_"

        while previous:
            if path == previous:
                return encoded
            replacement += "_"
            encoded = part_encoded.replace(".", replacement)
            previous = all_encoded_paths.get(encoded)

        # Store any new or re-encoded path.

        all_encoded_paths[encoded] = path
        return encoded

def encode_code(name):

    "Encode 'name' as an attribute code indicator."

    return "__ATTRCODE(%s)" % encode_path(name)

def encode_pcode(name):

    "Encode 'name' as a parameter code indicator."

    return "__PARAMCODE(%s)" % encode_path(name)

def encode_pos(name):

    "Encode 'name' as an attribute position indicator."

    return "__ATTRPOS(%s)" % encode_path(name)

def encode_ppos(name):

    "Encode 'name' as a parameter position indicator."

    return "__PARAMPOS(%s)" % encode_path(name)

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name. Where 'kind' has no entry in
    the structure size prefixes, 'kind' itself is used as the prefix.
    """

    return "__%ssize%s" % (structure_size_prefixes.get(kind, kind), path and "_%s" % encode_path(path) or "")

def encode_symbol(symbol_type, path=None):

    "Encode a symbol with the given 'symbol_type' and optional 'path'."

    return "__%s%s" % (symbol_type, path and "_%s" % encode_path(path) or "")

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned. A KeyError is raised for any
    'kind' without an entry in the table name prefixes.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path' by prefixing it with '#'."

    return "#%s" % path

def decode_type_attribute(s):

    "Decode the special type attribute 's' by removing its first character."

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name, this starting with '#'."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

# Prefixes consulted by encode_size when naming structure sizes.

structure_size_prefixes = {
    "<class>": "c",
    "<module>": "m",
    "<instance>": "i",
    }

# A mapping from kinds to table name prefixes, consulted by encode_tablename.

table_name_prefixes = {
    "<class>": "Class",
    "<function>": "Function",
    "<module>": "Module",
    "<instance>": "Instance",
    }



# Output language reserved words: encode_path prefixes any path found here
# with "__".

reserved_words = [
    "break",
    "char",
    "const",
    "continue",
    "default",
    "double",
    "else",
    "float",
    "for",
    "if",
    "int",
    "long",
    "NULL",
    "return",
    "struct",
    "typedef",
    "void",
    "while",
    ]

# vim: tabstop=4 expandtab shiftwidth=4