#!/usr/bin/env python

"""
Encoder functions, producing representations of program objects.

Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from common import first, InstructionSequence

# Output encoding and decoding for the summary files.

def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage as a comma-separated string.
    Where the joined result would be empty, "{}" is returned instead.
    """

    return ", ".join(attrnames) or "{}"

def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries, returning
    "constrained" for a true value and "deduced" otherwise.
    """

    return "constrained" if constrained else "deduced"

def encode_usage(usage):

    """
    Encode attribute details from 'usage', a collection of (attribute name,
    invocation flag, assignment flag) tuples.

    Each name is suffixed with "!" if invoked, otherwise with "=" if assigned,
    and the encoded names are joined using ", ". Where the joined result would
    be empty, "{}" is returned instead.
    """

    all_attrnames = []
    for attrname, invocation, assignment in usage:

        # Invocation takes precedence over assignment when both are set.

        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""
        all_attrnames.append("%s%s" % (attrname, suffix))

    return ", ".join(all_attrnames) or "{}"

def decode_usage(s):

    """
    Decode attribute details from 's', a string of ", "-separated names, each
    optionally suffixed with "!" or "=".

    Return a sorted tuple of distinct (attribute name, invocation flag,
    assignment flag) tuples.
    """

    # NOTE(review): the "{}" produced by encode_usage for empty usage is not
    # NOTE(review): treated specially here and decodes as a name - confirm
    # NOTE(review): that callers handle the empty case themselves.

    all_attrnames = set()
    for attrname_str in s.split(", "):
        all_attrnames.add((
            attrname_str.rstrip("!="),
            attrname_str.endswith("!"),
            attrname_str.endswith("="),
            ))

    return tuple(sorted(all_attrnames))

def encode_access_location(t):

    """
    Encode the access location 't', a (path, name, attrname, version) tuple,
    as "path name attrname:version". A false 'name' is written as "{}".
    """

    path, name, attrname, version = t
    return "%s %s %s:%d" % (path, name or "{}", attrname, version)

def encode_location(t):

    """
    Encode the general location 't', a (path, name, attrname, version) tuple,
    in a concise form.

    With a name and a version, "path name:version" is produced; with only a
    name, "path name" is produced; without a name, "path :attrname" is
    produced.
    """

    path, name, attrname, version = t

    if name is None:
        return "%s :%s" % (path, attrname)

    if version is None:
        return "%s %s" % (path, name)

    return "%s %s:%d" % (path, name, version)

def encode_modifiers(modifiers):

    """
    Encode assignment and invocation details from 'modifiers', concatenating
    the encoded form of each modifier term.
    """

    return "".join([encode_modifier_term(t) for t in modifiers])

def encode_modifier_term(t):

    """
    Encode modifier 't', an (assignment, invocation) tuple representing an
    assignment or an invocation.

    An assignment is encoded as "=". An invocation, given as an (arguments,
    keywords) tuple, is encoded as "(arguments;keyword,keyword...)". Anything
    else is encoded as "_".
    """

    assignment, invocation = t

    if assignment:
        return "="

    if invocation is None:
        return "_"

    arguments, keywords = invocation
    return "(%d;%s)" % (arguments, ",".join(keywords))

def decode_modifiers(s):

    """
    Decode 's' containing modifiers in the form produced by encode_modifiers.

    Return a list of (assignment, invocation) tuples where any invocation is
    an (arguments, keywords) tuple and is otherwise None. A ValueError is
    raised by the string searching or integer conversion if an invocation term
    is malformed.
    """

    i = 0
    end = len(s)

    modifiers = []

    while i < end:

        # Assignment terms occupy a single character.

        if s[i] == "=":
            modifiers.append((True, None))
            i += 1

        # Invocation terms have the form "(<arguments>;<keywords>)".

        elif s[i] == "(":
            j = s.index(";", i)
            arguments = int(s[i+1:j])
            i = j
            j = s.index(")", i)
            keywords = s[i+1:j]

            # An empty keyword section indicates no keywords at all.

            keywords = keywords.split(",") if keywords else []
            modifiers.append((False, (arguments, keywords)))
            i = j + 1

        # Any other character indicates a plain access.

        else:
            modifiers.append((False, None))
            i += 1

    return modifiers



# Test generation functions.

def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types. The result is a list containing the result
    of calling get_kind on each reference.
    """

    return [ref.get_kind() for ref in all_types]

def test_label_for_kind(kind):

    """
    Return the label used for 'kind' in test details: "instance" for the
    "<instance>" kind and "type" for any other kind.
    """

    return "instance" if kind == "<instance>" else "type"

def test_label_for_type(ref):

    """
    Return the label used for 'ref' in test details, this being determined by
    the kind reported by the reference.
    """

    return test_label_for_kind(ref.get_kind())



# Instruction representation encoding.

def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    Tuple arguments are encoded recursively, false plain values are written
    as "{}", and an instruction without arguments is returned as the operation
    alone.
    """

    op = instruction[0]
    args = instruction[1:]

    if not args:
        return op

    a = []
    for arg in args:
        if isinstance(arg, tuple):
            a.append(encode_instruction(arg))
        else:
            a.append(arg or "{}")

    return "%s(%s)" % (op, ", ".join(a))



# Output program encoding.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test",
    )

context_values = (
    "<context>",
    )

context_ops = (
    "<context>", "<set_context>",
    )

reference_acting_ops = attribute_ops + checked_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops

def encode_access_instruction(instruction, subs, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments. Only the first argument is encoded with knowledge
    # of the operation, later arguments being encoded without it.

    a = []
    if args:
        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

    # Modify certain arguments.

    # Convert attribute name arguments to position symbols.

    if op in attribute_ops:
        arg = a[1]
        a[1] = encode_symbol("pos", arg)

    # Convert attribute name arguments to position and code symbols.

    elif op in checked_ops:
        arg = a[1]
        a[1] = encode_symbol("pos", arg)
        a.insert(2, encode_symbol("code", arg))

    # Convert type name arguments to position and code symbols, using the
    # original argument and not its encoded form.

    elif op in typename_ops:
        arg = encode_type_attribute(args[1])
        a[1] = encode_symbol("pos", arg)
        a.insert(2, encode_symbol("code", arg))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add context storage information to certain operations.

    elif op in context_ops:
        a.insert(0, context_index)

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Without arguments or a substitution, the element is encoded as the
    # address of the object it names.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and 'context_index' to indicate any
    affected context storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    # Whether the operation, if given, acts upon references.

    acting_on_reference = op and op in reference_acting_ops

    # Nested instructions are encoded recursively.

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, context_index)

        # Convert attribute results to references where required.

        if acting_on_reference and arg[0] in attribute_producing_ops:
            return "%s.value" % encoded, substituted
        else:
            return encoded, substituted

    # Special values only need replacing, not encoding.

    elif arg in subs:

        # Handle values modified by storage details.

        if arg in context_values:
            return "%s(%s)" % (subs.get(arg), context_index), set([arg])
        else:
            return subs.get(arg), set([arg])

    # Convert static references to the appropriate type.

    elif acting_on_reference and arg != "<accessor>":
        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def encode_function_pointer(path):

    """
    Encode 'path' as a reference to an output program function, prefixing the
    encoded path with "__fn_".
    """

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    """
    Encode 'path' as a reference to an output program instantiator, prefixing
    the encoded path with "__new_".
    """

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence. A single instruction is returned
    as it is; otherwise, the instructions are written one per line, separated
    by commas and enclosed in parentheses.
    """

    if len(instructions) == 1:
        return instructions[0]

    return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%d" % n

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', this being
    the length of string values and zero for all other values.
    """

    if isinstance(value, basestring):
        return len(value)

    return 0

def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being
    the name of the value's class followed by "value".
    """

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integer and floating point values are written using their plain string
    form. Other values are converted to strings and written as double-quoted
    literals with quotes, backslashes, newlines, tabs and carriage returns
    escaped symbolically and any other character outside the range 0x20 to
    0x7f written as a three-digit octal escape.
    """

    if isinstance(value, (int, float)):
        return str(value)

    symbolic_escapes = {
        '"' : '\\"',
        '\n' : '\\n',
        '\t' : '\\t',
        '\r' : '\\r',
        '\\' : '\\\\',
        }

    l = []

    # Encode characters including non-ASCII ones.

    for c in str(value):
        if c in symbolic_escapes:
            l.append(symbolic_escapes[c])
        elif 0x20 <= ord(c) < 0x80:
            l.append(c)

        # Use octal escapes, which never exceed three digits. A hexadecimal
        # escape in a C string literal absorbs every hexadecimal digit that
        # follows it, so "\x01" followed by "b" would be read as one character.

        else:
            l.append("\\%03o" % ord(c))

    return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%d" % n



# Track all encoded paths, detecting and avoiding conflicts. Each encoded form
# of a dotted path is mapped to the path that produced it.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" are each
    replaced by "__", and paths without "." are returned in this form. For
    dotted paths, each "." is replaced by "_", with further underscores being
    added if the result is already recorded in 'all_encoded_paths' for a
    different path. The chosen encoding is then recorded for the path.
    """

    if path in reserved_words:
        return "__%s" % path

    part_encoded = path.replace("#", "__").replace("$", "__")

    # Undotted paths are neither recorded nor tested for conflicts.

    if "." not in path:
        return part_encoded

    replacement = "_"
    encoded = part_encoded.replace(".", replacement)

    # Test for a conflict with the encoding of a different path, re-encoding
    # if necessary.

    previous = all_encoded_paths.get(encoded)

    while previous:

        # The path has been encoded before: reuse the recorded encoding.

        if path == previous:
            return encoded

        # Lengthen the separator and try again.

        replacement += "_"
        encoded = part_encoded.replace(".", replacement)
        previous = all_encoded_paths.get(encoded)

    # Store any new or re-encoded path.

    all_encoded_paths[encoded] = path
    return encoded

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    The 'kind' is translated using 'structure_size_prefixes', being used as
    it is if no translation is defined. Any 'path' is encoded and appended
    after an underscore.
    """

    prefix = structure_size_prefixes.get(kind, kind)
    suffix = "_%s" % encode_path(path) if path else ""
    return "__%ssize%s" % (prefix, suffix)

def encode_symbol(symbol_type, path=None):

    """
    Encode a symbol with the given 'symbol_type' and optional 'path', the
    latter being encoded and appended after an underscore where given.
    """

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%s%s" % (symbol_type, suffix)

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned. A KeyError is raised for any
    'kind' not defined in 'table_name_prefixes'.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path' by prefixing it with '#'."

    return "#%s" % path

def decode_type_attribute(s):

    """
    Decode the special type attribute 's' by removing its first character,
    without testing whether that character is the "#" prefix.
    """

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name, this starting with '#'."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i"
    }

# A mapping from kinds to table name prefixes.

table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance"
    }



# Output language reserved words.

reserved_words = [
    "break", "char", "const", "continue",
    "default", "double", "else",
    "float", "for",
    "if", "int", "long",
    "NULL",
    "return", "struct",
    "typedef",
    "void", "while",
    ]

# vim: tabstop=4 expandtab shiftwidth=4