1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts and generate access instruction plans. 5 6 Copyright (C) 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_instructions = {} 60 self.accessor_kinds = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.initialise_access_instructions() 86 87 def to_output(self): 88 89 "Write the output files using optimisation information." 90 91 if not exists(self.output): 92 makedirs(self.output) 93 94 self.write_objects() 95 96 def write_objects(self): 97 98 """ 99 Write object-related output. 100 101 The locations are a list of positions indicating the attributes residing 102 at each position in the different structures in a program. 103 104 ---- 105 106 The parameter locations are a list of positions indicating the parameters 107 residing at each position in the different parameter lists in a program. 108 109 ---- 110 111 Each attribute plan provides attribute details in the following format: 112 113 location " " name " " test " " test type " " base 114 " " traversed attributes " " traversed attribute ambiguity 115 " " traversal access modes 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 Traversal access modes are either "class" (obtain accessor class to 124 access attribute) or "object" (obtain attribute directly from accessor). 125 126 ---- 127 128 The structures are presented as a table in the following format: 129 130 qualified name " " attribute names 131 132 The attribute names are separated by ", " characters and indicate the 133 attribute provided at each position in the structure associated with the 134 given type. Where no attribute is provided at a particular location 135 within a structure, "-" is given. 136 137 ---- 138 139 The parameters are presented as a table in the following format: 140 141 qualified name " " parameter details 142 143 The parameter details are separated by ", " characters and indicate 144 the parameter name and list position for each parameter described at 145 each location in the parameter table associated with the given 146 function. Where no parameter details are provided at a particular 147 location within a parameter table, "-" is given. The name and list 148 position are separated by a colon (":"). 149 150 ---- 151 152 The attribute table is presented as a table in the following format: 153 154 qualified name " " attribute identifiers 155 156 Instead of attribute names, identifiers defined according to the order 157 given in the "attrnames" file are employed to denote the attributes 158 featured in each type's structure. Where no attribute is provided at a 159 particular location within a structure, "-" is given. 160 161 ---- 162 163 The parameter table is presented as a table in the following format: 164 165 qualified name " " parameter details 166 167 Instead of parameter names, identifiers defined according to the order 168 given in the "paramnames" file are employed to denote the parameters 169 featured in each function's parameter table. Where no parameter is 170 provided at a particular location within a table, "-" is given. 171 172 ---- 173 174 The ordered list of attribute names is given in the "attrnames" file. 175 176 ---- 177 178 The ordered list of parameter names is given in the "paramnames" file. 179 180 ---- 181 182 The ordered list of constant literals is given in the "constants" file. 183 """ 184 185 f = open(join(self.output, "locations"), "w") 186 try: 187 for attrnames in self.locations: 188 print >>f, sorted_output(attrnames) 189 190 finally: 191 f.close() 192 193 f = open(join(self.output, "parameter_locations"), "w") 194 try: 195 for argnames in self.arg_locations: 196 print >>f, sorted_output(argnames) 197 198 finally: 199 f.close() 200 201 f = open(join(self.output, "instruction_plans"), "w") 202 try: 203 access_instructions = self.access_instructions.items() 204 access_instructions.sort() 205 206 for location, instructions in access_instructions: 207 print >>f, encode_access_location(location), "..." 208 for instruction in instructions: 209 print >>f, encode_instruction(instruction) 210 print >>f 211 212 finally: 213 f.close() 214 215 f = open(join(self.output, "structures"), "w") 216 try: 217 structures = self.structures.items() 218 structures.sort() 219 220 for name, attrnames in structures: 221 print >>f, name, ", ".join([s or "-" for s in attrnames]) 222 223 finally: 224 f.close() 225 226 f = open(join(self.output, "parameters"), "w") 227 try: 228 parameters = self.parameters.items() 229 parameters.sort() 230 231 for name, argnames in parameters: 232 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 233 234 finally: 235 f.close() 236 237 f = open(join(self.output, "attrtable"), "w") 238 try: 239 attr_table = self.attr_table.items() 240 attr_table.sort() 241 242 for name, attrcodes in attr_table: 243 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 244 245 finally: 246 f.close() 247 248 f = open(join(self.output, "paramtable"), "w") 249 try: 250 param_table = self.param_table.items() 251 param_table.sort() 252 253 for name, paramcodes in param_table: 254 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 255 256 finally: 257 f.close() 258 259 f = open(join(self.output, "attrnames"), "w") 260 try: 261 for name in self.all_attrnames: 262 print >>f, name 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramnames"), "w") 268 try: 269 for name in self.all_paramnames: 270 print >>f, name 271 272 finally: 273 f.close() 274 275 f = open(join(self.output, "constants"), "w") 276 try: 277 constants = [] 278 for (value, value_type, encoding), n in self.constants.items(): 279 constants.append((n, value_type, encoding, value)) 280 constants.sort() 281 for n, value_type, encoding, value in constants: 282 print >>f, value_type, encoding or "{}", repr(value) 283 284 finally: 285 f.close() 286 287 def populate_objects(self): 288 289 "Populate objects using attribute and usage information." 290 291 self.all_attrs = {} 292 293 # Partition attributes into separate sections so that class and instance 294 # attributes are treated separately. 295 296 for source, objkind in [ 297 (self.importer.all_class_attrs, "<class>"), 298 (self.importer.all_instance_attrs, "<instance>"), 299 (self.importer.all_module_attrs, "<module>") 300 ]: 301 302 for name, attrnames in source.items(): 303 304 # Remove temporary names from structures. 305 306 attrnames = filter(lambda x: not x.startswith("$t"), attrnames) 307 self.all_attrs[(objkind, name)] = attrnames 308 309 self.locations = get_allocated_locations(self.all_attrs, get_attributes_and_sizes) 310 311 def populate_parameters(self): 312 313 "Populate parameter tables using parameter information." 314 315 self.arg_locations = [set()] + get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 316 317 def position_attributes(self): 318 319 "Position specific attribute references." 320 321 # Reverse the location mappings. 322 323 attr_locations = self.attr_locations = {} 324 325 for i, attrnames in enumerate(self.locations): 326 for attrname in attrnames: 327 attr_locations[attrname] = i 328 329 # Record the structures. 330 331 for (objkind, name), attrnames in self.all_attrs.items(): 332 key = Reference(objkind, name) 333 l = self.structures[key] = [None] * len(attrnames) 334 for attrname in attrnames: 335 position = attr_locations[attrname] 336 if position >= len(l): 337 l.extend([None] * (position - len(l) + 1)) 338 l[position] = attrname 339 340 def initialise_access_instructions(self): 341 342 "Expand access plans into instruction sequences." 343 344 for access_location, access_plan in self.deducer.access_plans.items(): 345 346 # Obtain the access details. 347 348 name, test, test_type, base, \ 349 traversed, traversal_modes, attrnames, \ 350 context, context_test, \ 351 first_method, final_method, \ 352 origin, accessor_kinds = access_plan 353 354 instructions = [] 355 emit = instructions.append 356 357 if base: 358 original_accessor = base 359 else: 360 original_accessor = "<expr>" # use a generic placeholder 361 362 # Prepare for any first attribute access. 363 364 if traversed: 365 attrname = traversed[0] 366 del traversed[0] 367 elif attrnames: 368 attrname = attrnames[0] 369 del attrnames[0] 370 371 # Perform the first access explicitly if at least one operation 372 # requires it. 373 374 access_first_attribute = final_method in ("access", "assign") or traversed or attrnames 375 376 # Determine whether the first access involves assignment. 377 378 assigning = not traversed and not attrnames and final_method == "assign" 379 set_accessor = assigning and "<set_target_accessor>" or "<set_accessor>" 380 stored_accessor = assigning and "<target_accessor>" or "<accessor>" 381 382 # Set the context if already available. 383 384 if context == "base": 385 accessor = context_var = (base,) 386 elif context == "original-accessor": 387 388 # Prevent re-evaluation of any dynamic expression by storing it. 389 390 if original_accessor == "<expr>": 391 emit((set_accessor, original_accessor)) 392 accessor = context_var = (stored_accessor,) 393 else: 394 accessor = context_var = (original_accessor,) 395 396 # Assigning does not set the context. 397 398 elif context in ("final-accessor", "unset") and access_first_attribute: 399 400 # Prevent re-evaluation of any dynamic expression by storing it. 401 402 if original_accessor == "<expr>": 403 emit((set_accessor, original_accessor)) 404 accessor = (stored_accessor,) 405 else: 406 accessor = (original_accessor,) 407 408 # Apply any test. 409 410 if test[0] == "test": 411 accessor = ("__%s_%s_%s" % test, accessor, test_type) 412 413 # Perform the first or final access. 414 # The access only needs performing if the resulting accessor is used. 415 416 remaining = len(traversed + attrnames) 417 418 if access_first_attribute: 419 420 if first_method == "relative-class": 421 if assigning: 422 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 423 else: 424 accessor = ("__load_via_class", accessor, attrname) 425 426 elif first_method == "relative-object": 427 if assigning: 428 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 429 else: 430 accessor = ("__load_via_object", accessor, attrname) 431 432 elif first_method == "relative-object-class": 433 if assigning: 434 emit(("__get_class_and_store", accessor, attrname, "<assexpr>")) 435 else: 436 accessor = ("__get_class_and_load", accessor, attrname) 437 438 elif first_method == "check-class": 439 if assigning: 440 emit(("__check_and_store_via_class", accessor, attrname, "<assexpr>")) 441 else: 442 accessor = ("__check_and_load_via_class", accessor, attrname) 443 444 elif first_method == "check-object": 445 if assigning: 446 emit(("__check_and_store_via_object", accessor, attrname, "<assexpr>")) 447 else: 448 accessor = ("__check_and_load_via_object", accessor, attrname) 449 450 elif first_method == "check-object-class": 451 if assigning: 452 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 453 else: 454 accessor = ("__check_and_load_via_any", accessor, attrname) 455 456 # Traverse attributes using the accessor. 457 458 if traversed: 459 for attrname, traversal_mode in zip(traversed, traversal_modes): 460 assigning = remaining == 1 and final_method == "assign" 461 462 # Set the context, if appropriate. 463 464 if remaining == 1 and final_method != "assign" and context == "final-accessor": 465 emit(("__set_context", accessor)) 466 accessor = context_var = "<context>" 467 468 # Perform the access only if not achieved directly. 469 470 if remaining > 1 or final_method in ("access", "assign"): 471 472 if traversal_mode == "class": 473 if assigning: 474 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 475 else: 476 accessor = ("__load_via_class", accessor, attrname) 477 else: 478 if assigning: 479 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 480 else: 481 accessor = ("__load_via_object", accessor, attrname) 482 483 remaining -= 1 484 485 if attrnames: 486 for attrname in attrnames: 487 assigning = remaining == 1 and final_method == "assign" 488 489 # Set the context, if appropriate. 490 491 if remaining == 1 and final_method != "assign" and context == "final-accessor": 492 emit(("__set_context", accessor)) 493 accessor = context_var = "<context>" 494 495 # Perform the access only if not achieved directly. 496 497 if remaining > 1 or final_method in ("access", "assign"): 498 499 if assigning: 500 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 501 else: 502 accessor = ("__check_and_load_via_any", accessor, attrname) 503 504 remaining -= 1 505 506 # Define or emit the means of accessing the actual target. 507 508 if final_method == "static-assign": 509 parent, attrname = origin.rsplit(".", 1) 510 emit(("__store_via_object", parent, attrname, "<assexpr>")) 511 512 elif final_method in ("static", "static-invoke"): 513 accessor = ("__load_static_ignore", origin) 514 515 # Wrap accesses in context operations. 516 517 if context_test == "test": 518 if final_method in ("static", "static-invoke"): 519 emit(("__load_static_test", context_var, origin)) 520 else: 521 emit(("__test_context", context_var, accessor)) 522 523 elif context_test == "replace": 524 if final_method in ("static", "static-invoke"): 525 emit(("__load_static_replace", context_var, origin)) 526 else: 527 emit(("__update_context", context_var, accessor)) 528 529 elif final_method not in ("assign", "static-assign"): 530 emit(accessor) 531 532 self.access_instructions[access_location] = instructions 533 self.accessor_kinds[access_location] = accessor_kinds 534 535 def get_ambiguity_for_attributes(self, attrnames): 536 537 """ 538 Return a list of attribute position alternatives corresponding to each 539 of the given 'attrnames'. 540 """ 541 542 ambiguity = [] 543 544 for attrname in attrnames: 545 position = self.attr_locations[attrname] 546 ambiguity.append(len(self.locations[position])) 547 548 return ambiguity 549 550 def position_parameters(self): 551 552 "Position the parameters for each function's parameter table." 553 554 # Reverse the location mappings. 555 556 param_locations = self.param_locations = {} 557 558 for i, argnames in enumerate(self.arg_locations): 559 560 # Position the arguments. 561 562 for argname in argnames: 563 param_locations[argname] = i 564 565 for name, argnames in self.importer.function_parameters.items(): 566 567 # Allocate an extra context parameter in the table. 568 569 l = self.parameters[name] = [None] + [None] * len(argnames) 570 571 # Store an entry for the name along with the name's position in the 572 # parameter list. 573 574 for pos, argname in enumerate(argnames): 575 576 # Position the argument in the table. 577 578 position = param_locations[argname] 579 if position >= len(l): 580 l.extend([None] * (position - len(l) + 1)) 581 582 # Indicate an argument list position starting from 1 (after the 583 # initial context argument). 584 585 l[position] = (argname, pos + 1) 586 587 def populate_tables(self): 588 589 """ 590 Assign identifiers to attributes and encode structure information using 591 these identifiers. 592 """ 593 594 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 595 596 # Record the numbers indicating the locations of the names. 597 598 for key, attrnames in self.structures.items(): 599 l = self.attr_table[key] = [] 600 for attrname in attrnames: 601 if attrname is None: 602 l.append(None) 603 else: 604 l.append(d[attrname]) 605 606 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 607 608 # Record the numbers indicating the locations of the names. 609 610 for key, values in self.parameters.items(): 611 l = self.param_table[key] = [] 612 for value in values: 613 if value is None: 614 l.append(None) 615 else: 616 name, pos = value 617 l.append((d[name], pos)) 618 619 def _get_name_mapping(self, locations): 620 621 """ 622 Get a sorted list of names from 'locations', then map them to 623 identifying numbers. Return the list and the mapping. 624 """ 625 626 all_names = locations.keys() 627 all_names.sort() 628 d = {} 629 for i, name in enumerate(all_names): 630 d[name] = i 631 return all_names, d 632 633 def populate_constants(self): 634 635 """ 636 Obtain a collection of distinct constant literals, building a mapping 637 from constant references to those in this collection. 638 """ 639 640 # Obtain mappings from constant values to identifiers. 641 642 self.constants = {} 643 644 for path, constants in self.importer.all_constants.items(): 645 646 # Record constants and obtain a number for them. 647 # Each constant is actually (value, value_type, encoding). 648 649 for constant, n in constants.items(): 650 add_counter_item(self.constants, constant) 651 652 self.constant_numbers = {} 653 654 for name, constant in self.importer.all_constant_values.items(): 655 self.constant_numbers[name] = self.constants[constant] 656 657 def combine_rows(a, b): 658 c = [] 659 for i, j in zip(a, b): 660 if i is None or j is None: 661 c.append(i or j) 662 else: 663 return None 664 return c 665 666 def get_attributes_and_sizes(d): 667 668 """ 669 Return a matrix of attributes, a list of type names corresponding to columns 670 in the matrix, and a list of ranked sizes each indicating... 671 672 * a weighted size depending on the kind of object 673 * the minimum size of an object employing an attribute 674 * the number of free columns in the matrix for the attribute 675 * the attribute name itself 676 """ 677 678 attrs = {} 679 sizes = {} 680 objkinds = {} 681 682 for name, attrnames in d.items(): 683 objkind, _name = name 684 685 for attrname in attrnames: 686 687 # Record each type supporting the attribute. 688 689 init_item(attrs, attrname, set) 690 attrs[attrname].add(name) 691 692 # Maintain a record of the smallest object size supporting the given 693 # attribute. 694 695 if not sizes.has_key(attrname): 696 sizes[attrname] = len(attrnames) 697 else: 698 sizes[attrname] = min(sizes[attrname], len(attrnames)) 699 700 # Record the object types/kinds supporting the attribute. 701 702 init_item(objkinds, attrname, set) 703 objkinds[attrname].add(objkind) 704 705 # Obtain attribute details in order of size and occupancy. 706 707 names = d.keys() 708 709 rsizes = [] 710 for attrname, size in sizes.items(): 711 priority = "<instance>" in objkinds[attrname] and 0.5 or 1 712 occupied = len(attrs[attrname]) 713 key = (priority * size, size, len(names) - occupied, attrname) 714 rsizes.append(key) 715 716 rsizes.sort() 717 718 # Make a matrix of attributes. 719 720 matrix = {} 721 for attrname, types in attrs.items(): 722 row = [] 723 for name in names: 724 if name in types: 725 row.append(attrname) 726 else: 727 row.append(None) 728 matrix[attrname] = row 729 730 return matrix, names, rsizes 731 732 def get_parameters_and_sizes(d): 733 734 """ 735 Return a matrix of parameters, a list of functions corresponding to columns 736 in the matrix, and a list of ranked sizes each indicating... 737 738 * a weighted size depending on the kind of object 739 * the minimum size of a parameter list employing a parameter 740 * the number of free columns in the matrix for the parameter 741 * the parameter name itself 742 743 This is a slightly simpler version of the above 'get_attributes_and_sizes' 744 function. 745 """ 746 747 params = {} 748 sizes = {} 749 750 for name, argnames in d.items(): 751 for argname in argnames: 752 753 # Record each function supporting the parameter. 754 755 init_item(params, argname, set) 756 params[argname].add(name) 757 758 # Maintain a record of the smallest parameter list supporting the 759 # given parameter. 760 761 if not sizes.has_key(argname): 762 sizes[argname] = len(argnames) 763 else: 764 sizes[argname] = min(sizes[argname], len(argnames)) 765 766 # Obtain attribute details in order of size and occupancy. 767 768 names = d.keys() 769 770 rsizes = [] 771 for argname, size in sizes.items(): 772 occupied = len(params[argname]) 773 key = (size, size, len(names) - occupied, argname) 774 rsizes.append(key) 775 776 rsizes.sort() 777 778 # Make a matrix of parameters. 779 780 matrix = {} 781 for argname, types in params.items(): 782 row = [] 783 for name in names: 784 if name in types: 785 row.append(argname) 786 else: 787 row.append(None) 788 matrix[argname] = row 789 790 return matrix, names, rsizes 791 792 def get_allocated_locations(d, fn): 793 794 """ 795 Return a list where each element corresponds to a structure location and 796 contains a set of attribute names that may be stored at that location, given 797 a mapping 'd' whose keys are (object kind, object name) tuples and whose 798 values are collections of attributes. 799 """ 800 801 matrix, names, rsizes = fn(d) 802 allocated = [] 803 804 x = 0 805 while x < len(rsizes): 806 weight, size, free, attrname = rsizes[x] 807 base = matrix[attrname] 808 y = x + 1 809 while y < len(rsizes): 810 _weight, _size, _free, _attrname = rsizes[y] 811 occupied = len(names) - _free 812 if occupied > free: 813 break 814 new = combine_rows(base, matrix[_attrname]) 815 if new: 816 del matrix[_attrname] 817 del rsizes[y] 818 base = new 819 free -= occupied 820 else: 821 y += 1 822 allocated.append(base) 823 x += 1 824 825 # Return the list of attribute names from each row of the allocated 826 # attributes table. 827 828 locations = [] 829 for attrnames in allocated: 830 l = set() 831 for attrname in attrnames: 832 if attrname: 833 l.add(attrname) 834 locations.append(l) 835 return locations 836 837 # vim: tabstop=4 expandtab shiftwidth=4