1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts and generate access instruction plans. 5 6 Copyright (C) 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_instructions = {} 60 self.accessor_kinds = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.initialise_access_instructions() 86 87 def to_output(self): 88 89 "Write the output files using optimisation information." 90 91 if not exists(self.output): 92 makedirs(self.output) 93 94 self.write_objects() 95 96 def write_objects(self): 97 98 """ 99 Write object-related output. 100 101 The locations are a list of positions indicating the attributes residing 102 at each position in the different structures in a program. 103 104 ---- 105 106 The parameter locations are a list of positions indicating the parameters 107 residing at each position in the different parameter lists in a program. 108 109 ---- 110 111 Each attribute plan provides attribute details in the following format: 112 113 location " " name " " test " " test type " " base 114 " " traversed attributes " " traversed attribute ambiguity 115 " " traversal access modes 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 Traversal access modes are either "class" (obtain accessor class to 124 access attribute) or "object" (obtain attribute directly from accessor). 125 126 ---- 127 128 The structures are presented as a table in the following format: 129 130 qualified name " " attribute names 131 132 The attribute names are separated by ", " characters and indicate the 133 attribute provided at each position in the structure associated with the 134 given type. Where no attribute is provided at a particular location 135 within a structure, "-" is given. 136 137 ---- 138 139 The parameters are presented as a table in the following format: 140 141 qualified name " " parameter details 142 143 The parameter details are separated by ", " characters and indicate 144 the parameter name and list position for each parameter described at 145 each location in the parameter table associated with the given 146 function. Where no parameter details are provided at a particular 147 location within a parameter table, "-" is given. The name and list 148 position are separated by a colon (":"). 149 150 ---- 151 152 The attribute table is presented as a table in the following format: 153 154 qualified name " " attribute identifiers 155 156 Instead of attribute names, identifiers defined according to the order 157 given in the "attrnames" file are employed to denote the attributes 158 featured in each type's structure. Where no attribute is provided at a 159 particular location within a structure, "-" is given. 160 161 ---- 162 163 The parameter table is presented as a table in the following format: 164 165 qualified name " " parameter details 166 167 Instead of parameter names, identifiers defined according to the order 168 given in the "paramnames" file are employed to denote the parameters 169 featured in each function's parameter table. Where no parameter is 170 provided at a particular location within a table, "-" is given. 171 172 ---- 173 174 The ordered list of attribute names is given in the "attrnames" file. 175 176 ---- 177 178 The ordered list of parameter names is given in the "paramnames" file. 179 180 ---- 181 182 The ordered list of constant literals is given in the "constants" file. 183 """ 184 185 f = open(join(self.output, "locations"), "w") 186 try: 187 for attrnames in self.locations: 188 print >>f, sorted_output(attrnames) 189 190 finally: 191 f.close() 192 193 f = open(join(self.output, "parameter_locations"), "w") 194 try: 195 for argnames in self.arg_locations: 196 print >>f, sorted_output(argnames) 197 198 finally: 199 f.close() 200 201 f = open(join(self.output, "instruction_plans"), "w") 202 try: 203 access_instructions = self.access_instructions.items() 204 access_instructions.sort() 205 206 for location, instructions in access_instructions: 207 print >>f, encode_access_location(location), "..." 208 for instruction in instructions: 209 print >>f, encode_instruction(instruction) 210 print >>f 211 212 finally: 213 f.close() 214 215 f = open(join(self.output, "structures"), "w") 216 try: 217 structures = self.structures.items() 218 structures.sort() 219 220 for name, attrnames in structures: 221 print >>f, name, ", ".join([s or "-" for s in attrnames]) 222 223 finally: 224 f.close() 225 226 f = open(join(self.output, "parameters"), "w") 227 try: 228 parameters = self.parameters.items() 229 parameters.sort() 230 231 for name, argnames in parameters: 232 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 233 234 finally: 235 f.close() 236 237 f = open(join(self.output, "attrtable"), "w") 238 try: 239 attr_table = self.attr_table.items() 240 attr_table.sort() 241 242 for name, attrcodes in attr_table: 243 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 244 245 finally: 246 f.close() 247 248 f = open(join(self.output, "paramtable"), "w") 249 try: 250 param_table = self.param_table.items() 251 param_table.sort() 252 253 for name, paramcodes in param_table: 254 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 255 256 finally: 257 f.close() 258 259 f = open(join(self.output, "attrnames"), "w") 260 try: 261 for name in self.all_attrnames: 262 print >>f, name 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramnames"), "w") 268 try: 269 for name in self.all_paramnames: 270 print >>f, name 271 272 finally: 273 f.close() 274 275 f = open(join(self.output, "constants"), "w") 276 try: 277 constants = [] 278 for (value, value_type, encoding), n in self.constants.items(): 279 constants.append((n, value_type, encoding, value)) 280 constants.sort() 281 for n, value_type, encoding, value in constants: 282 print >>f, value_type, encoding or "{}", repr(value) 283 284 finally: 285 f.close() 286 287 def populate_objects(self): 288 289 "Populate objects using attribute and usage information." 290 291 all_attrs = {} 292 293 # Partition attributes into separate sections so that class and instance 294 # attributes are treated separately. 295 296 for source, objtype in [ 297 (self.importer.all_class_attrs, "<class>"), 298 (self.importer.all_instance_attrs, "<instance>"), 299 (self.importer.all_module_attrs, "<module>") 300 ]: 301 for name, attrs in source.items(): 302 all_attrs[(objtype, name)] = attrs 303 304 self.locations = get_allocated_locations(all_attrs, get_attributes_and_sizes) 305 306 def populate_parameters(self): 307 308 "Populate parameter tables using parameter information." 309 310 self.arg_locations = [set()] + get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 311 312 def position_attributes(self): 313 314 "Position specific attribute references." 315 316 # Reverse the location mappings. 317 318 attr_locations = self.attr_locations = {} 319 320 for i, attrnames in enumerate(self.locations): 321 for attrname in attrnames: 322 attr_locations[attrname] = i 323 324 # Record the structures. 325 326 for source, objtype in [ 327 (self.importer.all_class_attrs, "<class>"), 328 (self.importer.all_instance_attrs, "<instance>"), 329 (self.importer.all_module_attrs, "<module>") 330 ]: 331 332 for name, attrnames in source.items(): 333 key = Reference(objtype, name) 334 l = self.structures[key] = [None] * len(attrnames) 335 for attrname in attrnames: 336 position = attr_locations[attrname] 337 if position >= len(l): 338 l.extend([None] * (position - len(l) + 1)) 339 l[position] = attrname 340 341 def initialise_access_instructions(self): 342 343 "Expand access plans into instruction sequences." 344 345 for access_location, access_plan in self.deducer.access_plans.items(): 346 347 # Obtain the access details. 348 349 name, test, test_type, base, \ 350 traversed, traversal_modes, attrnames, \ 351 context, context_test, \ 352 first_method, final_method, \ 353 origin, accessor_kinds = access_plan 354 355 instructions = [] 356 emit = instructions.append 357 358 if base: 359 original_accessor = base 360 else: 361 original_accessor = "<expr>" # use a generic placeholder 362 363 # Prepare for any first attribute access. 364 365 if traversed: 366 attrname = traversed[0] 367 del traversed[0] 368 elif attrnames: 369 attrname = attrnames[0] 370 del attrnames[0] 371 372 # Perform the first access explicitly if at least one operation 373 # requires it. 374 375 access_first_attribute = final_method in ("access", "assign") or traversed or attrnames 376 377 # Determine whether the first access involves assignment. 378 379 assigning = not traversed and not attrnames and final_method == "assign" 380 set_accessor = assigning and "<set_target_accessor>" or "<set_accessor>" 381 stored_accessor = assigning and "<target_accessor>" or "<accessor>" 382 383 # Set the context if already available. 384 385 if context == "base": 386 accessor = context_var = (base,) 387 elif context == "original-accessor": 388 389 # Prevent re-evaluation of any dynamic expression by storing it. 390 391 if original_accessor == "<expr>": 392 emit((set_accessor, original_accessor)) 393 accessor = context_var = (stored_accessor,) 394 else: 395 accessor = context_var = (original_accessor,) 396 397 # Assigning does not set the context. 398 399 elif context in ("final-accessor", "unset") and access_first_attribute: 400 401 # Prevent re-evaluation of any dynamic expression by storing it. 402 403 if original_accessor == "<expr>": 404 emit((set_accessor, original_accessor)) 405 accessor = (stored_accessor,) 406 else: 407 accessor = (original_accessor,) 408 409 # Apply any test. 410 411 if test[0] == "test": 412 accessor = ("__%s_%s_%s" % test, accessor, test_type) 413 414 # Perform the first or final access. 415 # The access only needs performing if the resulting accessor is used. 416 417 remaining = len(traversed + attrnames) 418 419 if access_first_attribute: 420 421 if first_method == "relative-class": 422 if assigning: 423 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 424 else: 425 accessor = ("__load_via_class", accessor, attrname) 426 427 elif first_method == "relative-object": 428 if assigning: 429 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 430 else: 431 accessor = ("__load_via_object", accessor, attrname) 432 433 elif first_method == "relative-object-class": 434 if assigning: 435 emit(("__get_class_and_store", accessor, attrname, "<assexpr>")) 436 else: 437 accessor = ("__get_class_and_load", accessor, attrname) 438 439 elif first_method == "check-class": 440 if assigning: 441 emit(("__check_and_store_via_class", accessor, attrname, "<assexpr>")) 442 else: 443 accessor = ("__check_and_load_via_class", accessor, attrname) 444 445 elif first_method == "check-object": 446 if assigning: 447 emit(("__check_and_store_via_object", accessor, attrname, "<assexpr>")) 448 else: 449 accessor = ("__check_and_load_via_object", accessor, attrname) 450 451 elif first_method == "check-object-class": 452 if assigning: 453 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 454 else: 455 accessor = ("__check_and_load_via_any", accessor, attrname) 456 457 # Traverse attributes using the accessor. 458 459 if traversed: 460 for attrname, traversal_mode in zip(traversed, traversal_modes): 461 assigning = remaining == 1 and final_method == "assign" 462 463 # Set the context, if appropriate. 464 465 if remaining == 1 and final_method != "assign" and context == "final-accessor": 466 emit(("__set_context", accessor)) 467 accessor = context_var = "<context>" 468 469 # Perform the access only if not achieved directly. 470 471 if remaining > 1 or final_method in ("access", "assign"): 472 473 if traversal_mode == "class": 474 if assigning: 475 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 476 else: 477 accessor = ("__load_via_class", accessor, attrname) 478 else: 479 if assigning: 480 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 481 else: 482 accessor = ("__load_via_object", accessor, attrname) 483 484 remaining -= 1 485 486 if attrnames: 487 for attrname in attrnames: 488 assigning = remaining == 1 and final_method == "assign" 489 490 # Set the context, if appropriate. 491 492 if remaining == 1 and final_method != "assign" and context == "final-accessor": 493 emit(("__set_context", accessor)) 494 accessor = context_var = "<context>" 495 496 # Perform the access only if not achieved directly. 497 498 if remaining > 1 or final_method in ("access", "assign"): 499 500 if assigning: 501 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 502 else: 503 accessor = ("__check_and_load_via_any", accessor, attrname) 504 505 remaining -= 1 506 507 # Define or emit the means of accessing the actual target. 508 509 if final_method == "static-assign": 510 parent, attrname = origin.rsplit(".", 1) 511 emit(("__store_via_object", parent, attrname, "<assexpr>")) 512 513 elif final_method in ("static", "static-invoke"): 514 parent, attrname = origin.rsplit(".", 1) 515 accessor = ("__load_static", parent, origin) 516 517 # Wrap accesses in context operations. 518 519 if context_test == "test": 520 emit(("__test_context", context_var, accessor)) 521 522 elif context_test == "replace": 523 emit(("__update_context", context_var, accessor)) 524 525 elif final_method not in ("assign", "static-assign"): 526 emit(accessor) 527 528 self.access_instructions[access_location] = instructions 529 self.accessor_kinds[access_location] = accessor_kinds 530 531 def get_ambiguity_for_attributes(self, attrnames): 532 533 """ 534 Return a list of attribute position alternatives corresponding to each 535 of the given 'attrnames'. 536 """ 537 538 ambiguity = [] 539 540 for attrname in attrnames: 541 position = self.attr_locations[attrname] 542 ambiguity.append(len(self.locations[position])) 543 544 return ambiguity 545 546 def position_parameters(self): 547 548 "Position the parameters for each function's parameter table." 549 550 # Reverse the location mappings. 551 552 param_locations = self.param_locations = {} 553 554 for i, argnames in enumerate(self.arg_locations): 555 556 # Position the arguments. 557 558 for argname in argnames: 559 param_locations[argname] = i 560 561 for name, argnames in self.importer.function_parameters.items(): 562 563 # Allocate an extra context parameter in the table. 564 565 l = self.parameters[name] = [None] + [None] * len(argnames) 566 567 # Store an entry for the name along with the name's position in the 568 # parameter list. 569 570 for pos, argname in enumerate(argnames): 571 572 # Position the argument in the table. 573 574 position = param_locations[argname] 575 if position >= len(l): 576 l.extend([None] * (position - len(l) + 1)) 577 578 # Indicate an argument list position starting from 1 (after the 579 # initial context argument). 580 581 l[position] = (argname, pos + 1) 582 583 def populate_tables(self): 584 585 """ 586 Assign identifiers to attributes and encode structure information using 587 these identifiers. 588 """ 589 590 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 591 592 # Record the numbers indicating the locations of the names. 593 594 for key, attrnames in self.structures.items(): 595 l = self.attr_table[key] = [] 596 for attrname in attrnames: 597 if attrname is None: 598 l.append(None) 599 else: 600 l.append(d[attrname]) 601 602 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 603 604 # Record the numbers indicating the locations of the names. 605 606 for key, values in self.parameters.items(): 607 l = self.param_table[key] = [] 608 for value in values: 609 if value is None: 610 l.append(None) 611 else: 612 name, pos = value 613 l.append((d[name], pos)) 614 615 def _get_name_mapping(self, locations): 616 617 """ 618 Get a sorted list of names from 'locations', then map them to 619 identifying numbers. Return the list and the mapping. 620 """ 621 622 all_names = locations.keys() 623 all_names.sort() 624 d = {} 625 for i, name in enumerate(all_names): 626 d[name] = i 627 return all_names, d 628 629 def populate_constants(self): 630 631 """ 632 Obtain a collection of distinct constant literals, building a mapping 633 from constant references to those in this collection. 634 """ 635 636 # Obtain mappings from constant values to identifiers. 637 638 self.constants = {} 639 640 for path, constants in self.importer.all_constants.items(): 641 642 # Record constants and obtain a number for them. 643 # Each constant is actually (value, value_type, encoding). 644 645 for constant, n in constants.items(): 646 add_counter_item(self.constants, constant) 647 648 self.constant_numbers = {} 649 650 for name, constant in self.importer.all_constant_values.items(): 651 self.constant_numbers[name] = self.constants[constant] 652 653 def combine_rows(a, b): 654 c = [] 655 for i, j in zip(a, b): 656 if i is None or j is None: 657 c.append(i or j) 658 else: 659 return None 660 return c 661 662 def get_attributes_and_sizes(d): 663 664 """ 665 Return a matrix of attributes, a list of type names corresponding to columns 666 in the matrix, and a list of ranked sizes each indicating... 667 668 * a weighted size depending on the kind of object 669 * the minimum size of an object employing an attribute 670 * the number of free columns in the matrix for the attribute 671 * the attribute name itself 672 """ 673 674 attrs = {} 675 sizes = {} 676 objtypes = {} 677 678 for name, attrnames in d.items(): 679 objtype, _name = name 680 681 for attrname in attrnames: 682 683 # Record each type supporting the attribute. 684 685 init_item(attrs, attrname, set) 686 attrs[attrname].add(name) 687 688 # Maintain a record of the smallest object size supporting the given 689 # attribute. 690 691 if not sizes.has_key(attrname): 692 sizes[attrname] = len(attrnames) 693 else: 694 sizes[attrname] = min(sizes[attrname], len(attrnames)) 695 696 # Record the object types/kinds supporting the attribute. 697 698 init_item(objtypes, attrname, set) 699 objtypes[attrname].add(objtype) 700 701 # Obtain attribute details in order of size and occupancy. 702 703 names = d.keys() 704 705 rsizes = [] 706 for attrname, size in sizes.items(): 707 priority = "<instance>" in objtypes[attrname] and 0.5 or 1 708 occupied = len(attrs[attrname]) 709 key = (priority * size, size, len(names) - occupied, attrname) 710 rsizes.append(key) 711 712 rsizes.sort() 713 714 # Make a matrix of attributes. 715 716 matrix = {} 717 for attrname, types in attrs.items(): 718 row = [] 719 for name in names: 720 if name in types: 721 row.append(attrname) 722 else: 723 row.append(None) 724 matrix[attrname] = row 725 726 return matrix, names, rsizes 727 728 def get_parameters_and_sizes(d): 729 730 """ 731 Return a matrix of parameters, a list of functions corresponding to columns 732 in the matrix, and a list of ranked sizes each indicating... 733 734 * a weighted size depending on the kind of object 735 * the minimum size of a parameter list employing a parameter 736 * the number of free columns in the matrix for the parameter 737 * the parameter name itself 738 739 This is a slightly simpler version of the above 'get_attributes_and_sizes' 740 function. 741 """ 742 743 params = {} 744 sizes = {} 745 746 for name, argnames in d.items(): 747 for argname in argnames: 748 749 # Record each function supporting the parameter. 750 751 init_item(params, argname, set) 752 params[argname].add(name) 753 754 # Maintain a record of the smallest parameter list supporting the 755 # given parameter. 756 757 if not sizes.has_key(argname): 758 sizes[argname] = len(argnames) 759 else: 760 sizes[argname] = min(sizes[argname], len(argnames)) 761 762 # Obtain attribute details in order of size and occupancy. 763 764 names = d.keys() 765 766 rsizes = [] 767 for argname, size in sizes.items(): 768 occupied = len(params[argname]) 769 key = (size, size, len(names) - occupied, argname) 770 rsizes.append(key) 771 772 rsizes.sort() 773 774 # Make a matrix of parameters. 775 776 matrix = {} 777 for argname, types in params.items(): 778 row = [] 779 for name in names: 780 if name in types: 781 row.append(argname) 782 else: 783 row.append(None) 784 matrix[argname] = row 785 786 return matrix, names, rsizes 787 788 def get_allocated_locations(d, fn): 789 790 """ 791 Return a list where each element corresponds to a structure location and 792 contains a set of attribute names that may be stored at that location, given 793 a mapping 'd' whose keys are (object type, object name) tuples and whose 794 values are collections of attributes. 795 """ 796 797 matrix, names, rsizes = fn(d) 798 allocated = [] 799 800 x = 0 801 while x < len(rsizes): 802 weight, size, free, attrname = rsizes[x] 803 base = matrix[attrname] 804 y = x + 1 805 while y < len(rsizes): 806 _weight, _size, _free, _attrname = rsizes[y] 807 occupied = len(names) - _free 808 if occupied > free: 809 break 810 new = combine_rows(base, matrix[_attrname]) 811 if new: 812 del matrix[_attrname] 813 del rsizes[y] 814 base = new 815 free -= occupied 816 else: 817 y += 1 818 allocated.append(base) 819 x += 1 820 821 # Return the list of attribute names from each row of the allocated 822 # attributes table. 823 824 locations = [] 825 for attrnames in allocated: 826 l = set() 827 for attrname in attrnames: 828 if attrname: 829 l.add(attrname) 830 locations.append(l) 831 return locations 832 833 # vim: tabstop=4 expandtab shiftwidth=4