1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts and generate access instruction plans. 5 6 Copyright (C) 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_instructions = {} 60 self.accessor_kinds = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.initialise_access_instructions() 86 87 def to_output(self): 88 89 "Write the output files using optimisation information." 90 91 if not exists(self.output): 92 makedirs(self.output) 93 94 self.write_objects() 95 96 def write_objects(self): 97 98 """ 99 Write object-related output. 100 101 The locations are a list of positions indicating the attributes residing 102 at each position in the different structures in a program. 103 104 ---- 105 106 The parameter locations are a list of positions indicating the parameters 107 residing at each position in the different parameter lists in a program. 108 109 ---- 110 111 Each attribute plan provides attribute details in the following format: 112 113 location " " name " " test " " test type " " base 114 " " traversed attributes " " traversed attribute ambiguity 115 " " traversal access modes 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 Traversal access modes are either "class" (obtain accessor class to 124 access attribute) or "object" (obtain attribute directly from accessor). 125 126 ---- 127 128 The structures are presented as a table in the following format: 129 130 qualified name " " attribute names 131 132 The attribute names are separated by ", " characters and indicate the 133 attribute provided at each position in the structure associated with the 134 given type. Where no attribute is provided at a particular location 135 within a structure, "-" is given. 136 137 ---- 138 139 The parameters are presented as a table in the following format: 140 141 qualified name " " parameter details 142 143 The parameter details are separated by ", " characters and indicate 144 the parameter name and list position for each parameter described at 145 each location in the parameter table associated with the given 146 function. Where no parameter details are provided at a particular 147 location within a parameter table, "-" is given. The name and list 148 position are separated by a colon (":"). 149 150 ---- 151 152 The attribute table is presented as a table in the following format: 153 154 qualified name " " attribute identifiers 155 156 Instead of attribute names, identifiers defined according to the order 157 given in the "attrnames" file are employed to denote the attributes 158 featured in each type's structure. Where no attribute is provided at a 159 particular location within a structure, "-" is given. 160 161 ---- 162 163 The parameter table is presented as a table in the following format: 164 165 qualified name " " parameter details 166 167 Instead of parameter names, identifiers defined according to the order 168 given in the "paramnames" file are employed to denote the parameters 169 featured in each function's parameter table. Where no parameter is 170 provided at a particular location within a table, "-" is given. 171 172 ---- 173 174 The ordered list of attribute names is given in the "attrnames" file. 175 176 ---- 177 178 The ordered list of parameter names is given in the "paramnames" file. 179 180 ---- 181 182 The ordered list of constant literals is given in the "constants" file. 183 """ 184 185 f = open(join(self.output, "locations"), "w") 186 try: 187 for attrnames in self.locations: 188 print >>f, sorted_output(attrnames) 189 190 finally: 191 f.close() 192 193 f = open(join(self.output, "parameter_locations"), "w") 194 try: 195 for argnames in self.arg_locations: 196 print >>f, sorted_output(argnames) 197 198 finally: 199 f.close() 200 201 f = open(join(self.output, "instruction_plans"), "w") 202 try: 203 access_instructions = self.access_instructions.items() 204 access_instructions.sort() 205 206 for location, instructions in access_instructions: 207 print >>f, encode_access_location(location), "..." 208 for instruction in instructions: 209 print >>f, encode_instruction(instruction) 210 print >>f 211 212 finally: 213 f.close() 214 215 f = open(join(self.output, "structures"), "w") 216 try: 217 structures = self.structures.items() 218 structures.sort() 219 220 for name, attrnames in structures: 221 print >>f, name, ", ".join([s or "-" for s in attrnames]) 222 223 finally: 224 f.close() 225 226 f = open(join(self.output, "parameters"), "w") 227 try: 228 parameters = self.parameters.items() 229 parameters.sort() 230 231 for name, argnames in parameters: 232 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 233 234 finally: 235 f.close() 236 237 f = open(join(self.output, "attrtable"), "w") 238 try: 239 attr_table = self.attr_table.items() 240 attr_table.sort() 241 242 for name, attrcodes in attr_table: 243 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 244 245 finally: 246 f.close() 247 248 f = open(join(self.output, "paramtable"), "w") 249 try: 250 param_table = self.param_table.items() 251 param_table.sort() 252 253 for name, paramcodes in param_table: 254 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 255 256 finally: 257 f.close() 258 259 f = open(join(self.output, "attrnames"), "w") 260 try: 261 for name in self.all_attrnames: 262 print >>f, name 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramnames"), "w") 268 try: 269 for name in self.all_paramnames: 270 print >>f, name 271 272 finally: 273 f.close() 274 275 f = open(join(self.output, "constants"), "w") 276 try: 277 constants = [(n, value) for (value, n) in self.constants.items()] 278 constants.sort() 279 for n, value in constants: 280 print >>f, repr(value) 281 282 finally: 283 f.close() 284 285 def populate_objects(self): 286 287 "Populate objects using attribute and usage information." 288 289 all_attrs = {} 290 291 # Partition attributes into separate sections so that class and instance 292 # attributes are treated separately. 293 294 for source, objtype in [ 295 (self.importer.all_class_attrs, "<class>"), 296 (self.importer.all_instance_attrs, "<instance>"), 297 (self.importer.all_module_attrs, "<module>") 298 ]: 299 for name, attrs in source.items(): 300 all_attrs[(objtype, name)] = attrs 301 302 self.locations = get_allocated_locations(all_attrs, get_attributes_and_sizes) 303 304 def populate_parameters(self): 305 306 "Populate parameter tables using parameter information." 307 308 self.arg_locations = [set()] + get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 309 310 def position_attributes(self): 311 312 "Position specific attribute references." 313 314 # Reverse the location mappings. 315 316 attr_locations = self.attr_locations = {} 317 318 for i, attrnames in enumerate(self.locations): 319 for attrname in attrnames: 320 attr_locations[attrname] = i 321 322 # Record the structures. 323 324 for source, objtype in [ 325 (self.importer.all_class_attrs, "<class>"), 326 (self.importer.all_instance_attrs, "<instance>"), 327 (self.importer.all_module_attrs, "<module>") 328 ]: 329 330 for name, attrnames in source.items(): 331 key = Reference(objtype, name) 332 l = self.structures[key] = [None] * len(attrnames) 333 for attrname in attrnames: 334 position = attr_locations[attrname] 335 if position >= len(l): 336 l.extend([None] * (position - len(l) + 1)) 337 l[position] = attrname 338 339 def initialise_access_instructions(self): 340 341 "Expand access plans into instruction sequences." 342 343 for access_location, access_plan in self.deducer.access_plans.items(): 344 345 # Obtain the access details. 346 347 name, test, test_type, base, \ 348 traversed, traversal_modes, attrnames, \ 349 context, context_test, \ 350 first_method, final_method, \ 351 origin, accessor_kinds = access_plan 352 353 instructions = [] 354 emit = instructions.append 355 356 if base: 357 original_accessor = base 358 else: 359 original_accessor = "<expr>" # use a generic placeholder 360 361 # Prepare for any first attribute access. 362 363 if traversed: 364 attrname = traversed[0] 365 del traversed[0] 366 elif attrnames: 367 attrname = attrnames[0] 368 del attrnames[0] 369 370 # Perform the first access explicitly if at least one operation 371 # requires it. 372 373 access_first_attribute = final_method in ("access", "assign") or traversed or attrnames 374 375 # Determine whether the first access involves assignment. 376 377 assigning = not traversed and not attrnames and final_method == "assign" 378 set_accessor = assigning and "__set_target_accessor" or "__set_accessor" 379 stored_accessor = assigning and "<target_accessor>" or "<accessor>" 380 381 # Set the context if already available. 382 383 if context == "base": 384 accessor = context_var = (base,) 385 elif context == "original-accessor": 386 387 # Prevent re-evaluation of any dynamic expression by storing it. 388 389 if original_accessor == "<expr>": 390 emit((set_accessor, original_accessor)) 391 accessor = context_var = (stored_accessor,) 392 else: 393 accessor = context_var = (original_accessor,) 394 395 # Assigning does not set the context. 396 397 elif context in ("final-accessor", "unset") and access_first_attribute: 398 399 # Prevent re-evaluation of any dynamic expression by storing it. 400 401 if original_accessor == "<expr>": 402 emit((set_accessor, original_accessor)) 403 accessor = (stored_accessor,) 404 else: 405 accessor = (original_accessor,) 406 407 # Apply any test. 408 409 if test[0] == "test": 410 accessor = ("__%s_%s_%s" % test, accessor, test_type) 411 412 # Perform the first or final access. 413 # The access only needs performing if the resulting accessor is used. 414 415 remaining = len(traversed + attrnames) 416 417 if access_first_attribute: 418 419 if first_method == "relative-class": 420 if assigning: 421 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 422 else: 423 accessor = ("__load_via_class", accessor, attrname) 424 425 elif first_method == "relative-object": 426 if assigning: 427 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 428 else: 429 accessor = ("__load_via_object", accessor, attrname) 430 431 elif first_method == "relative-object-class": 432 if assigning: 433 emit(("__get_class_and_store", accessor, attrname, "<assexpr>")) 434 else: 435 accessor = ("__get_class_and_load", accessor, attrname) 436 437 elif first_method == "check-class": 438 if assigning: 439 emit(("__check_and_store_via_class", accessor, attrname, "<assexpr>")) 440 else: 441 accessor = ("__check_and_load_via_class", accessor, attrname) 442 443 elif first_method == "check-object": 444 if assigning: 445 emit(("__check_and_store_via_object", accessor, attrname, "<assexpr>")) 446 else: 447 accessor = ("__check_and_load_via_object", accessor, attrname) 448 449 elif first_method == "check-object-class": 450 if assigning: 451 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 452 else: 453 accessor = ("__check_and_load_via_any", accessor, attrname) 454 455 # Traverse attributes using the accessor. 456 457 if traversed: 458 for attrname, traversal_mode in zip(traversed, traversal_modes): 459 assigning = remaining == 1 and final_method == "assign" 460 461 # Set the context, if appropriate. 462 463 if remaining == 1 and final_method != "assign" and context == "final-accessor": 464 emit(("__set_context", accessor)) 465 accessor = context_var = "<context>" 466 467 # Perform the access only if not achieved directly. 468 469 if remaining > 1 or final_method in ("access", "assign"): 470 471 if traversal_mode == "class": 472 if assigning: 473 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 474 else: 475 accessor = ("__load_via_class", accessor, attrname) 476 else: 477 if assigning: 478 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 479 else: 480 accessor = ("__load_via_object", accessor, attrname) 481 482 remaining -= 1 483 484 if attrnames: 485 for attrname in attrnames: 486 assigning = remaining == 1 and final_method == "assign" 487 488 # Set the context, if appropriate. 489 490 if remaining == 1 and final_method != "assign" and context == "final-accessor": 491 emit(("__set_context", accessor)) 492 accessor = context_var = "<context>" 493 494 # Perform the access only if not achieved directly. 495 496 if remaining > 1 or final_method in ("access", "assign"): 497 498 if assigning: 499 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 500 else: 501 accessor = ("__check_and_load_via_any", accessor, attrname) 502 503 remaining -= 1 504 505 # Define or emit the means of accessing the actual target. 506 507 if final_method == "static-assign": 508 parent, attrname = origin.rsplit(".", 1) 509 emit(("__store_via_object", parent, attrname, "<assexpr>")) 510 511 elif final_method in ("static", "static-invoke"): 512 parent, attrname = origin.rsplit(".", 1) 513 accessor = ("__load_static", parent, origin) 514 515 # Wrap accesses in context operations. 516 517 if context_test == "test": 518 emit(("__test_context", context_var, accessor)) 519 520 elif context_test == "replace": 521 522 # Static invocation targets have a context added but no other 523 # transformation performed. 524 525 if final_method == "static-invoke": 526 emit(("__update_context", context_var, accessor)) 527 528 # Other invocation targets gain a context and have the bound 529 # version of the callable activated. 530 531 else: 532 emit(("__replace_context", context_var, accessor)) 533 534 elif final_method not in ("assign", "static-assign"): 535 emit(accessor) 536 537 self.access_instructions[access_location] = instructions 538 self.accessor_kinds[access_location] = accessor_kinds 539 540 def get_ambiguity_for_attributes(self, attrnames): 541 542 """ 543 Return a list of attribute position alternatives corresponding to each 544 of the given 'attrnames'. 545 """ 546 547 ambiguity = [] 548 549 for attrname in attrnames: 550 position = self.attr_locations[attrname] 551 ambiguity.append(len(self.locations[position])) 552 553 return ambiguity 554 555 def position_parameters(self): 556 557 "Position the parameters for each function's parameter table." 558 559 # Reverse the location mappings. 560 561 param_locations = self.param_locations = {} 562 563 for i, argnames in enumerate(self.arg_locations): 564 565 # Position the arguments. 566 567 for argname in argnames: 568 param_locations[argname] = i 569 570 for name, argnames in self.importer.function_parameters.items(): 571 572 # Allocate an extra context parameter in the table. 573 574 l = self.parameters[name] = [None] + [None] * len(argnames) 575 576 # Store an entry for the name along with the name's position in the 577 # parameter list. 578 579 for pos, argname in enumerate(argnames): 580 581 # Position the argument in the table. 582 583 position = param_locations[argname] 584 if position >= len(l): 585 l.extend([None] * (position - len(l) + 1)) 586 587 # Indicate an argument list position starting from 1 (after the 588 # initial context argument). 589 590 l[position] = (argname, pos + 1) 591 592 def populate_tables(self): 593 594 """ 595 Assign identifiers to attributes and encode structure information using 596 these identifiers. 597 """ 598 599 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 600 601 # Record the numbers indicating the locations of the names. 602 603 for key, attrnames in self.structures.items(): 604 l = self.attr_table[key] = [] 605 for attrname in attrnames: 606 if attrname is None: 607 l.append(None) 608 else: 609 l.append(d[attrname]) 610 611 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 612 613 # Record the numbers indicating the locations of the names. 614 615 for key, values in self.parameters.items(): 616 l = self.param_table[key] = [] 617 for value in values: 618 if value is None: 619 l.append(None) 620 else: 621 name, pos = value 622 l.append((d[name], pos)) 623 624 def _get_name_mapping(self, locations): 625 626 """ 627 Get a sorted list of names from 'locations', then map them to 628 identifying numbers. Return the list and the mapping. 629 """ 630 631 all_names = locations.keys() 632 all_names.sort() 633 return all_names, dict([(name, i) for i, name in enumerate(all_names)]) 634 635 def populate_constants(self): 636 637 """ 638 Obtain a collection of distinct constant literals, building a mapping 639 from constant references to those in this collection. 640 """ 641 642 # Obtain mappings from constant values to identifiers. 643 644 self.constants = {} 645 646 for path, constants in self.importer.all_constants.items(): 647 for constant, n in constants.items(): 648 649 # Record constants and obtain a number for them. 650 651 add_counter_item(self.constants, constant) 652 653 self.constant_numbers = {} 654 655 for name, (value, value_type) in self.importer.all_constant_values.items(): 656 self.constant_numbers[name] = self.constants[value] 657 658 def combine_rows(a, b): 659 c = [] 660 for i, j in zip(a, b): 661 if i is None or j is None: 662 c.append(i or j) 663 else: 664 return None 665 return c 666 667 def get_attributes_and_sizes(d): 668 669 """ 670 Return a matrix of attributes, a list of type names corresponding to columns 671 in the matrix, and a list of ranked sizes each indicating... 672 673 * a weighted size depending on the kind of object 674 * the minimum size of an object employing an attribute 675 * the number of free columns in the matrix for the attribute 676 * the attribute name itself 677 """ 678 679 attrs = {} 680 sizes = {} 681 objtypes = {} 682 683 for name, attrnames in d.items(): 684 objtype, _name = name 685 686 for attrname in attrnames: 687 688 # Record each type supporting the attribute. 689 690 init_item(attrs, attrname, set) 691 attrs[attrname].add(name) 692 693 # Maintain a record of the smallest object size supporting the given 694 # attribute. 695 696 if not sizes.has_key(attrname): 697 sizes[attrname] = len(attrnames) 698 else: 699 sizes[attrname] = min(sizes[attrname], len(attrnames)) 700 701 # Record the object types/kinds supporting the attribute. 702 703 init_item(objtypes, attrname, set) 704 objtypes[attrname].add(objtype) 705 706 # Obtain attribute details in order of size and occupancy. 707 708 names = d.keys() 709 710 rsizes = [] 711 for attrname, size in sizes.items(): 712 priority = "<instance>" in objtypes[attrname] and 0.5 or 1 713 occupied = len(attrs[attrname]) 714 key = (priority * size, size, len(names) - occupied, attrname) 715 rsizes.append(key) 716 717 rsizes.sort() 718 719 # Make a matrix of attributes. 720 721 matrix = {} 722 for attrname, types in attrs.items(): 723 row = [] 724 for name in names: 725 if name in types: 726 row.append(attrname) 727 else: 728 row.append(None) 729 matrix[attrname] = row 730 731 return matrix, names, rsizes 732 733 def get_parameters_and_sizes(d): 734 735 """ 736 Return a matrix of parameters, a list of functions corresponding to columns 737 in the matrix, and a list of ranked sizes each indicating... 738 739 * a weighted size depending on the kind of object 740 * the minimum size of a parameter list employing a parameter 741 * the number of free columns in the matrix for the parameter 742 * the parameter name itself 743 744 This is a slightly simpler version of the above 'get_attributes_and_sizes' 745 function. 746 """ 747 748 params = {} 749 sizes = {} 750 751 for name, argnames in d.items(): 752 for argname in argnames: 753 754 # Record each function supporting the parameter. 755 756 init_item(params, argname, set) 757 params[argname].add(name) 758 759 # Maintain a record of the smallest parameter list supporting the 760 # given parameter. 761 762 if not sizes.has_key(argname): 763 sizes[argname] = len(argnames) 764 else: 765 sizes[argname] = min(sizes[argname], len(argnames)) 766 767 # Obtain attribute details in order of size and occupancy. 768 769 names = d.keys() 770 771 rsizes = [] 772 for argname, size in sizes.items(): 773 occupied = len(params[argname]) 774 key = (size, size, len(names) - occupied, argname) 775 rsizes.append(key) 776 777 rsizes.sort() 778 779 # Make a matrix of parameters. 780 781 matrix = {} 782 for argname, types in params.items(): 783 row = [] 784 for name in names: 785 if name in types: 786 row.append(argname) 787 else: 788 row.append(None) 789 matrix[argname] = row 790 791 return matrix, names, rsizes 792 793 def get_allocated_locations(d, fn): 794 795 """ 796 Return a list where each element corresponds to a structure location and 797 contains a set of attribute names that may be stored at that location, given 798 a mapping 'd' whose keys are (object type, object name) tuples and whose 799 values are collections of attributes. 800 """ 801 802 matrix, names, rsizes = fn(d) 803 allocated = [] 804 805 x = 0 806 while x < len(rsizes): 807 weight, size, free, attrname = rsizes[x] 808 base = matrix[attrname] 809 y = x + 1 810 while y < len(rsizes): 811 _weight, _size, _free, _attrname = rsizes[y] 812 occupied = len(names) - _free 813 if occupied > free: 814 break 815 new = combine_rows(base, matrix[_attrname]) 816 if new: 817 del matrix[_attrname] 818 del rsizes[y] 819 base = new 820 free -= occupied 821 else: 822 y += 1 823 allocated.append(base) 824 x += 1 825 826 # Return the list of attribute names from each row of the allocated 827 # attributes table. 828 829 locations = [] 830 for attrnames in allocated: 831 l = set() 832 for attrname in attrnames: 833 if attrname: 834 l.add(attrname) 835 locations.append(l) 836 return locations 837 838 # vim: tabstop=4 expandtab shiftwidth=4