1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts and generate access instruction plans. 5 6 Copyright (C) 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_instructions = {} 60 self.accessor_kinds = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.initialise_access_instructions() 86 87 def to_output(self): 88 89 "Write the output files using optimisation information." 90 91 if not exists(self.output): 92 makedirs(self.output) 93 94 self.write_objects() 95 96 def write_objects(self): 97 98 """ 99 Write object-related output. 100 101 The locations are a list of positions indicating the attributes residing 102 at each position in the different structures in a program. 103 104 ---- 105 106 The parameter locations are a list of positions indicating the parameters 107 residing at each position in the different parameter lists in a program. 108 109 ---- 110 111 Each attribute plan provides attribute details in the following format: 112 113 location " " name " " test " " test type " " base 114 " " traversed attributes " " traversed attribute ambiguity 115 " " traversal access modes 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 Traversal access modes are either "class" (obtain accessor class to 124 access attribute) or "object" (obtain attribute directly from accessor). 125 126 ---- 127 128 The structures are presented as a table in the following format: 129 130 qualified name " " attribute names 131 132 The attribute names are separated by ", " characters and indicate the 133 attribute provided at each position in the structure associated with the 134 given type. Where no attribute is provided at a particular location 135 within a structure, "-" is given. 136 137 ---- 138 139 The parameters are presented as a table in the following format: 140 141 qualified name " " parameter details 142 143 The parameter details are separated by ", " characters and indicate 144 the parameter name and list position for each parameter described at 145 each location in the parameter table associated with the given 146 function. Where no parameter details are provided at a particular 147 location within a parameter table, "-" is given. The name and list 148 position are separated by a colon (":"). 149 150 ---- 151 152 The attribute table is presented as a table in the following format: 153 154 qualified name " " attribute identifiers 155 156 Instead of attribute names, identifiers defined according to the order 157 given in the "attrnames" file are employed to denote the attributes 158 featured in each type's structure. Where no attribute is provided at a 159 particular location within a structure, "-" is given. 160 161 ---- 162 163 The parameter table is presented as a table in the following format: 164 165 qualified name " " parameter details 166 167 Instead of parameter names, identifiers defined according to the order 168 given in the "paramnames" file are employed to denote the parameters 169 featured in each function's parameter table. Where no parameter is 170 provided at a particular location within a table, "-" is given. 171 172 ---- 173 174 The ordered list of attribute names is given in the "attrnames" file. 175 176 ---- 177 178 The ordered list of parameter names is given in the "paramnames" file. 179 180 ---- 181 182 The ordered list of constant literals is given in the "constants" file. 183 """ 184 185 f = open(join(self.output, "locations"), "w") 186 try: 187 for attrnames in self.locations: 188 print >>f, sorted_output(attrnames) 189 190 finally: 191 f.close() 192 193 f = open(join(self.output, "parameter_locations"), "w") 194 try: 195 for argnames in self.arg_locations: 196 print >>f, sorted_output(argnames) 197 198 finally: 199 f.close() 200 201 f = open(join(self.output, "instruction_plans"), "w") 202 try: 203 access_instructions = self.access_instructions.items() 204 access_instructions.sort() 205 206 for location, instructions in access_instructions: 207 print >>f, encode_access_location(location), "..." 208 for instruction in instructions: 209 print >>f, encode_instruction(instruction) 210 print >>f 211 212 finally: 213 f.close() 214 215 f = open(join(self.output, "structures"), "w") 216 try: 217 structures = self.structures.items() 218 structures.sort() 219 220 for name, attrnames in structures: 221 print >>f, name, ", ".join([s or "-" for s in attrnames]) 222 223 finally: 224 f.close() 225 226 f = open(join(self.output, "parameters"), "w") 227 try: 228 parameters = self.parameters.items() 229 parameters.sort() 230 231 for name, argnames in parameters: 232 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 233 234 finally: 235 f.close() 236 237 f = open(join(self.output, "attrtable"), "w") 238 try: 239 attr_table = self.attr_table.items() 240 attr_table.sort() 241 242 for name, attrcodes in attr_table: 243 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 244 245 finally: 246 f.close() 247 248 f = open(join(self.output, "paramtable"), "w") 249 try: 250 param_table = self.param_table.items() 251 param_table.sort() 252 253 for name, paramcodes in param_table: 254 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 255 256 finally: 257 f.close() 258 259 f = open(join(self.output, "attrnames"), "w") 260 try: 261 for name in self.all_attrnames: 262 print >>f, name 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramnames"), "w") 268 try: 269 for name in self.all_paramnames: 270 print >>f, name 271 272 finally: 273 f.close() 274 275 f = open(join(self.output, "constants"), "w") 276 try: 277 constants = [(n, value) for (value, n) in self.constants.items()] 278 constants.sort() 279 for n, value in constants: 280 print >>f, repr(value) 281 282 finally: 283 f.close() 284 285 def populate_objects(self): 286 287 "Populate objects using attribute and usage information." 288 289 all_attrs = {} 290 291 # Partition attributes into separate sections so that class and instance 292 # attributes are treated separately. 293 294 for source, objtype in [ 295 (self.importer.all_class_attrs, "<class>"), 296 (self.importer.all_instance_attrs, "<instance>"), 297 (self.importer.all_module_attrs, "<module>") 298 ]: 299 for name, attrs in source.items(): 300 all_attrs[(objtype, name)] = attrs 301 302 self.locations = get_allocated_locations(all_attrs, get_attributes_and_sizes) 303 304 def populate_parameters(self): 305 306 "Populate parameter tables using parameter information." 307 308 self.arg_locations = [set()] + get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 309 310 def position_attributes(self): 311 312 "Position specific attribute references." 313 314 # Reverse the location mappings. 315 316 attr_locations = self.attr_locations = {} 317 318 for i, attrnames in enumerate(self.locations): 319 for attrname in attrnames: 320 attr_locations[attrname] = i 321 322 # Record the structures. 323 324 for source, objtype in [ 325 (self.importer.all_class_attrs, "<class>"), 326 (self.importer.all_instance_attrs, "<instance>"), 327 (self.importer.all_module_attrs, "<module>") 328 ]: 329 330 for name, attrnames in source.items(): 331 key = Reference(objtype, name) 332 l = self.structures[key] = [None] * len(attrnames) 333 for attrname in attrnames: 334 position = attr_locations[attrname] 335 if position >= len(l): 336 l.extend([None] * (position - len(l) + 1)) 337 l[position] = attrname 338 339 def initialise_access_instructions(self): 340 341 "Expand access plans into instruction sequences." 342 343 for access_location, access_plan in self.deducer.access_plans.items(): 344 345 # Obtain the access details. 346 347 name, test, test_type, base, \ 348 traversed, traversal_modes, attrnames, \ 349 context, context_test, \ 350 first_method, final_method, \ 351 origin, accessor_kinds = access_plan 352 353 instructions = [] 354 emit = instructions.append 355 356 if base: 357 original_accessor = base 358 else: 359 original_accessor = "<expr>" # use a generic placeholder 360 361 # Prepare for any first attribute access. 362 363 if traversed: 364 attrname = traversed[0] 365 del traversed[0] 366 elif attrnames: 367 attrname = attrnames[0] 368 del attrnames[0] 369 370 # Perform the first access explicitly if at least one operation 371 # requires it. 372 373 access_first_attribute = final_method in ("access", "assign") or traversed or attrnames 374 375 # Determine whether the first access involves assignment. 376 377 assigning = not traversed and not attrnames and final_method == "assign" 378 379 # Set the context if already available. 380 381 if context == "base": 382 accessor = context_var = (base,) 383 elif context == "original-accessor": 384 385 # Prevent re-evaluation of any dynamic expression by storing it. 386 387 if original_accessor == "<expr>": 388 emit(("__set_accessor", original_accessor)) 389 accessor = context_var = ("<accessor>",) 390 else: 391 accessor = context_var = (original_accessor,) 392 393 # Assigning does not set the context. 394 395 elif context in ("final-accessor", "unset") and access_first_attribute: 396 397 # Prevent re-evaluation of any dynamic expression by storing it. 398 399 if original_accessor == "<expr>": 400 emit(("__set_accessor", original_accessor)) 401 accessor = ("<accessor>",) 402 else: 403 accessor = (original_accessor,) 404 405 # Apply any test. 406 407 if test[0] == "test": 408 accessor = ("__%s_%s_%s" % test, accessor, test_type) 409 410 # Perform the first or final access. 411 # The access only needs performing if the resulting accessor is used. 412 413 remaining = len(traversed + attrnames) 414 415 if access_first_attribute: 416 417 if first_method == "relative-class": 418 if assigning: 419 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 420 else: 421 accessor = ("__load_via_class", accessor, attrname) 422 423 elif first_method == "relative-object": 424 if assigning: 425 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 426 else: 427 accessor = ("__load_via_object", accessor, attrname) 428 429 elif first_method == "relative-object-class": 430 if assigning: 431 emit(("__get_class_and_store", accessor, attrname, "<assexpr>")) 432 else: 433 accessor = ("__get_class_and_load", accessor, attrname) 434 435 elif first_method == "check-class": 436 if assigning: 437 emit(("__check_and_store_via_class", accessor, attrname, "<assexpr>")) 438 else: 439 accessor = ("__check_and_load_via_class", accessor, attrname) 440 441 elif first_method == "check-object": 442 if assigning: 443 emit(("__check_and_store_via_object", accessor, attrname, "<assexpr>")) 444 else: 445 accessor = ("__check_and_load_via_object", accessor, attrname) 446 447 elif first_method == "check-object-class": 448 if assigning: 449 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 450 else: 451 accessor = ("__check_and_load_via_any", accessor, attrname) 452 453 # Traverse attributes using the accessor. 454 455 if traversed: 456 for attrname, traversal_mode in zip(traversed, traversal_modes): 457 assigning = remaining == 1 and final_method == "assign" 458 459 # Set the context, if appropriate. 460 461 if remaining == 1 and final_method != "assign" and context == "final-accessor": 462 emit(("__set_context", accessor)) 463 accessor = context_var = "<context>" 464 465 # Perform the access only if not achieved directly. 466 467 if remaining > 1 or final_method in ("access", "assign"): 468 469 if traversal_mode == "class": 470 if assigning: 471 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 472 else: 473 accessor = ("__load_via_class", accessor, attrname) 474 else: 475 if assigning: 476 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 477 else: 478 accessor = ("__load_via_object", accessor, attrname) 479 480 remaining -= 1 481 482 if attrnames: 483 for attrname in attrnames: 484 assigning = remaining == 1 and final_method == "assign" 485 486 # Set the context, if appropriate. 487 488 if remaining == 1 and final_method != "assign" and context == "final-accessor": 489 emit(("__set_context", accessor)) 490 accessor = context_var = "<context>" 491 492 # Perform the access only if not achieved directly. 493 494 if remaining > 1 or final_method in ("access", "assign"): 495 496 if assigning: 497 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 498 else: 499 accessor = ("__check_and_load_via_any", accessor, attrname) 500 501 remaining -= 1 502 503 # Define or emit the means of accessing the actual target. 504 505 if final_method == "static-assign": 506 parent, attrname = origin.rsplit(".", 1) 507 emit(("__store_via_object", parent, attrname, "<assexpr>")) 508 509 elif final_method in ("static", "static-invoke"): 510 parent, attrname = origin.rsplit(".", 1) 511 accessor = ("__load_static", parent, origin) 512 513 # Wrap accesses in context operations. 514 515 if context_test == "test": 516 emit(("__test_context", context_var, accessor)) 517 518 elif context_test == "replace": 519 520 # Static invocation targets have a context added but no other 521 # transformation performed. 522 523 if final_method == "static-invoke": 524 emit(("__update_context", context_var, accessor)) 525 526 # Other invocation targets gain a context and have the bound 527 # version of the callable activated. 528 529 else: 530 emit(("__replace_context", context_var, accessor)) 531 532 elif final_method not in ("assign", "static-assign"): 533 emit(accessor) 534 535 self.access_instructions[access_location] = instructions 536 self.accessor_kinds[access_location] = accessor_kinds 537 538 def get_ambiguity_for_attributes(self, attrnames): 539 540 """ 541 Return a list of attribute position alternatives corresponding to each 542 of the given 'attrnames'. 543 """ 544 545 ambiguity = [] 546 547 for attrname in attrnames: 548 position = self.attr_locations[attrname] 549 ambiguity.append(len(self.locations[position])) 550 551 return ambiguity 552 553 def position_parameters(self): 554 555 "Position the parameters for each function's parameter table." 556 557 # Reverse the location mappings. 558 559 param_locations = self.param_locations = {} 560 561 for i, argnames in enumerate(self.arg_locations): 562 563 # Position the arguments. 564 565 for argname in argnames: 566 param_locations[argname] = i 567 568 for name, argnames in self.importer.function_parameters.items(): 569 570 # Allocate an extra context parameter in the table. 571 572 l = self.parameters[name] = [None] + [None] * len(argnames) 573 574 # Store an entry for the name along with the name's position in the 575 # parameter list. 576 577 for pos, argname in enumerate(argnames): 578 579 # Position the argument in the table. 580 581 position = param_locations[argname] 582 if position >= len(l): 583 l.extend([None] * (position - len(l) + 1)) 584 585 # Indicate an argument list position starting from 1 (after the 586 # initial context argument). 587 588 l[position] = (argname, pos + 1) 589 590 def populate_tables(self): 591 592 """ 593 Assign identifiers to attributes and encode structure information using 594 these identifiers. 595 """ 596 597 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 598 599 # Record the numbers indicating the locations of the names. 600 601 for key, attrnames in self.structures.items(): 602 l = self.attr_table[key] = [] 603 for attrname in attrnames: 604 if attrname is None: 605 l.append(None) 606 else: 607 l.append(d[attrname]) 608 609 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 610 611 # Record the numbers indicating the locations of the names. 612 613 for key, values in self.parameters.items(): 614 l = self.param_table[key] = [] 615 for value in values: 616 if value is None: 617 l.append(None) 618 else: 619 name, pos = value 620 l.append((d[name], pos)) 621 622 def _get_name_mapping(self, locations): 623 624 """ 625 Get a sorted list of names from 'locations', then map them to 626 identifying numbers. Return the list and the mapping. 627 """ 628 629 all_names = locations.keys() 630 all_names.sort() 631 return all_names, dict([(name, i) for i, name in enumerate(all_names)]) 632 633 def populate_constants(self): 634 635 """ 636 Obtain a collection of distinct constant literals, building a mapping 637 from constant references to those in this collection. 638 """ 639 640 # Obtain mappings from constant values to identifiers. 641 642 self.constants = {} 643 644 for path, constants in self.importer.all_constants.items(): 645 for constant, n in constants.items(): 646 647 # Record constants and obtain a number for them. 648 649 add_counter_item(self.constants, constant) 650 651 self.constant_numbers = {} 652 653 for name, (value, value_type) in self.importer.all_constant_values.items(): 654 self.constant_numbers[name] = self.constants[value] 655 656 def combine_rows(a, b): 657 c = [] 658 for i, j in zip(a, b): 659 if i is None or j is None: 660 c.append(i or j) 661 else: 662 return None 663 return c 664 665 def get_attributes_and_sizes(d): 666 667 """ 668 Return a matrix of attributes, a list of type names corresponding to columns 669 in the matrix, and a list of ranked sizes each indicating... 670 671 * a weighted size depending on the kind of object 672 * the minimum size of an object employing an attribute 673 * the number of free columns in the matrix for the attribute 674 * the attribute name itself 675 """ 676 677 attrs = {} 678 sizes = {} 679 objtypes = {} 680 681 for name, attrnames in d.items(): 682 objtype, _name = name 683 684 for attrname in attrnames: 685 686 # Record each type supporting the attribute. 687 688 init_item(attrs, attrname, set) 689 attrs[attrname].add(name) 690 691 # Maintain a record of the smallest object size supporting the given 692 # attribute. 693 694 if not sizes.has_key(attrname): 695 sizes[attrname] = len(attrnames) 696 else: 697 sizes[attrname] = min(sizes[attrname], len(attrnames)) 698 699 # Record the object types/kinds supporting the attribute. 700 701 init_item(objtypes, attrname, set) 702 objtypes[attrname].add(objtype) 703 704 # Obtain attribute details in order of size and occupancy. 705 706 names = d.keys() 707 708 rsizes = [] 709 for attrname, size in sizes.items(): 710 priority = "<instance>" in objtypes[attrname] and 0.5 or 1 711 occupied = len(attrs[attrname]) 712 key = (priority * size, size, len(names) - occupied, attrname) 713 rsizes.append(key) 714 715 rsizes.sort() 716 717 # Make a matrix of attributes. 718 719 matrix = {} 720 for attrname, types in attrs.items(): 721 row = [] 722 for name in names: 723 if name in types: 724 row.append(attrname) 725 else: 726 row.append(None) 727 matrix[attrname] = row 728 729 return matrix, names, rsizes 730 731 def get_parameters_and_sizes(d): 732 733 """ 734 Return a matrix of parameters, a list of functions corresponding to columns 735 in the matrix, and a list of ranked sizes each indicating... 736 737 * a weighted size depending on the kind of object 738 * the minimum size of a parameter list employing a parameter 739 * the number of free columns in the matrix for the parameter 740 * the parameter name itself 741 742 This is a slightly simpler version of the above 'get_attributes_and_sizes' 743 function. 744 """ 745 746 params = {} 747 sizes = {} 748 749 for name, argnames in d.items(): 750 for argname in argnames: 751 752 # Record each function supporting the parameter. 753 754 init_item(params, argname, set) 755 params[argname].add(name) 756 757 # Maintain a record of the smallest parameter list supporting the 758 # given parameter. 759 760 if not sizes.has_key(argname): 761 sizes[argname] = len(argnames) 762 else: 763 sizes[argname] = min(sizes[argname], len(argnames)) 764 765 # Obtain attribute details in order of size and occupancy. 766 767 names = d.keys() 768 769 rsizes = [] 770 for argname, size in sizes.items(): 771 occupied = len(params[argname]) 772 key = (size, size, len(names) - occupied, argname) 773 rsizes.append(key) 774 775 rsizes.sort() 776 777 # Make a matrix of parameters. 778 779 matrix = {} 780 for argname, types in params.items(): 781 row = [] 782 for name in names: 783 if name in types: 784 row.append(argname) 785 else: 786 row.append(None) 787 matrix[argname] = row 788 789 return matrix, names, rsizes 790 791 def get_allocated_locations(d, fn): 792 793 """ 794 Return a list where each element corresponds to a structure location and 795 contains a set of attribute names that may be stored at that location, given 796 a mapping 'd' whose keys are (object type, object name) tuples and whose 797 values are collections of attributes. 798 """ 799 800 matrix, names, rsizes = fn(d) 801 allocated = [] 802 803 x = 0 804 while x < len(rsizes): 805 weight, size, free, attrname = rsizes[x] 806 base = matrix[attrname] 807 y = x + 1 808 while y < len(rsizes): 809 _weight, _size, _free, _attrname = rsizes[y] 810 occupied = len(names) - _free 811 if occupied > free: 812 break 813 new = combine_rows(base, matrix[_attrname]) 814 if new: 815 del matrix[_attrname] 816 del rsizes[y] 817 base = new 818 free -= occupied 819 else: 820 y += 1 821 allocated.append(base) 822 x += 1 823 824 # Return the list of attribute names from each row of the allocated 825 # attributes table. 826 827 locations = [] 828 for attrnames in allocated: 829 l = set() 830 for attrname in attrnames: 831 if attrname: 832 l.add(attrname) 833 locations.append(l) 834 return locations 835 836 # vim: tabstop=4 expandtab shiftwidth=4