1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts and generate access instruction plans. 5 6 Copyright (C) 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_instructions = {} 60 self.accessor_kinds = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.initialise_access_instructions() 86 87 def to_output(self): 88 89 "Write the output files using optimisation information." 90 91 if not exists(self.output): 92 makedirs(self.output) 93 94 self.write_objects() 95 96 def write_objects(self): 97 98 """ 99 Write object-related output. 100 101 The locations are a list of positions indicating the attributes residing 102 at each position in the different structures in a program. 103 104 ---- 105 106 The parameter locations are a list of positions indicating the parameters 107 residing at each position in the different parameter lists in a program. 108 109 ---- 110 111 Each attribute plan provides attribute details in the following format: 112 113 location " " name " " test " " test type " " base 114 " " traversed attributes " " traversed attribute ambiguity 115 " " traversal access modes 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 Traversal access modes are either "class" (obtain accessor class to 124 access attribute) or "object" (obtain attribute directly from accessor). 125 126 ---- 127 128 The structures are presented as a table in the following format: 129 130 qualified name " " attribute names 131 132 The attribute names are separated by ", " characters and indicate the 133 attribute provided at each position in the structure associated with the 134 given type. Where no attribute is provided at a particular location 135 within a structure, "-" is given. 136 137 ---- 138 139 The parameters are presented as a table in the following format: 140 141 qualified name " " parameter details 142 143 The parameter details are separated by ", " characters and indicate 144 the parameter name and list position for each parameter described at 145 each location in the parameter table associated with the given 146 function. Where no parameter details are provided at a particular 147 location within a parameter table, "-" is given. The name and list 148 position are separated by a colon (":"). 149 150 ---- 151 152 The attribute table is presented as a table in the following format: 153 154 qualified name " " attribute identifiers 155 156 Instead of attribute names, identifiers defined according to the order 157 given in the "attrnames" file are employed to denote the attributes 158 featured in each type's structure. Where no attribute is provided at a 159 particular location within a structure, "-" is given. 160 161 ---- 162 163 The parameter table is presented as a table in the following format: 164 165 qualified name " " parameter details 166 167 Instead of parameter names, identifiers defined according to the order 168 given in the "paramnames" file are employed to denote the parameters 169 featured in each function's parameter table. Where no parameter is 170 provided at a particular location within a table, "-" is given. 171 172 ---- 173 174 The ordered list of attribute names is given in the "attrnames" file. 175 176 ---- 177 178 The ordered list of parameter names is given in the "paramnames" file. 179 180 ---- 181 182 The ordered list of constant literals is given in the "constants" file. 183 """ 184 185 f = open(join(self.output, "locations"), "w") 186 try: 187 for attrnames in self.locations: 188 print >>f, sorted_output(attrnames) 189 190 finally: 191 f.close() 192 193 f = open(join(self.output, "parameter_locations"), "w") 194 try: 195 for argnames in self.arg_locations: 196 print >>f, sorted_output(argnames) 197 198 finally: 199 f.close() 200 201 f = open(join(self.output, "instruction_plans"), "w") 202 try: 203 access_instructions = self.access_instructions.items() 204 access_instructions.sort() 205 206 for location, instructions in access_instructions: 207 print >>f, encode_access_location(location), "..." 208 for instruction in instructions: 209 print >>f, encode_instruction(instruction) 210 print >>f 211 212 finally: 213 f.close() 214 215 f = open(join(self.output, "structures"), "w") 216 try: 217 structures = self.structures.items() 218 structures.sort() 219 220 for name, attrnames in structures: 221 print >>f, name, ", ".join([s or "-" for s in attrnames]) 222 223 finally: 224 f.close() 225 226 f = open(join(self.output, "parameters"), "w") 227 try: 228 parameters = self.parameters.items() 229 parameters.sort() 230 231 for name, argnames in parameters: 232 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 233 234 finally: 235 f.close() 236 237 f = open(join(self.output, "attrtable"), "w") 238 try: 239 attr_table = self.attr_table.items() 240 attr_table.sort() 241 242 for name, attrcodes in attr_table: 243 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 244 245 finally: 246 f.close() 247 248 f = open(join(self.output, "paramtable"), "w") 249 try: 250 param_table = self.param_table.items() 251 param_table.sort() 252 253 for name, paramcodes in param_table: 254 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 255 256 finally: 257 f.close() 258 259 f = open(join(self.output, "attrnames"), "w") 260 try: 261 for name in self.all_attrnames: 262 print >>f, name 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramnames"), "w") 268 try: 269 for name in self.all_paramnames: 270 print >>f, name 271 272 finally: 273 f.close() 274 275 f = open(join(self.output, "constants"), "w") 276 try: 277 constants = [] 278 for (value, value_type), n in self.constants.items(): 279 constants.append((n, value_type, value)) 280 constants.sort() 281 for n, value_type, value in constants: 282 print >>f, value_type, repr(value) 283 284 finally: 285 f.close() 286 287 def populate_objects(self): 288 289 "Populate objects using attribute and usage information." 290 291 all_attrs = {} 292 293 # Partition attributes into separate sections so that class and instance 294 # attributes are treated separately. 295 296 for source, objtype in [ 297 (self.importer.all_class_attrs, "<class>"), 298 (self.importer.all_instance_attrs, "<instance>"), 299 (self.importer.all_module_attrs, "<module>") 300 ]: 301 for name, attrs in source.items(): 302 all_attrs[(objtype, name)] = attrs 303 304 self.locations = get_allocated_locations(all_attrs, get_attributes_and_sizes) 305 306 def populate_parameters(self): 307 308 "Populate parameter tables using parameter information." 309 310 self.arg_locations = [set()] + get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 311 312 def position_attributes(self): 313 314 "Position specific attribute references." 315 316 # Reverse the location mappings. 317 318 attr_locations = self.attr_locations = {} 319 320 for i, attrnames in enumerate(self.locations): 321 for attrname in attrnames: 322 attr_locations[attrname] = i 323 324 # Record the structures. 325 326 for source, objtype in [ 327 (self.importer.all_class_attrs, "<class>"), 328 (self.importer.all_instance_attrs, "<instance>"), 329 (self.importer.all_module_attrs, "<module>") 330 ]: 331 332 for name, attrnames in source.items(): 333 key = Reference(objtype, name) 334 l = self.structures[key] = [None] * len(attrnames) 335 for attrname in attrnames: 336 position = attr_locations[attrname] 337 if position >= len(l): 338 l.extend([None] * (position - len(l) + 1)) 339 l[position] = attrname 340 341 def initialise_access_instructions(self): 342 343 "Expand access plans into instruction sequences." 344 345 for access_location, access_plan in self.deducer.access_plans.items(): 346 347 # Obtain the access details. 348 349 name, test, test_type, base, \ 350 traversed, traversal_modes, attrnames, \ 351 context, context_test, \ 352 first_method, final_method, \ 353 origin, accessor_kinds = access_plan 354 355 instructions = [] 356 emit = instructions.append 357 358 if base: 359 original_accessor = base 360 else: 361 original_accessor = "<expr>" # use a generic placeholder 362 363 # Prepare for any first attribute access. 364 365 if traversed: 366 attrname = traversed[0] 367 del traversed[0] 368 elif attrnames: 369 attrname = attrnames[0] 370 del attrnames[0] 371 372 # Perform the first access explicitly if at least one operation 373 # requires it. 374 375 access_first_attribute = final_method in ("access", "assign") or traversed or attrnames 376 377 # Determine whether the first access involves assignment. 378 379 assigning = not traversed and not attrnames and final_method == "assign" 380 set_accessor = assigning and "__set_target_accessor" or "__set_accessor" 381 stored_accessor = assigning and "<target_accessor>" or "<accessor>" 382 383 # Set the context if already available. 384 385 if context == "base": 386 accessor = context_var = (base,) 387 elif context == "original-accessor": 388 389 # Prevent re-evaluation of any dynamic expression by storing it. 390 391 if original_accessor == "<expr>": 392 emit((set_accessor, original_accessor)) 393 accessor = context_var = (stored_accessor,) 394 else: 395 accessor = context_var = (original_accessor,) 396 397 # Assigning does not set the context. 398 399 elif context in ("final-accessor", "unset") and access_first_attribute: 400 401 # Prevent re-evaluation of any dynamic expression by storing it. 402 403 if original_accessor == "<expr>": 404 emit((set_accessor, original_accessor)) 405 accessor = (stored_accessor,) 406 else: 407 accessor = (original_accessor,) 408 409 # Apply any test. 410 411 if test[0] == "test": 412 accessor = ("__%s_%s_%s" % test, accessor, test_type) 413 414 # Perform the first or final access. 415 # The access only needs performing if the resulting accessor is used. 416 417 remaining = len(traversed + attrnames) 418 419 if access_first_attribute: 420 421 if first_method == "relative-class": 422 if assigning: 423 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 424 else: 425 accessor = ("__load_via_class", accessor, attrname) 426 427 elif first_method == "relative-object": 428 if assigning: 429 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 430 else: 431 accessor = ("__load_via_object", accessor, attrname) 432 433 elif first_method == "relative-object-class": 434 if assigning: 435 emit(("__get_class_and_store", accessor, attrname, "<assexpr>")) 436 else: 437 accessor = ("__get_class_and_load", accessor, attrname) 438 439 elif first_method == "check-class": 440 if assigning: 441 emit(("__check_and_store_via_class", accessor, attrname, "<assexpr>")) 442 else: 443 accessor = ("__check_and_load_via_class", accessor, attrname) 444 445 elif first_method == "check-object": 446 if assigning: 447 emit(("__check_and_store_via_object", accessor, attrname, "<assexpr>")) 448 else: 449 accessor = ("__check_and_load_via_object", accessor, attrname) 450 451 elif first_method == "check-object-class": 452 if assigning: 453 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 454 else: 455 accessor = ("__check_and_load_via_any", accessor, attrname) 456 457 # Traverse attributes using the accessor. 458 459 if traversed: 460 for attrname, traversal_mode in zip(traversed, traversal_modes): 461 assigning = remaining == 1 and final_method == "assign" 462 463 # Set the context, if appropriate. 464 465 if remaining == 1 and final_method != "assign" and context == "final-accessor": 466 emit(("__set_context", accessor)) 467 accessor = context_var = "<context>" 468 469 # Perform the access only if not achieved directly. 470 471 if remaining > 1 or final_method in ("access", "assign"): 472 473 if traversal_mode == "class": 474 if assigning: 475 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 476 else: 477 accessor = ("__load_via_class", accessor, attrname) 478 else: 479 if assigning: 480 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 481 else: 482 accessor = ("__load_via_object", accessor, attrname) 483 484 remaining -= 1 485 486 if attrnames: 487 for attrname in attrnames: 488 assigning = remaining == 1 and final_method == "assign" 489 490 # Set the context, if appropriate. 491 492 if remaining == 1 and final_method != "assign" and context == "final-accessor": 493 emit(("__set_context", accessor)) 494 accessor = context_var = "<context>" 495 496 # Perform the access only if not achieved directly. 497 498 if remaining > 1 or final_method in ("access", "assign"): 499 500 if assigning: 501 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 502 else: 503 accessor = ("__check_and_load_via_any", accessor, attrname) 504 505 remaining -= 1 506 507 # Define or emit the means of accessing the actual target. 508 509 if final_method == "static-assign": 510 parent, attrname = origin.rsplit(".", 1) 511 emit(("__store_via_object", parent, attrname, "<assexpr>")) 512 513 elif final_method in ("static", "static-invoke"): 514 parent, attrname = origin.rsplit(".", 1) 515 accessor = ("__load_static", parent, origin) 516 517 # Wrap accesses in context operations. 518 519 if context_test == "test": 520 emit(("__test_context", context_var, accessor)) 521 522 elif context_test == "replace": 523 524 # Static invocation targets have a context added but no other 525 # transformation performed. 526 527 if final_method == "static-invoke": 528 emit(("__update_context", context_var, accessor)) 529 530 # Other invocation targets gain a context and have the bound 531 # version of the callable activated. 532 533 else: 534 emit(("__replace_context", context_var, accessor)) 535 536 elif final_method not in ("assign", "static-assign"): 537 emit(accessor) 538 539 self.access_instructions[access_location] = instructions 540 self.accessor_kinds[access_location] = accessor_kinds 541 542 def get_ambiguity_for_attributes(self, attrnames): 543 544 """ 545 Return a list of attribute position alternatives corresponding to each 546 of the given 'attrnames'. 547 """ 548 549 ambiguity = [] 550 551 for attrname in attrnames: 552 position = self.attr_locations[attrname] 553 ambiguity.append(len(self.locations[position])) 554 555 return ambiguity 556 557 def position_parameters(self): 558 559 "Position the parameters for each function's parameter table." 560 561 # Reverse the location mappings. 562 563 param_locations = self.param_locations = {} 564 565 for i, argnames in enumerate(self.arg_locations): 566 567 # Position the arguments. 568 569 for argname in argnames: 570 param_locations[argname] = i 571 572 for name, argnames in self.importer.function_parameters.items(): 573 574 # Allocate an extra context parameter in the table. 575 576 l = self.parameters[name] = [None] + [None] * len(argnames) 577 578 # Store an entry for the name along with the name's position in the 579 # parameter list. 580 581 for pos, argname in enumerate(argnames): 582 583 # Position the argument in the table. 584 585 position = param_locations[argname] 586 if position >= len(l): 587 l.extend([None] * (position - len(l) + 1)) 588 589 # Indicate an argument list position starting from 1 (after the 590 # initial context argument). 591 592 l[position] = (argname, pos + 1) 593 594 def populate_tables(self): 595 596 """ 597 Assign identifiers to attributes and encode structure information using 598 these identifiers. 599 """ 600 601 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 602 603 # Record the numbers indicating the locations of the names. 604 605 for key, attrnames in self.structures.items(): 606 l = self.attr_table[key] = [] 607 for attrname in attrnames: 608 if attrname is None: 609 l.append(None) 610 else: 611 l.append(d[attrname]) 612 613 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 614 615 # Record the numbers indicating the locations of the names. 616 617 for key, values in self.parameters.items(): 618 l = self.param_table[key] = [] 619 for value in values: 620 if value is None: 621 l.append(None) 622 else: 623 name, pos = value 624 l.append((d[name], pos)) 625 626 def _get_name_mapping(self, locations): 627 628 """ 629 Get a sorted list of names from 'locations', then map them to 630 identifying numbers. Return the list and the mapping. 631 """ 632 633 all_names = locations.keys() 634 all_names.sort() 635 return all_names, dict([(name, i) for i, name in enumerate(all_names)]) 636 637 def populate_constants(self): 638 639 """ 640 Obtain a collection of distinct constant literals, building a mapping 641 from constant references to those in this collection. 642 """ 643 644 # Obtain mappings from constant values to identifiers. 645 646 self.constants = {} 647 648 for path, constants in self.importer.all_constants.items(): 649 650 # Record constants and obtain a number for them. 651 # Each constant is actually (value, value_type). 652 653 for constant, n in constants.items(): 654 add_counter_item(self.constants, constant) 655 656 self.constant_numbers = {} 657 658 for name, constant in self.importer.all_constant_values.items(): 659 self.constant_numbers[name] = self.constants[constant] 660 661 def combine_rows(a, b): 662 c = [] 663 for i, j in zip(a, b): 664 if i is None or j is None: 665 c.append(i or j) 666 else: 667 return None 668 return c 669 670 def get_attributes_and_sizes(d): 671 672 """ 673 Return a matrix of attributes, a list of type names corresponding to columns 674 in the matrix, and a list of ranked sizes each indicating... 675 676 * a weighted size depending on the kind of object 677 * the minimum size of an object employing an attribute 678 * the number of free columns in the matrix for the attribute 679 * the attribute name itself 680 """ 681 682 attrs = {} 683 sizes = {} 684 objtypes = {} 685 686 for name, attrnames in d.items(): 687 objtype, _name = name 688 689 for attrname in attrnames: 690 691 # Record each type supporting the attribute. 692 693 init_item(attrs, attrname, set) 694 attrs[attrname].add(name) 695 696 # Maintain a record of the smallest object size supporting the given 697 # attribute. 698 699 if not sizes.has_key(attrname): 700 sizes[attrname] = len(attrnames) 701 else: 702 sizes[attrname] = min(sizes[attrname], len(attrnames)) 703 704 # Record the object types/kinds supporting the attribute. 705 706 init_item(objtypes, attrname, set) 707 objtypes[attrname].add(objtype) 708 709 # Obtain attribute details in order of size and occupancy. 710 711 names = d.keys() 712 713 rsizes = [] 714 for attrname, size in sizes.items(): 715 priority = "<instance>" in objtypes[attrname] and 0.5 or 1 716 occupied = len(attrs[attrname]) 717 key = (priority * size, size, len(names) - occupied, attrname) 718 rsizes.append(key) 719 720 rsizes.sort() 721 722 # Make a matrix of attributes. 723 724 matrix = {} 725 for attrname, types in attrs.items(): 726 row = [] 727 for name in names: 728 if name in types: 729 row.append(attrname) 730 else: 731 row.append(None) 732 matrix[attrname] = row 733 734 return matrix, names, rsizes 735 736 def get_parameters_and_sizes(d): 737 738 """ 739 Return a matrix of parameters, a list of functions corresponding to columns 740 in the matrix, and a list of ranked sizes each indicating... 741 742 * a weighted size depending on the kind of object 743 * the minimum size of a parameter list employing a parameter 744 * the number of free columns in the matrix for the parameter 745 * the parameter name itself 746 747 This is a slightly simpler version of the above 'get_attributes_and_sizes' 748 function. 749 """ 750 751 params = {} 752 sizes = {} 753 754 for name, argnames in d.items(): 755 for argname in argnames: 756 757 # Record each function supporting the parameter. 758 759 init_item(params, argname, set) 760 params[argname].add(name) 761 762 # Maintain a record of the smallest parameter list supporting the 763 # given parameter. 764 765 if not sizes.has_key(argname): 766 sizes[argname] = len(argnames) 767 else: 768 sizes[argname] = min(sizes[argname], len(argnames)) 769 770 # Obtain attribute details in order of size and occupancy. 771 772 names = d.keys() 773 774 rsizes = [] 775 for argname, size in sizes.items(): 776 occupied = len(params[argname]) 777 key = (size, size, len(names) - occupied, argname) 778 rsizes.append(key) 779 780 rsizes.sort() 781 782 # Make a matrix of parameters. 783 784 matrix = {} 785 for argname, types in params.items(): 786 row = [] 787 for name in names: 788 if name in types: 789 row.append(argname) 790 else: 791 row.append(None) 792 matrix[argname] = row 793 794 return matrix, names, rsizes 795 796 def get_allocated_locations(d, fn): 797 798 """ 799 Return a list where each element corresponds to a structure location and 800 contains a set of attribute names that may be stored at that location, given 801 a mapping 'd' whose keys are (object type, object name) tuples and whose 802 values are collections of attributes. 803 """ 804 805 matrix, names, rsizes = fn(d) 806 allocated = [] 807 808 x = 0 809 while x < len(rsizes): 810 weight, size, free, attrname = rsizes[x] 811 base = matrix[attrname] 812 y = x + 1 813 while y < len(rsizes): 814 _weight, _size, _free, _attrname = rsizes[y] 815 occupied = len(names) - _free 816 if occupied > free: 817 break 818 new = combine_rows(base, matrix[_attrname]) 819 if new: 820 del matrix[_attrname] 821 del rsizes[y] 822 base = new 823 free -= occupied 824 else: 825 y += 1 826 allocated.append(base) 827 x += 1 828 829 # Return the list of attribute names from each row of the allocated 830 # attributes table. 831 832 locations = [] 833 for attrnames in allocated: 834 l = set() 835 for attrname in attrnames: 836 if attrname: 837 l.add(attrname) 838 locations.append(l) 839 return locations 840 841 # vim: tabstop=4 expandtab shiftwidth=4