1 #!/usr/bin/env python 2 3 """ 4 Optimise object layouts and generate access instruction plans. 5 6 Copyright (C) 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from common import add_counter_item, get_attrname_from_location, init_item, \ 23 sorted_output 24 from encoders import encode_access_location, encode_instruction, get_kinds 25 from os.path import exists, join 26 from os import makedirs 27 from referencing import Reference 28 29 class Optimiser: 30 31 "Optimise objects in a program." 32 33 def __init__(self, importer, deducer, output): 34 35 """ 36 Initialise an instance using the given 'importer' and 'deducer' that 37 will perform the arrangement of attributes for program objects, writing 38 the results to the given 'output' directory. 39 """ 40 41 self.importer = importer 42 self.deducer = deducer 43 self.output = output 44 45 # Locations/offsets of attributes in objects. 46 47 self.locations = None 48 self.attr_locations = None 49 self.all_attrnames = None 50 51 # Locations of parameters in parameter tables. 52 53 self.arg_locations = None 54 self.param_locations = None 55 self.all_paramnames = None 56 57 # Specific attribute access information. 58 59 self.access_instructions = {} 60 self.accessor_kinds = {} 61 62 # Object structure information. 63 64 self.structures = {} 65 self.attr_table = {} 66 67 # Parameter list information. 68 69 self.parameters = {} 70 self.param_table = {} 71 72 # Constant literal information. 73 74 self.constants = [] 75 self.constant_numbers = {} 76 77 # Optimiser activities. 78 79 self.populate_objects() 80 self.position_attributes() 81 self.populate_parameters() 82 self.position_parameters() 83 self.populate_tables() 84 self.populate_constants() 85 self.initialise_access_instructions() 86 87 def to_output(self): 88 89 "Write the output files using optimisation information." 90 91 if not exists(self.output): 92 makedirs(self.output) 93 94 self.write_objects() 95 96 def write_objects(self): 97 98 """ 99 Write object-related output. 100 101 The locations are a list of positions indicating the attributes residing 102 at each position in the different structures in a program. 103 104 ---- 105 106 The parameter locations are a list of positions indicating the parameters 107 residing at each position in the different parameter lists in a program. 108 109 ---- 110 111 Each attribute plan provides attribute details in the following format: 112 113 location " " name " " test " " test type " " base 114 " " traversed attributes " " traversed attribute ambiguity 115 " " traversal access modes 116 " " attributes to traverse " " attribute ambiguity 117 " " context " " access method " " static attribute 118 119 Locations have the following format: 120 121 qualified name of scope "." local name ":" name version 122 123 Traversal access modes are either "class" (obtain accessor class to 124 access attribute) or "object" (obtain attribute directly from accessor). 125 126 ---- 127 128 The structures are presented as a table in the following format: 129 130 qualified name " " attribute names 131 132 The attribute names are separated by ", " characters and indicate the 133 attribute provided at each position in the structure associated with the 134 given type. Where no attribute is provided at a particular location 135 within a structure, "-" is given. 136 137 ---- 138 139 The parameters are presented as a table in the following format: 140 141 qualified name " " parameter details 142 143 The parameter details are separated by ", " characters and indicate 144 the parameter name and list position for each parameter described at 145 each location in the parameter table associated with the given 146 function. Where no parameter details are provided at a particular 147 location within a parameter table, "-" is given. The name and list 148 position are separated by a colon (":"). 149 150 ---- 151 152 The attribute table is presented as a table in the following format: 153 154 qualified name " " attribute identifiers 155 156 Instead of attribute names, identifiers defined according to the order 157 given in the "attrnames" file are employed to denote the attributes 158 featured in each type's structure. Where no attribute is provided at a 159 particular location within a structure, "-" is given. 160 161 ---- 162 163 The parameter table is presented as a table in the following format: 164 165 qualified name " " parameter details 166 167 Instead of parameter names, identifiers defined according to the order 168 given in the "paramnames" file are employed to denote the parameters 169 featured in each function's parameter table. Where no parameter is 170 provided at a particular location within a table, "-" is given. 171 172 ---- 173 174 The ordered list of attribute names is given in the "attrnames" file. 175 176 ---- 177 178 The ordered list of parameter names is given in the "paramnames" file. 179 180 ---- 181 182 The ordered list of constant literals is given in the "constants" file. 183 """ 184 185 f = open(join(self.output, "locations"), "w") 186 try: 187 for attrnames in self.locations: 188 print >>f, sorted_output(attrnames) 189 190 finally: 191 f.close() 192 193 f = open(join(self.output, "parameter_locations"), "w") 194 try: 195 for argnames in self.arg_locations: 196 print >>f, sorted_output(argnames) 197 198 finally: 199 f.close() 200 201 f = open(join(self.output, "instruction_plans"), "w") 202 try: 203 access_instructions = self.access_instructions.items() 204 access_instructions.sort() 205 206 for location, instructions in access_instructions: 207 print >>f, encode_access_location(location), "..." 208 for instruction in instructions: 209 print >>f, encode_instruction(instruction) 210 print >>f 211 212 finally: 213 f.close() 214 215 f = open(join(self.output, "structures"), "w") 216 try: 217 structures = self.structures.items() 218 structures.sort() 219 220 for name, attrnames in structures: 221 print >>f, name, ", ".join([s or "-" for s in attrnames]) 222 223 finally: 224 f.close() 225 226 f = open(join(self.output, "parameters"), "w") 227 try: 228 parameters = self.parameters.items() 229 parameters.sort() 230 231 for name, argnames in parameters: 232 print >>f, name, ", ".join([s and ("%s:%d" % s) or "-" for s in argnames]) 233 234 finally: 235 f.close() 236 237 f = open(join(self.output, "attrtable"), "w") 238 try: 239 attr_table = self.attr_table.items() 240 attr_table.sort() 241 242 for name, attrcodes in attr_table: 243 print >>f, name, ", ".join([i is not None and str(i) or "-" for i in attrcodes]) 244 245 finally: 246 f.close() 247 248 f = open(join(self.output, "paramtable"), "w") 249 try: 250 param_table = self.param_table.items() 251 param_table.sort() 252 253 for name, paramcodes in param_table: 254 print >>f, name, ", ".join([s and ("%d:%d" % s) or "-" for s in paramcodes]) 255 256 finally: 257 f.close() 258 259 f = open(join(self.output, "attrnames"), "w") 260 try: 261 for name in self.all_attrnames: 262 print >>f, name 263 264 finally: 265 f.close() 266 267 f = open(join(self.output, "paramnames"), "w") 268 try: 269 for name in self.all_paramnames: 270 print >>f, name 271 272 finally: 273 f.close() 274 275 f = open(join(self.output, "constants"), "w") 276 try: 277 constants = [] 278 for (value, value_type, encoding), n in self.constants.items(): 279 constants.append((n, value_type, encoding, value)) 280 constants.sort() 281 for n, value_type, encoding, value in constants: 282 print >>f, value_type, encoding or "{}", repr(value) 283 284 finally: 285 f.close() 286 287 def populate_objects(self): 288 289 "Populate objects using attribute and usage information." 290 291 self.all_attrs = {} 292 293 # Partition attributes into separate sections so that class and instance 294 # attributes are treated separately. 295 296 for source, objtype in [ 297 (self.importer.all_class_attrs, "<class>"), 298 (self.importer.all_instance_attrs, "<instance>"), 299 (self.importer.all_module_attrs, "<module>") 300 ]: 301 302 for name, attrnames in source.items(): 303 self.all_attrs[(objtype, name)] = attrnames 304 305 self.locations = get_allocated_locations(self.all_attrs, get_attributes_and_sizes) 306 307 def populate_parameters(self): 308 309 "Populate parameter tables using parameter information." 310 311 self.arg_locations = [set()] + get_allocated_locations(self.importer.function_parameters, get_parameters_and_sizes) 312 313 def position_attributes(self): 314 315 "Position specific attribute references." 316 317 # Reverse the location mappings. 318 319 attr_locations = self.attr_locations = {} 320 321 for i, attrnames in enumerate(self.locations): 322 for attrname in attrnames: 323 attr_locations[attrname] = i 324 325 # Record the structures. 326 327 for (objtype, name), attrnames in self.all_attrs.items(): 328 key = Reference(objtype, name) 329 l = self.structures[key] = [None] * len(attrnames) 330 for attrname in attrnames: 331 position = attr_locations[attrname] 332 if position >= len(l): 333 l.extend([None] * (position - len(l) + 1)) 334 l[position] = attrname 335 336 def initialise_access_instructions(self): 337 338 "Expand access plans into instruction sequences." 339 340 for access_location, access_plan in self.deducer.access_plans.items(): 341 342 # Obtain the access details. 343 344 name, test, test_type, base, \ 345 traversed, traversal_modes, attrnames, \ 346 context, context_test, \ 347 first_method, final_method, \ 348 origin, accessor_kinds = access_plan 349 350 instructions = [] 351 emit = instructions.append 352 353 if base: 354 original_accessor = base 355 else: 356 original_accessor = "<expr>" # use a generic placeholder 357 358 # Prepare for any first attribute access. 359 360 if traversed: 361 attrname = traversed[0] 362 del traversed[0] 363 elif attrnames: 364 attrname = attrnames[0] 365 del attrnames[0] 366 367 # Perform the first access explicitly if at least one operation 368 # requires it. 369 370 access_first_attribute = final_method in ("access", "assign") or traversed or attrnames 371 372 # Determine whether the first access involves assignment. 373 374 assigning = not traversed and not attrnames and final_method == "assign" 375 set_accessor = assigning and "<set_target_accessor>" or "<set_accessor>" 376 stored_accessor = assigning and "<target_accessor>" or "<accessor>" 377 378 # Set the context if already available. 379 380 if context == "base": 381 accessor = context_var = (base,) 382 elif context == "original-accessor": 383 384 # Prevent re-evaluation of any dynamic expression by storing it. 385 386 if original_accessor == "<expr>": 387 emit((set_accessor, original_accessor)) 388 accessor = context_var = (stored_accessor,) 389 else: 390 accessor = context_var = (original_accessor,) 391 392 # Assigning does not set the context. 393 394 elif context in ("final-accessor", "unset") and access_first_attribute: 395 396 # Prevent re-evaluation of any dynamic expression by storing it. 397 398 if original_accessor == "<expr>": 399 emit((set_accessor, original_accessor)) 400 accessor = (stored_accessor,) 401 else: 402 accessor = (original_accessor,) 403 404 # Apply any test. 405 406 if test[0] == "test": 407 accessor = ("__%s_%s_%s" % test, accessor, test_type) 408 409 # Perform the first or final access. 410 # The access only needs performing if the resulting accessor is used. 411 412 remaining = len(traversed + attrnames) 413 414 if access_first_attribute: 415 416 if first_method == "relative-class": 417 if assigning: 418 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 419 else: 420 accessor = ("__load_via_class", accessor, attrname) 421 422 elif first_method == "relative-object": 423 if assigning: 424 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 425 else: 426 accessor = ("__load_via_object", accessor, attrname) 427 428 elif first_method == "relative-object-class": 429 if assigning: 430 emit(("__get_class_and_store", accessor, attrname, "<assexpr>")) 431 else: 432 accessor = ("__get_class_and_load", accessor, attrname) 433 434 elif first_method == "check-class": 435 if assigning: 436 emit(("__check_and_store_via_class", accessor, attrname, "<assexpr>")) 437 else: 438 accessor = ("__check_and_load_via_class", accessor, attrname) 439 440 elif first_method == "check-object": 441 if assigning: 442 emit(("__check_and_store_via_object", accessor, attrname, "<assexpr>")) 443 else: 444 accessor = ("__check_and_load_via_object", accessor, attrname) 445 446 elif first_method == "check-object-class": 447 if assigning: 448 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 449 else: 450 accessor = ("__check_and_load_via_any", accessor, attrname) 451 452 # Traverse attributes using the accessor. 453 454 if traversed: 455 for attrname, traversal_mode in zip(traversed, traversal_modes): 456 assigning = remaining == 1 and final_method == "assign" 457 458 # Set the context, if appropriate. 459 460 if remaining == 1 and final_method != "assign" and context == "final-accessor": 461 emit(("__set_context", accessor)) 462 accessor = context_var = "<context>" 463 464 # Perform the access only if not achieved directly. 465 466 if remaining > 1 or final_method in ("access", "assign"): 467 468 if traversal_mode == "class": 469 if assigning: 470 emit(("__store_via_class", accessor, attrname, "<assexpr>")) 471 else: 472 accessor = ("__load_via_class", accessor, attrname) 473 else: 474 if assigning: 475 emit(("__store_via_object", accessor, attrname, "<assexpr>")) 476 else: 477 accessor = ("__load_via_object", accessor, attrname) 478 479 remaining -= 1 480 481 if attrnames: 482 for attrname in attrnames: 483 assigning = remaining == 1 and final_method == "assign" 484 485 # Set the context, if appropriate. 486 487 if remaining == 1 and final_method != "assign" and context == "final-accessor": 488 emit(("__set_context", accessor)) 489 accessor = context_var = "<context>" 490 491 # Perform the access only if not achieved directly. 492 493 if remaining > 1 or final_method in ("access", "assign"): 494 495 if assigning: 496 emit(("__check_and_store_via_any", accessor, attrname, "<assexpr>")) 497 else: 498 accessor = ("__check_and_load_via_any", accessor, attrname) 499 500 remaining -= 1 501 502 # Define or emit the means of accessing the actual target. 503 504 if final_method == "static-assign": 505 parent, attrname = origin.rsplit(".", 1) 506 emit(("__store_via_object", parent, attrname, "<assexpr>")) 507 508 elif final_method in ("static", "static-invoke"): 509 parent, attrname = origin.rsplit(".", 1) 510 accessor = ("__load_static", parent, origin) 511 512 # Wrap accesses in context operations. 513 514 if context_test == "test": 515 emit(("__test_context", context_var, accessor)) 516 517 elif context_test == "replace": 518 emit(("__update_context", context_var, accessor)) 519 520 elif final_method not in ("assign", "static-assign"): 521 emit(accessor) 522 523 self.access_instructions[access_location] = instructions 524 self.accessor_kinds[access_location] = accessor_kinds 525 526 def get_ambiguity_for_attributes(self, attrnames): 527 528 """ 529 Return a list of attribute position alternatives corresponding to each 530 of the given 'attrnames'. 531 """ 532 533 ambiguity = [] 534 535 for attrname in attrnames: 536 position = self.attr_locations[attrname] 537 ambiguity.append(len(self.locations[position])) 538 539 return ambiguity 540 541 def position_parameters(self): 542 543 "Position the parameters for each function's parameter table." 544 545 # Reverse the location mappings. 546 547 param_locations = self.param_locations = {} 548 549 for i, argnames in enumerate(self.arg_locations): 550 551 # Position the arguments. 552 553 for argname in argnames: 554 param_locations[argname] = i 555 556 for name, argnames in self.importer.function_parameters.items(): 557 558 # Allocate an extra context parameter in the table. 559 560 l = self.parameters[name] = [None] + [None] * len(argnames) 561 562 # Store an entry for the name along with the name's position in the 563 # parameter list. 564 565 for pos, argname in enumerate(argnames): 566 567 # Position the argument in the table. 568 569 position = param_locations[argname] 570 if position >= len(l): 571 l.extend([None] * (position - len(l) + 1)) 572 573 # Indicate an argument list position starting from 1 (after the 574 # initial context argument). 575 576 l[position] = (argname, pos + 1) 577 578 def populate_tables(self): 579 580 """ 581 Assign identifiers to attributes and encode structure information using 582 these identifiers. 583 """ 584 585 self.all_attrnames, d = self._get_name_mapping(self.attr_locations) 586 587 # Record the numbers indicating the locations of the names. 588 589 for key, attrnames in self.structures.items(): 590 l = self.attr_table[key] = [] 591 for attrname in attrnames: 592 if attrname is None: 593 l.append(None) 594 else: 595 l.append(d[attrname]) 596 597 self.all_paramnames, d = self._get_name_mapping(self.param_locations) 598 599 # Record the numbers indicating the locations of the names. 600 601 for key, values in self.parameters.items(): 602 l = self.param_table[key] = [] 603 for value in values: 604 if value is None: 605 l.append(None) 606 else: 607 name, pos = value 608 l.append((d[name], pos)) 609 610 def _get_name_mapping(self, locations): 611 612 """ 613 Get a sorted list of names from 'locations', then map them to 614 identifying numbers. Return the list and the mapping. 615 """ 616 617 all_names = locations.keys() 618 all_names.sort() 619 d = {} 620 for i, name in enumerate(all_names): 621 d[name] = i 622 return all_names, d 623 624 def populate_constants(self): 625 626 """ 627 Obtain a collection of distinct constant literals, building a mapping 628 from constant references to those in this collection. 629 """ 630 631 # Obtain mappings from constant values to identifiers. 632 633 self.constants = {} 634 635 for path, constants in self.importer.all_constants.items(): 636 637 # Record constants and obtain a number for them. 638 # Each constant is actually (value, value_type, encoding). 639 640 for constant, n in constants.items(): 641 add_counter_item(self.constants, constant) 642 643 self.constant_numbers = {} 644 645 for name, constant in self.importer.all_constant_values.items(): 646 self.constant_numbers[name] = self.constants[constant] 647 648 def combine_rows(a, b): 649 c = [] 650 for i, j in zip(a, b): 651 if i is None or j is None: 652 c.append(i or j) 653 else: 654 return None 655 return c 656 657 def get_attributes_and_sizes(d): 658 659 """ 660 Return a matrix of attributes, a list of type names corresponding to columns 661 in the matrix, and a list of ranked sizes each indicating... 662 663 * a weighted size depending on the kind of object 664 * the minimum size of an object employing an attribute 665 * the number of free columns in the matrix for the attribute 666 * the attribute name itself 667 """ 668 669 attrs = {} 670 sizes = {} 671 objtypes = {} 672 673 for name, attrnames in d.items(): 674 objtype, _name = name 675 676 for attrname in attrnames: 677 678 # Record each type supporting the attribute. 679 680 init_item(attrs, attrname, set) 681 attrs[attrname].add(name) 682 683 # Maintain a record of the smallest object size supporting the given 684 # attribute. 685 686 if not sizes.has_key(attrname): 687 sizes[attrname] = len(attrnames) 688 else: 689 sizes[attrname] = min(sizes[attrname], len(attrnames)) 690 691 # Record the object types/kinds supporting the attribute. 692 693 init_item(objtypes, attrname, set) 694 objtypes[attrname].add(objtype) 695 696 # Obtain attribute details in order of size and occupancy. 697 698 names = d.keys() 699 700 rsizes = [] 701 for attrname, size in sizes.items(): 702 priority = "<instance>" in objtypes[attrname] and 0.5 or 1 703 occupied = len(attrs[attrname]) 704 key = (priority * size, size, len(names) - occupied, attrname) 705 rsizes.append(key) 706 707 rsizes.sort() 708 709 # Make a matrix of attributes. 710 711 matrix = {} 712 for attrname, types in attrs.items(): 713 row = [] 714 for name in names: 715 if name in types: 716 row.append(attrname) 717 else: 718 row.append(None) 719 matrix[attrname] = row 720 721 return matrix, names, rsizes 722 723 def get_parameters_and_sizes(d): 724 725 """ 726 Return a matrix of parameters, a list of functions corresponding to columns 727 in the matrix, and a list of ranked sizes each indicating... 728 729 * a weighted size depending on the kind of object 730 * the minimum size of a parameter list employing a parameter 731 * the number of free columns in the matrix for the parameter 732 * the parameter name itself 733 734 This is a slightly simpler version of the above 'get_attributes_and_sizes' 735 function. 736 """ 737 738 params = {} 739 sizes = {} 740 741 for name, argnames in d.items(): 742 for argname in argnames: 743 744 # Record each function supporting the parameter. 745 746 init_item(params, argname, set) 747 params[argname].add(name) 748 749 # Maintain a record of the smallest parameter list supporting the 750 # given parameter. 751 752 if not sizes.has_key(argname): 753 sizes[argname] = len(argnames) 754 else: 755 sizes[argname] = min(sizes[argname], len(argnames)) 756 757 # Obtain attribute details in order of size and occupancy. 758 759 names = d.keys() 760 761 rsizes = [] 762 for argname, size in sizes.items(): 763 occupied = len(params[argname]) 764 key = (size, size, len(names) - occupied, argname) 765 rsizes.append(key) 766 767 rsizes.sort() 768 769 # Make a matrix of parameters. 770 771 matrix = {} 772 for argname, types in params.items(): 773 row = [] 774 for name in names: 775 if name in types: 776 row.append(argname) 777 else: 778 row.append(None) 779 matrix[argname] = row 780 781 return matrix, names, rsizes 782 783 def get_allocated_locations(d, fn): 784 785 """ 786 Return a list where each element corresponds to a structure location and 787 contains a set of attribute names that may be stored at that location, given 788 a mapping 'd' whose keys are (object type, object name) tuples and whose 789 values are collections of attributes. 790 """ 791 792 matrix, names, rsizes = fn(d) 793 allocated = [] 794 795 x = 0 796 while x < len(rsizes): 797 weight, size, free, attrname = rsizes[x] 798 base = matrix[attrname] 799 y = x + 1 800 while y < len(rsizes): 801 _weight, _size, _free, _attrname = rsizes[y] 802 occupied = len(names) - _free 803 if occupied > free: 804 break 805 new = combine_rows(base, matrix[_attrname]) 806 if new: 807 del matrix[_attrname] 808 del rsizes[y] 809 base = new 810 free -= occupied 811 else: 812 y += 1 813 allocated.append(base) 814 x += 1 815 816 # Return the list of attribute names from each row of the allocated 817 # attributes table. 818 819 locations = [] 820 for attrnames in allocated: 821 l = set() 822 for attrname in attrnames: 823 if attrname: 824 l.add(attrname) 825 locations.append(l) 826 return locations 827 828 # vim: tabstop=4 expandtab shiftwidth=4