Lichen

branching.py

790:51cde302e18f
2017-03-30 Paul Boddie Fixed dictionary initialisation.
     1 #!/usr/bin/env python     2      3 """     4 Track attribute usage for names.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from common import dict_for_keys, init_item    24     25 class Branch:    26     27     """    28     A control-flow branch capturing local attribute usage for names.    29     Branches typically begin with assignments or function parameters and are    30     connected to others introduced by conditional and loop nodes.    31     32     Branches hosting accesses, and thus providing usage information, are    33     contributors to preceding branches.    34     35     Branches also provide a route from accesses back to assignments which are    36     the ultimate suppliers of the names involved.    37     """    38     39     def __init__(self, names, assigning=False, values=None):    40     41         """    42         Capture attribute usage for the given 'names', with the accompanying    43         'values' indicating assigned values for each name, if indicated.    44         """    45     46         self.contributors = set()    47         self.suppliers = {}    48         self.assignments = set(assigning and names or [])    49         self.usage = {}    50         self.values = {}    51     52         # Initialise usage for each name.    53     54         for name in names:    55             self.usage[name] = set()    56     57         # Initialise assigned values if any were provided.    58     59         if values:    60             for name, value in zip(names, values):    61                 if value:    62                     self.values[name] = value    63     64         # Computed results.    65     66         self.combined_usage = None    67     68     def get_assignment_sources(self, name):    69     70         """    71         Return the sources of 'name' from this branch's assignment information,    72         returning a list containing only this branch itself if it is the source.    73         """    74     75         if name in self.assignments:    76             return [self]    77         else:    78             sources = []    79             for b in self.get_all_suppliers(name):    80                 if name in b.assignments:    81                     sources.append(b)    82             return sources    83     84     def set_usage(self, name, attrname, invocation=False, assignment=False):    85     86         """    87         Record usage on the given 'name' of the attribute 'attrname', noting the    88         invocation of the attribute if 'invocation' is set to a true value, or    89         noting the assignment of the attribute if 'assignment' is set to a true    90         value.    91         """    92     93         if self.usage.has_key(name):    94             self.usage[name].add((attrname, invocation, assignment))    95     96     def get_usage(self):    97     98         """    99         Obtain usage from this node, combined with usage observed by its   100         contributors. Unlike the local usage which involves only a single set of   101         attribute names for a given variable name, the returned usage is a set   102         of attribute name combinations for a given variable name. For example:   103    104         {'a': set([('p', 'q', 'r'), ('p', 'r')])}   105         """   106    107         if self.combined_usage is None:   108    109             # Accumulate usage observations from contributors.   110    111             all_usage = []   112    113             for contributor in self.contributors:   114    115                 # Record any usage that can be returned.   116    117                 all_usage.append(contributor.get_usage())   118    119             # Merge usage from the contributors.   120    121             merged_usage = merge_dicts(all_usage)   122    123             # Make the local usage compatible with the combined usage.   124    125             usage = deepen_dict(self.usage)   126    127             self.combined_usage = combine_dicts(usage, merged_usage, combine_sets)   128    129         return self.combined_usage   130    131     def get_all_suppliers(self, name, all_suppliers=None):   132    133         "Return all branches supplying this branch with definitions of 'name'."   134    135         all_suppliers = all_suppliers or set()   136         all_suppliers.add(self)   137    138         if self.suppliers.has_key(name):   139             for supplier in self.suppliers[name]:   140                 if supplier not in all_suppliers:   141                     supplier.get_all_suppliers(name, all_suppliers)   142    143         return all_suppliers   144    145     def __repr__(self):   146         return "Branch(%r, %r)" % (self.usage.keys(),   147             self.assignments and True or False)   148    149 class BranchTracker:   150    151     """   152     A tracker of attribute usage for names in a namespace. This tracker directs   153     usage observations to branches which are the ultimate repositories of   154     attribute usage information.   155    156     As a program unit is inspected, the branches associated with names may   157     change. Assignments reset the branches; control-flow operations cause   158     branches to be accumulated from different code paths.   159     """   160    161     def __init__(self):   162    163         # Track assignments.   164    165         self.assignments = {}   166    167         # Details of attributes at each active branch level.   168    169         self.attribute_branches = [{}]          # stack of branches for names   170         self.attribute_branch_shelves = []      # stack of shelved branches   171    172         # Suspended branch details plus loop details.   173    174         self.suspended_broken_branches = []     # stack of lists of dicts   175         self.suspended_continuing_branches = [] # stack of lists of dicts   176    177         # Abandoned usage, useful for reviving usage for exception handlers.   178    179         self.abandoned_branches = [[]]          # stack of lists of branches   180    181         # Returning branches are like abandoned branches but are only revived in   182         # finally clauses.   183    184         self.returning_branches = [[]]   185    186         # Branches active when starting loops.   187    188         self.loop_branches = []   189    190     # Structure assembly methods.   191    192     def new_branchpoint(self, loop_node=False):   193    194         """   195         Indicate that branches diverge, initialising resources dependent on   196         any given 'loop_node'.   197         """   198    199         self.attribute_branch_shelves.append([])   200    201         if loop_node:   202             self.suspended_broken_branches.append([])   203             self.suspended_continuing_branches.append([])   204    205         # Retain a record of abandoned branches.   206    207         self.abandoned_branches.append([])   208         self.returning_branches.append([])   209    210     def new_branch(self, loop_node=False):   211    212         "Create a new branch."   213    214         attribute_branches = self.attribute_branches[-1]   215    216         branch, new_branches = self._new_branch(attribute_branches)   217    218         if branch and loop_node:   219             self.loop_branches.append(branch)   220    221         # Start using the branch for known names.   222    223         self.attribute_branches.append(new_branches)   224    225     def _new_branch(self, attribute_branches):   226    227         """   228         Define a new branch that will record attribute usage on known names from   229         'attribute_branches'.   230         """   231    232         # Detect abandoned branches.   233    234         if isinstance(attribute_branches, AbandonedDict):   235             return None, AbandonedDict()   236    237         # Otherwise, define a new branch.   238    239         names = attribute_branches.keys()   240    241         new_branches = {}   242         branch = Branch(names)   243    244         for name in names:   245             new_branches[name] = [branch]   246    247         # Add this new branch as a contributor to the previously active   248         # branches.   249    250         self._connect_branches(attribute_branches, branch)   251    252         return branch, new_branches   253    254     def shelve_branch(self, loop_node=False):   255    256         "Retain the current branch for later merging."   257    258         branches = self.attribute_branches.pop()   259         self.attribute_branch_shelves[-1].append(branches)   260    261         # Connect any loop branch to the active branches as contributors.   262    263         if loop_node:   264             branch = self.loop_branches.pop()   265             self._connect_branches(branches, branch, loop_node)   266    267     def abandon_branch(self):   268    269         "Abandon the current branch, retaining it for later."   270    271         attribute_branches = self.attribute_branches[-1]   272         self._abandon_branch()   273         self.abandoned_branches[-1].append(attribute_branches)   274    275     def abandon_returning_branch(self):   276    277         "Abandon the current branch, retaining it for later."   278    279         attribute_branches = self.attribute_branches[-1]   280         self._abandon_branch()   281         self.returning_branches[-1].append(attribute_branches)   282    283     def suspend_broken_branch(self):   284    285         "Suspend a branch for breaking out of a loop."   286    287         attribute_branches = self.attribute_branches[-1]   288    289         branches = self.suspended_broken_branches[-1]   290         branches.append(attribute_branches)   291         self._abandon_branch()   292    293     def suspend_continuing_branch(self):   294    295         "Suspend a branch for loop continuation."   296    297         attribute_branches = self.attribute_branches[-1]   298    299         branches = self.suspended_continuing_branches[-1]   300         branches.append(attribute_branches)   301         self._abandon_branch()   302    303     def _abandon_branch(self):   304    305         "Abandon the current branch."   306    307         self.attribute_branches[-1] = AbandonedDict()   308    309     def resume_abandoned_branches(self):   310    311         """   312         Resume branches previously abandoned.   313    314         Abandoned branches are not reset because they may not be handled by   315         exception handlers after all.   316         """   317    318         current_branches = self.attribute_branches[-1]   319         abandoned_branches = self.abandoned_branches[-1]   320         merged_branches = merge_dicts(abandoned_branches + [current_branches])   321    322         # Replace the combined branches with a new branch applying to all active   323         # names, connected to the supplying branches.   324    325         branch, new_branches = self._new_branch(merged_branches)   326         self.attribute_branches.append(new_branches)   327    328         # Although returning branches should not be considered as accumulating   329         # usage, they do provide sources of assignments.   330    331         if branch:   332             for returning_branches in self.returning_branches[-1]:   333                 self._connect_suppliers(returning_branches, branch)   334    335     def resume_all_abandoned_branches(self):   336    337         """   338         Resume branches previously abandoned including returning branches.   339    340         Abandoned branches are not reset because they may not be handled by   341         exception handlers after all.   342         """   343    344         current_branches = self.attribute_branches[-1]   345         abandoned_branches = self.abandoned_branches[-1]   346         returning_branches = self.returning_branches[-1]   347         merged_branches = merge_dicts(abandoned_branches + returning_branches + [current_branches])   348         self.replace_branches(merged_branches)   349    350         # Return the previously-active branches for later restoration.   351    352         return current_branches   353    354     def resume_broken_branches(self):   355    356         "Resume branches previously suspended for breaking out of a loop."   357    358         suspended_branches = self.suspended_broken_branches.pop()   359         current_branches = self.attribute_branches[-1]   360    361         # Merge suspended branches with the current branch.   362    363         merged_branches = merge_dicts(suspended_branches + [current_branches])   364         self.replace_branches(merged_branches)   365    366     def resume_continuing_branches(self):   367    368         "Resume branches previously suspended for loop continuation."   369    370         suspended_branches = self.suspended_continuing_branches.pop()   371         current_branches = self.attribute_branches[-1]   372    373         # Merge suspended branches with the current branch.   374    375         merged_branches = merge_dicts(suspended_branches + [current_branches])   376         self.replace_branches(merged_branches)   377    378     def replace_branches(self, merged_branches):   379    380         """   381         Replace the 'merged_branches' with a new branch applying to all active   382         names, connected to the supplying branches.   383         """   384    385         branch, new_branches = self._new_branch(merged_branches)   386         self.attribute_branches[-1] = new_branches   387    388     def restore_active_branches(self, branches):   389    390         "Restore the active 'branches'."   391    392         self.attribute_branches[-1] = branches   393    394     def merge_branches(self):   395    396         "Merge branches."   397    398         # Combine the attribute branches. This ensures that a list of branches   399         # affected by attribute usage is maintained for the current branch.   400    401         all_shelved_branches = self.attribute_branch_shelves.pop()   402         merged_branches = merge_dicts(all_shelved_branches, missing=make_missing)   403         self.replace_branches(merged_branches)   404    405         # Abandoned branches are retained for exception handling purposes.   406    407         all_abandoned_branches = self.abandoned_branches.pop()   408         new_abandoned_branches = merge_dicts(all_abandoned_branches)   409         self.abandoned_branches[-1].append(new_abandoned_branches)   410    411         # Returning branches are retained for finally clauses.   412    413         all_returning_branches = self.returning_branches.pop()   414         new_returning_branches = merge_dicts(all_returning_branches)   415         self.returning_branches[-1].append(new_returning_branches)   416    417     # Internal structure assembly methods.   418    419     def _connect_branches(self, attribute_branches, contributor, loop_node=False):   420    421         """   422         Given the 'attribute_branches' mapping, connect the branches referenced   423         in the mapping to the given 'contributor' branch. If 'loop_node' is   424         set to a true value, connect only the branches so that the 'contributor'   425         references the nodes supplying it with name information.   426         """   427    428         all_branches = self._connect_suppliers(attribute_branches, contributor)   429         if not loop_node:   430             self._connect_contributor(contributor, all_branches)   431    432     def _connect_suppliers(self, attribute_branches, contributor):   433    434         "Connect the 'attribute_branches' to the given 'contributor'."   435    436         # Gather branches involved with all known names into a single set.   437    438         all_branches = set()   439    440         for name, branches in attribute_branches.items():   441             all_branches.update(branches)   442    443             # Also note receiving branches on the contributor.   444    445             for branch in branches:   446                 init_item(contributor.suppliers, name, set)   447                 contributor.suppliers[name].add(branch)   448    449         return all_branches   450    451     def _connect_contributor(self, contributor, branches):   452    453         "Connect the given 'contributor' branch to the given 'branches'."   454    455         for branch in branches:   456             branch.contributors.add(contributor)   457    458     # Attribute usage methods.   459    460     def tracking_name(self, name):   461    462         """   463         Return whether 'name' is being tracked, returning all branches doing so   464         if it is.   465         """   466    467         return self.assignments.has_key(name) and self.have_name(name)   468    469     def have_name(self, name):   470    471         "Return whether 'name' is known."   472    473         return self.attribute_branches[-1].get(name)   474    475     def assign_names(self, names, values=None):   476    477         """   478         Define the start of usage tracking for the given 'names', each being   479         assigned with the corresponding 'values' if indicated.   480         """   481    482         branches = self.attribute_branches[-1]   483         branch = Branch(names, True, values)   484    485         for name in names:   486             branches[name] = [branch]   487             init_item(self.assignments, name, list)   488             self.assignments[name].append(branch)   489    490         return branch   491    492     def use_attribute(self, name, attrname, invocation=False, assignment=False):   493    494         """   495         Indicate the use on the given 'name' of an attribute with the given   496         'attrname', optionally involving an invocation of the attribute if   497         'invocation' is set to a true value, or involving an assignment of the   498         attribute if 'assignment' is set to a true value.   499    500         Return all branches that support 'name'.   501         """   502    503         branches = self.attribute_branches[-1]   504    505         # Add the usage to all current branches.   506    507         if branches.has_key(name):   508             for branch in branches[name]:   509                 branch.set_usage(name, attrname, invocation, assignment)   510             return branches[name]   511         else:   512             return None   513    514     # Query methods.   515    516     def get_assignment_positions_for_branches(self, name, branches, missing=True):   517    518         """   519         Return the positions of assignments involving the given 'name' affected   520         by the given 'branches'. If 'missing' is set to a false value, branches   521         with missing name details will be excluded instead of contributing the   522         value None to the list of positions.   523         """   524    525         if not branches:   526             return [None]   527    528         positions = set()   529         assignments = self.assignments[name]   530    531         for assignment in self.get_assignments_for_branches(name, branches):   532    533             # Use None to indicate a branch without assignment information.   534    535             if missing and isinstance(assignment, MissingBranch):   536                 positions.add(None)   537             else:   538                 pos = assignments.index(assignment)   539                 positions.add(pos)   540    541         positions = list(positions)   542         positions.sort()   543         return positions   544    545     def get_assignments_for_branches(self, name, branches, missing=True):   546    547         """   548         Return the origins of assignments involving the given 'name' affected   549         by the given 'branches'. The origins are a list of branches where names   550         are defined using assignments. If 'missing' is set to a false value,   551         branches with missing name details are excluded.   552         """   553    554         all_branches = []   555         assignments = self.assignments[name]   556    557         # Obtain the assignments recorded for each branch.   558    559         for branch in branches:   560    561             # Find the branch representing the definition of some names in the   562             # scope's assignments, making sure that the given name is involved.   563    564             for assignment in branch.get_assignment_sources(name):   565    566                 # Capture branches without assignment information as well as   567                 # genuine assignment branches.   568    569                 if assignment in assignments or missing and isinstance(assignment, MissingBranch):   570                     all_branches.append(assignment)   571    572         return all_branches   573    574     def get_all_usage(self):   575    576         """   577         Convert usage observations from the tracker to a simple mapping of   578         names to sets of attribute names.   579         """   580    581         d = {}   582         for name, branches in self.assignments.items():   583             d[name] = self.get_usage_from_branches_for_name(branches, name)   584         return d   585    586     def get_usage_from_branches_for_name(self, branches, name):   587    588         """   589         Convert usage observations from the 'branches' to a simple list of   590         usage sets for the given 'name'.   591         """   592    593         l = []   594         for branch in branches:   595             l.append(branch.get_usage()[name])   596         return l   597    598     def get_all_values(self):   599    600         "Return a mapping from names to lists of assigned values."   601    602         d = {}   603         for name, branches in self.assignments.items():   604             l = []   605             for branch in branches:   606                 l.append(branch.values.get(name))   607             d[name] = l   608         return d   609    610     def returns_value(self):   611    612         "Indicate whether a value is always being returned."   613    614         return isinstance(self.attribute_branches[-1], AbandonedDict)   615    616 # Special objects.   617    618 class AbandonedDict(dict):   619    620     "A dictionary representing mappings in an abandoned branch."   621    622     def __repr__(self):   623         return "AbandonedDict()"   624    625 class MissingBranch(Branch):   626    627     "A branch introduced during dictionary merging."   628    629     def __repr__(self):   630         return "MissingBranch(%r, %r)" % (self.usage.keys(),   631             self.assignments and True or False)   632    633 def make_missing(name):   634    635     "Make a special branch indicating missing name information."   636    637     return set([MissingBranch([name], True)])   638    639 # Dictionary utilities.   640    641 def merge_dicts(dicts, ignored=AbandonedDict, missing=None):   642    643     """   644     Merge the given 'dicts' mapping keys to sets of values.   645    646     Where 'ignored' is specified, any dictionary of the given type is ignored.   647     Where all dictionaries to be merged are of the given type, an instance of   648     the type is returned as the merged dictionary.   649    650     Where 'missing' is specified, it provides a callable that produces a set of   651     suitable values for a given name.   652     """   653    654     new_dict = {}   655     all_names = set()   656    657     # Determine all known names.   658    659     for old_dict in dicts:   660         all_names.update(old_dict.keys())   661    662     # Merge the dictionaries, looking for all known names in each one.   663    664     have_dicts = False   665    666     for old_dict in dicts:   667    668         # Abandoned dictionaries should not contribute information.   669    670         if isinstance(old_dict, ignored):   671             continue   672         else:   673             have_dicts = True   674    675         for name in all_names:   676    677             # Find branches providing each name.   678    679             if old_dict.has_key(name):   680                 values = old_dict[name]   681    682             # Branches not providing names may indicate usage before assignment.   683    684             elif missing:   685                 values = missing(name)   686             else:   687                 continue   688    689             # Initialise mappings in the resulting dictionary.   690    691             if not new_dict.has_key(name):   692                 new_dict[name] = set(values)   693             else:   694                 new_dict[name].update(values)   695    696     # Where no dictionaries contributed, all branches were abandoned.   697    698     if have_dicts:   699         return new_dict   700     else:   701         return ignored()   702    703 def deepen_dict(d):   704    705     """   706     Return a version of dictionary 'd' with its values converted to sets   707     containing each original value as a single element in each new value.   708     Original values are assumed to be sequences. Thus...   709    710     {"self" : ("x", "y")}   711    712     ...would become...   713    714     {"self" : set([("x", "y")])}   715    716     ...allowing other such values to be added to the set alongside the original   717     value.   718     """   719    720     l = []   721    722     for key, value in d.items():   723    724         # Sort the attribute name details for stable comparisons.   725    726         value = list(value)   727         value.sort()   728         l.append((key, set([tuple(value)])))   729    730     return dict(l)   731    732 def combine_sets(s1, s2):   733    734     "Combine elements from sets 's1' and 's2'."   735    736     if not s1:   737         return s2   738     elif not s2:   739         return s1   740    741     s = set()   742    743     for i1 in s1:   744         for i2 in s2:   745    746             # Sort the attribute name details for stable comparisons.   747    748             l = list(set(i1 + i2))   749             l.sort()   750             s.add(tuple(l))   751    752     return s   753    754 def combine_dicts(d1, d2, combine=combine_sets):   755    756     """   757     Combine dictionaries 'd1' and 'd2' such that the values for common keys   758     are themselves combined in the result.   759     """   760    761     d = {}   762    763     for key in d1.keys():   764         if d2.has_key(key):   765             d[key] = combine(d1[key], d2[key])   766         else:   767             d[key] = d1[key]   768    769     return d   770    771 # vim: tabstop=4 expandtab shiftwidth=4