Lichen

encoders.py

1000:f646afbcfe1c
14 months ago Paul Boddie Support value replacement for wrapper objects created when obtaining attributes that may be called. value-replacement-for-wrapper
     1 #!/usr/bin/env python     2      3 """     4 Encoder functions, producing representations of program objects.     5      6 Copyright (C) 2016, 2017, 2018, 2023 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from common import first, InstructionSequence    23     24     25     26 # Value digest computation.    27     28 from base64 import b64encode    29 from hashlib import sha1    30     31 def digest(values):    32     m = sha1()    33     for value in values:    34         m.update(str(value))    35     return b64encode(m.digest()).replace("+", "__").replace("/", "_").rstrip("=")    36     37     38     39 # Output encoding and decoding for the summary files.    40     41 def encode_attrnames(attrnames):    42     43     "Encode the 'attrnames' representing usage."    44     45     return ", ".join(attrnames) or "{}"    46     47 def encode_constrained(constrained):    48     49     "Encode the 'constrained' status for program summaries."    50     51     return constrained and "constrained" or "deduced"    52     53 def encode_usage(usage):    54     55     "Encode attribute details from 'usage'."    
56     57     all_attrnames = []    58     for t in usage:    59         attrname, invocation, assignment = t    60         all_attrnames.append("%s%s" % (attrname, invocation and "!" or assignment and "=" or ""))    61     return ", ".join(all_attrnames) or "{}"    62     63 def decode_usage(s):    64     65     "Decode attribute details from 's'."    66     67     all_attrnames = set()    68     for attrname_str in s.split(", "):    69         all_attrnames.add((attrname_str.rstrip("!="), attrname_str.endswith("!"), attrname_str.endswith("=")))    70     71     all_attrnames = list(all_attrnames)    72     all_attrnames.sort()    73     return tuple(all_attrnames)    74     75 def encode_access_location(t):    76     77     "Encode the access location 't'."    78     79     return "%s:%s:%s:%d" % (t.path, t.name or "{}", t.attrnames or "{}", t.access_number)    80     81 def decode_access_location(s):    82     83     "Decode the access location 's'."    84     85     path, name, attrnames, access_number = s.split(":")    86     return path, name, attrnames, access_number    87     88 def encode_alias_location(t, invocation=False):    89     90     "Encode the alias location 't'."    91     92     return "%s:%s:%s%s%s%s" % (t.path, t.name or "{}", t.attrnames or "{}",    93         t.version is not None and ":=%d" % t.version or "",    94         t.access_number is not None and ":#%d" % t.access_number or "",    95         invocation and "!" or "")    96     97 def decode_alias_location(s):    98     99     "Decode the alias location 's'."   
100    101     path, name, rest = s.split(":", 2)   102     attrnames = version = access_number = None   103     invocation = rest.endswith("!")   104    105     t = rest.rstrip("!").split(":#")   106     if len(t) > 1:   107         rest = t[0]; access_number = int(t[1])   108    109     t = rest.split(":=")   110     if len(t) > 1:   111         attrnames = t[0]; version = int(t[1])   112     else:   113         attrnames = rest   114    115     return path, name, attrnames, version, access_number, invocation   116    117 def encode_location(t):   118    119     "Encode the general location 't' in a concise form."   120    121     if t.name is not None and t.version is not None:   122         return "%s:%s:%d" % (t.path, t.name, t.version)   123     elif t.name is not None:   124         return "%s:%s" % (t.path, t.name)   125     else:   126         return "%s::%s" % (t.path, t.attrnames)   127    128 def encode_modifiers(modifiers):   129    130     "Encode assignment and invocation details from 'modifiers'."   131    132     all_modifiers = []   133     for t in modifiers:   134         all_modifiers.append(encode_modifier_term(t))   135     return "".join(all_modifiers)   136    137 def encode_modifier_term(t):   138    139     "Encode modifier 't' representing an assignment or an invocation."   140    141     assignment, invocation = t   142     if assignment:   143         return "="   144     elif invocation is not None:   145         arguments, keywords = invocation   146         return "(%d;%s)" % (arguments, ",".join(keywords))   147     else:   148         return "_"   149    150 def decode_modifiers(s):   151    152     "Decode 's' containing modifiers."   
153    154     i = 0   155     end = len(s)   156    157     modifiers = []   158    159     while i < end:   160         if s[i] == "=":   161             modifiers.append((True, None))   162             i += 1   163         elif s[i] == "(":   164             j = s.index(";", i)   165             arguments = int(s[i+1:j])   166             i = j   167             j = s.index(")", i)   168             keywords = s[i+1:j]   169             keywords = keywords and keywords.split(",") or []   170             modifiers.append((False, (arguments, keywords)))   171             i = j + 1   172         else:   173             modifiers.append((False, None))   174             i += 1   175    176     return modifiers   177    178    179    180 # Test generation functions.   181    182 def get_kinds(all_types):   183    184     """    185     Return object kind details for 'all_types', being a collection of   186     references for program types.   187     """   188    189     return map(lambda ref: ref.get_kind(), all_types)   190    191 def test_label_for_kind(kind):   192    193     "Return the label used for 'kind' in test details."   194    195     return kind == "<instance>" and "instance" or "type"   196    197 def test_label_for_type(ref):   198    199     "Return the label used for 'ref' in test details."   200    201     return test_label_for_kind(ref.get_kind())   202    203    204    205 # Instruction representation encoding.   206    207 def encode_instruction(instruction):   208    209     """   210     Encode the 'instruction' - a sequence starting with an operation and   211     followed by arguments, each of which may be an instruction sequence or a   212     plain value - to produce a function call string representation.   
213     """   214    215     op = instruction[0]   216     args = instruction[1:]   217    218     if args:   219         a = []   220         for arg in args:   221             if isinstance(arg, tuple):   222                 a.append(encode_instruction(arg))   223             else:   224                 a.append(arg or "{}")   225         argstr = "(%s)" % ", ".join(a)   226         return "%s%s" % (op, argstr)   227     else:   228         return op   229    230    231    232 # Output program encoding.   233    234 attribute_loading_ops = (   235     "__load_via_class", "__load_via_object", "__get_class_and_load",   236     "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",   237     )   238    239 attribute_ref_lookup_ops = (   240     "__get_object_attr_ref", "__get_class_attr_ref",   241     "__check_and_get_object_attr_ref",   242     )   243    244 typename_ops = (   245     "__test_common_instance", "__test_common_object", "__test_common_type",   246     )   247    248 type_ops = (   249     "__test_specific_instance", "__test_specific_object", "__test_specific_type",   250     )   251    252 static_ops = (   253     "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",   254     )   255    256 accessor_values = (   257     "<accessor>",   258     )   259    260 accessor_ops = (   261     "<accessor>", "<set_accessor>",   262     )   263    264 attribute_ref_values = (   265     "<attr_ref>",   266     )   267    268 attribute_ref_ops = (   269     "<attr_ref>", "<set_attr_ref>",   270     )   271    272 context_values = (   273     "<context>",   274     )   275    276 context_ops = (   277     "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",   278     )   279    280 context_op_functions = (   281     "<test_context_revert>", "<test_context_static>",   282     )   283    284 reference_acting_ops = attribute_ref_lookup_ops + attribute_loading_ops 
+ type_ops + typename_ops   285 attribute_producing_ops = attribute_loading_ops   286    287 attribute_producing_variables = (   288     "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"   289     )   290    291 def encode_access_instruction(instruction, subs, accessor_index, context_index,   292     attribute_ref_index):   293    294     """   295     Encode the 'instruction' - a sequence starting with an operation and   296     followed by arguments, each of which may be an instruction sequence or a   297     plain value - to produce a function call string representation.   298    299     The 'subs' parameter defines a mapping of substitutions for special values   300     used in instructions.   301    302     The 'accessor_index' parameter defines the position in local accessor   303     storage for the referenced accessor or affected by an accessor operation.   304    305     The 'context_index' parameter defines the position in local context storage   306     for the referenced context or affected by a context operation.   307    308     The 'attribute_ref_index' parameter defines the position in local attribute   309     reference storage for a referenced attribute.   310    311     Return both the encoded instruction and a collection of substituted names.   312     """   313    314     op = instruction[0]   315     args = instruction[1:]   316     substituted = set()   317    318     # Encode the arguments.   319    320     a = []   321     if args:   322         converting_op = op   323         for arg in args:   324             s, _substituted = encode_access_instruction_arg(arg, subs,   325                 converting_op, accessor_index, context_index, attribute_ref_index)   326             substituted.update(_substituted)   327             a.append(s)   328             converting_op = None   329    330     # Modify certain arguments.   331    332     # Convert type name arguments.   
333    334     if op in typename_ops:   335         a[1] = encode_path(encode_type_attribute(args[1]))   336    337     # Obtain addresses of type arguments.   338    339     elif op in type_ops:   340         a[1] = "&%s" % a[1]   341    342     # Obtain addresses of static objects.   343    344     elif op in static_ops:   345         a[-1] = "&%s" % a[-1]   346    347     # Add accessor storage information to certain operations.   348    349     if op in accessor_ops:   350         a.insert(0, accessor_index)   351    352     # Add attribute reference storage information to certain operations.   353    354     if op in attribute_ref_ops:   355         a.insert(0, attribute_ref_index)   356    357     # Add context storage information to certain operations.   358    359     if op in context_ops:   360         a.insert(0, context_index)   361    362     # Add the local context array to certain operations.   363    364     if op in context_op_functions:   365         a.append("__tmp_contexts")   366    367     # Define any argument string.   368    369     if a:   370         argstr = "(%s)" % ", ".join(map(str, a))   371     else:   372         argstr = ""   373    374     # Substitute the first element of the instruction, which may not be an   375     # operation at all.   
376    377     if subs.has_key(op):   378         substituted.add(op)   379    380         # Break accessor initialisation into initialisation and value-yielding   381         # parts:   382    383         if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):   384             ops = []   385             ops += a[0].get_init_instructions()   386             ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))   387             return ", ".join(map(str, ops)), substituted   388    389         op = subs[op]   390    391     elif not args:   392         op = "&%s" % encode_path(op)   393    394     return "%s%s" % (op, argstr), substituted   395    396 def encode_access_instruction_arg(arg, subs, op, accessor_index, context_index, attribute_ref_index):   397    398     """   399     Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the   400     operation to which the argument belongs, and with 'accessor_index' and   401     'context_index' indicating any affected accessor and context storage.   402    403     Return a tuple containing the encoded form of 'arg' along with a collection   404     of any substituted values.   405     """   406    407     if isinstance(arg, tuple):   408         encoded, substituted = encode_access_instruction(arg, subs,   409             accessor_index, context_index, attribute_ref_index)   410         return attribute_to_reference(op, arg[0], encoded, substituted)   411    412     # Special values only need replacing, not encoding.   413    414     elif subs.has_key(arg):   415    416         # Handle values modified by storage details.   
417    418         if arg in accessor_values or arg in context_values:   419             encoded = "%s(%s)" % (subs.get(arg), context_index)   420         elif arg in attribute_ref_values:   421             encoded = "%s(%s)" % (subs.get(arg), attribute_ref_index)   422         else:   423             encoded = subs.get(arg)   424    425         substituted = set([arg])   426         return attribute_to_reference(op, arg, encoded, substituted)   427    428     # Convert static references to the appropriate type.   429    430     elif op and op in reference_acting_ops and \   431          arg not in attribute_producing_variables:   432    433         return "&%s" % encode_path(arg), set()   434    435     # Other values may need encoding.   436    437     else:   438         return encode_path(arg), set()   439    440 def attribute_to_reference(op, arg, encoded, substituted):   441    442     # Convert attribute results to references where required.   443    444     if op and op in reference_acting_ops and (   445        arg in attribute_producing_ops or   446        arg in attribute_producing_variables):   447    448         return "__VALUE(%s)" % encoded, substituted   449     else:   450         return encoded, substituted   451    452 def encode_function_pointer(path):   453    454     "Encode 'path' as a reference to an output program function."   455    456     return "__fn_%s" % encode_path(path)   457    458 def encode_instantiator_pointer(path):   459    460     "Encode 'path' as a reference to an output program instantiator."   461    462     return "__new_%s" % encode_path(path)   463    464 def encode_instructions(instructions):   465    466     "Encode 'instructions' as a sequence."   
467    468     if len(instructions) == 1:   469         return instructions[0]   470     else:   471         return "(\n%s\n)" % ",\n".join(instructions)   472    473 def encode_literal_constant(n):   474    475     "Encode a name for the literal constant with the number 'n'."   476    477     return "__const%s" % n   478    479 def encode_literal_constant_size(value):   480    481     "Encode a size for the literal constant with the given 'value'."   482    483     if isinstance(value, basestring):   484         return len(value)   485     else:   486         return 0   487    488 def encode_literal_constant_member(value):   489    490     "Encode the member name for the 'value' in the final program."   491    492     return "%svalue" % value.__class__.__name__   493    494 def encode_literal_constant_value(value):   495    496     "Encode the given 'value' in the final program."   497    498     if isinstance(value, (int, float)):   499         return str(value)   500     else:   501         l = []   502    503         # Encode characters including non-ASCII ones.   504    505         for c in str(value):   506             if c == '"': l.append('\\"')   507             elif c == '\n': l.append('\\n')   508             elif c == '\t': l.append('\\t')   509             elif c == '\r': l.append('\\r')   510             elif c == '\\': l.append('\\\\')   511             elif 0x20 <= ord(c) < 0x80: l.append(c)   512             else: l.append("\\x%02x" % ord(c))   513    514         return '"%s"' % "".join(l)   515    516 def encode_literal_data_initialiser(style):   517    518     """   519     Encode a reference to a function populating the data for a literal having   520     the given 'style' ("mapping" or "sequence").   521     """   522    523     return "__newdata_%s" % style   524    525 def encode_literal_instantiator(path):   526    527     """   528     Encode a reference to an instantiator for a literal having the given 'path'.   
529     """   530    531     return "__newliteral_%s" % encode_path(path)   532    533 def encode_literal_reference(n):   534    535     "Encode a reference to a literal constant with the number 'n'."   536    537     return "__constvalue%s" % n   538    539 def encode_trailing_area(path):   540    541     """   542     Encode any reference to trailing data members for instances of the type   543     given by 'path'.   544     """   545    546     return "__TRAILING_%s" % encode_path(path)   547    548    549    550 # Track all encoded paths, detecting and avoiding conflicts.   551    552 all_encoded_paths = {}   553    554 def encode_path(path):   555    556     "Encode 'path' as an output program object, translating special symbols."   557    558     if path in reserved_words:   559         return "__%s" % path   560     else:   561         part_encoded = path.replace("#", "__").replace("$", "__")   562    563         if "." not in path:   564             return part_encoded   565    566         encoded = part_encoded.replace(".", "_")   567    568         # Test for a conflict with the encoding of a different path, re-encoding   569         # if necessary.   570    571         previous = all_encoded_paths.get(encoded)   572         replacement = "_"   573    574         while previous:   575             if path == previous:   576                 return encoded   577             replacement += "_"   578             encoded = part_encoded.replace(".", replacement)   579             previous = all_encoded_paths.get(encoded)   580    581         # Store any new or re-encoded path.   582    583         all_encoded_paths[encoded] = path   584         return encoded   585    586 def encode_code(name):   587    588     "Encode 'name' as an attribute code indicator."   589    590     return "__ATTRCODE(%s)" % encode_path(name)   591    592 def encode_pcode(name):   593    594     "Encode 'name' as an parameter code indicator."   
595    596     return "__PARAMCODE(%s)" % encode_path(name)   597    598 def encode_pos(name):   599    600     "Encode 'name' as an attribute position indicator."   601    602     return "__ATTRPOS(%s)" % encode_path(name)   603    604 def encode_ppos(name):   605    606     "Encode 'name' as an parameter position indicator."   607    608     return "__PARAMPOS(%s)" % encode_path(name)   609    610 def encode_predefined_reference(path):   611    612     "Encode a reference to a predefined constant value for 'path'."   613    614     return "__predefined_%s" % encode_path(path)   615    616 def encode_size(kind, path=None):   617    618     """   619     Encode a structure size reference for the given 'kind' of structure, with   620     'path' indicating a specific structure name.   621     """   622    623     return "__%ssize%s" % (structure_size_prefixes.get(kind, kind), path and "_%s" % encode_path(path) or "")   624    625 def encode_symbol(symbol_type, path=None):   626    627     "Encode a symbol with the given 'symbol_type' and optional 'path'."   628    629     return "__%s%s" % (symbol_type, path and "_%s" % encode_path(path) or "")   630    631 def encode_tablename(kind, path):   632    633     """   634     Encode a table reference for the given 'kind' of table structure, indicating   635     a 'path' for the specific object concerned.   636     """   637    638     return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))   639    640 def encode_type_attribute(path):   641    642     "Encode the special type attribute for 'path'."   643    644     return "#%s" % path   645    646 def decode_type_attribute(s):   647    648     "Decode the special type attribute 's'."   649    650     return s[1:]   651    652 def is_type_attribute(s):   653    654     "Return whether 's' is a type attribute name."   655    656     return s.startswith("#")   657    658    659    660 # A mapping from kinds to structure size reference prefixes.   
structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i",
    }

# A mapping from kinds to table name prefixes.

table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance",
    }



# Output language reserved words.

# NOTE(review): the entries look like C keywords plus NULL, but other C
# NOTE(review): keywords such as "switch" and "static" are absent. Confirm
# NOTE(review): whether the omission is intentional.

reserved_words = [
    "break", "char", "const", "continue",
    "default", "double", "else",
    "float", "for",
    "if", "int", "long",
    "NULL",
    "return", "struct",
    "typedef",
    "void", "while",
    ]

# vim: tabstop=4 expandtab shiftwidth=4