Lichen

encoders.py

769:00e902870a29
2017-03-25 Paul Boddie Merged changes from the normal-function-parameters branch, making it the default line of development from now on.
     1 #!/usr/bin/env python     2      3 """     4 Encoder functions, producing representations of program objects.     5      6 Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from common import first, InstructionSequence    23     24     25     26 # Value digest computation.    27     28 from base64 import b64encode    29 from hashlib import sha1    30     31 def digest(values):    32     m = sha1()    33     for value in values:    34         m.update(str(value))    35     return b64encode(m.digest()).replace("+", "__").replace("/", "_").rstrip("=")    36     37     38     39 # Output encoding and decoding for the summary files.    40     41 def encode_attrnames(attrnames):    42     43     "Encode the 'attrnames' representing usage."    44     45     return ", ".join(attrnames) or "{}"    46     47 def encode_constrained(constrained):    48     49     "Encode the 'constrained' status for program summaries."    50     51     return constrained and "constrained" or "deduced"    52     53 def encode_usage(usage):    54     55     "Encode attribute details from 'usage'."    56     57     all_attrnames = []    58     for t in usage:    59         attrname, invocation, assignment = t    60         all_attrnames.append("%s%s" % (attrname, invocation and "!" or assignment and "=" or ""))    61     return ", ".join(all_attrnames) or "{}"    62     63 def decode_usage(s):    64     65     "Decode attribute details from 's'."    66     67     all_attrnames = set()    68     for attrname_str in s.split(", "):    69         all_attrnames.add((attrname_str.rstrip("!="), attrname_str.endswith("!"), attrname_str.endswith("=")))    70     71     all_attrnames = list(all_attrnames)    72     all_attrnames.sort()    73     return tuple(all_attrnames)    74     75 def encode_access_location(t):    76     77     "Encode the access location 't'."    78     79     path, name, attrname, version = t    80     return "%s %s %s:%d" % (path, name or "{}", attrname, version)    81     82 def encode_location(t):    83     84     "Encode the general location 't' in a concise form."    85     86     path, name, attrname, version = t    87     if name is not None and version is not None:    88         return "%s %s:%d" % (path, name, version)    89     elif name is not None:    90         return "%s %s" % (path, name)    91     else:    92         return "%s :%s" % (path, attrname)    93     94 def encode_modifiers(modifiers):    95     96     "Encode assignment and invocation details from 'modifiers'."    97     98     all_modifiers = []    99     for t in modifiers:   100         all_modifiers.append(encode_modifier_term(t))   101     return "".join(all_modifiers)   102    103 def encode_modifier_term(t):   104    105     "Encode modifier 't' representing an assignment or an invocation."   106    107     assignment, invocation = t   108     if assignment:   109         return "="   110     elif invocation is not None:   111         arguments, keywords = invocation   112         return "(%d;%s)" % (arguments, ",".join(keywords))   113     else:   114         return "_"   115    116 def decode_modifiers(s):   117    118     "Decode 's' containing modifiers."   119    120     i = 0   121     end = len(s)   122    123     modifiers = []   124    125     while i < end:   126         if s[i] == "=":   127             modifiers.append((True, None))   128             i += 1   129         elif s[i] == "(":   130             j = s.index(";", i)   131             arguments = int(s[i+1:j])   132             i = j   133             j = s.index(")", i)   134             keywords = s[i+1:j]   135             keywords = keywords and keywords.split(",") or []   136             modifiers.append((False, (arguments, keywords)))   137             i = j + 1   138         else:   139             modifiers.append((False, None))   140             i += 1   141    142     return modifiers   143    144    145    146 # Test generation functions.   147    148 def get_kinds(all_types):   149    150     """    151     Return object kind details for 'all_types', being a collection of   152     references for program types.   153     """   154    155     return map(lambda ref: ref.get_kind(), all_types)   156    157 def test_label_for_kind(kind):   158    159     "Return the label used for 'kind' in test details."   160    161     return kind == "<instance>" and "instance" or "type"   162    163 def test_label_for_type(ref):   164    165     "Return the label used for 'ref' in test details."   166    167     return test_label_for_kind(ref.get_kind())   168    169    170    171 # Instruction representation encoding.   172    173 def encode_instruction(instruction):   174    175     """   176     Encode the 'instruction' - a sequence starting with an operation and   177     followed by arguments, each of which may be an instruction sequence or a   178     plain value - to produce a function call string representation.   179     """   180    181     op = instruction[0]   182     args = instruction[1:]   183    184     if args:   185         a = []   186         for arg in args:   187             if isinstance(arg, tuple):   188                 a.append(encode_instruction(arg))   189             else:   190                 a.append(arg or "{}")   191         argstr = "(%s)" % ", ".join(a)   192         return "%s%s" % (op, argstr)   193     else:   194         return op   195    196    197    198 # Output program encoding.   199    200 attribute_loading_ops = (   201     "__load_via_class", "__load_via_object", "__get_class_and_load",   202     )   203    204 attribute_ops = attribute_loading_ops + (   205     "__store_via_object",   206     )   207    208 checked_loading_ops = (   209     "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",   210     )   211    212 checked_ops = checked_loading_ops + (   213     "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",   214     )   215    216 typename_ops = (   217     "__test_common_instance", "__test_common_object", "__test_common_type",   218     )   219    220 type_ops = (   221     "__test_specific_instance", "__test_specific_object", "__test_specific_type",   222     )   223    224 static_ops = (   225     "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",   226     )   227    228 context_values = (   229     "<context>",   230     )   231    232 context_ops = (   233     "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",   234     )   235    236 context_op_functions = (   237     "<test_context_revert>", "<test_context_static>",   238     )   239    240 reference_acting_ops = attribute_ops + checked_ops + type_ops + typename_ops   241 attribute_producing_ops = attribute_loading_ops + checked_loading_ops   242    243 attribute_producing_variables = (   244     "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"   245     )   246    247 def encode_access_instruction(instruction, subs, context_index):   248    249     """   250     Encode the 'instruction' - a sequence starting with an operation and   251     followed by arguments, each of which may be an instruction sequence or a   252     plain value - to produce a function call string representation.   253    254     The 'subs' parameter defines a mapping of substitutions for special values   255     used in instructions.   256    257     The 'context_index' parameter defines the position in local context storage   258     for the referenced context or affected by a context operation.   259    260     Return both the encoded instruction and a collection of substituted names.   261     """   262    263     op = instruction[0]   264     args = instruction[1:]   265     substituted = set()   266    267     # Encode the arguments.   268    269     a = []   270     if args:   271         converting_op = op   272         for arg in args:   273             s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)   274             substituted.update(_substituted)   275             a.append(s)   276             converting_op = None   277    278     # Modify certain arguments.   279    280     # Convert type name arguments.   281    282     if op in typename_ops:   283         a[1] = encode_path(encode_type_attribute(args[1]))   284    285     # Obtain addresses of type arguments.   286    287     elif op in type_ops:   288         a[1] = "&%s" % a[1]   289    290     # Obtain addresses of static objects.   291    292     elif op in static_ops:   293         a[-1] = "&%s" % a[-1]   294    295     # Add context storage information to certain operations.   296    297     if op in context_ops:   298         a.insert(0, context_index)   299    300     # Add the local context array to certain operations.   301    302     if op in context_op_functions:   303         a.append("__tmp_contexts")   304    305     # Define any argument string.   306    307     if a:   308         argstr = "(%s)" % ", ".join(map(str, a))   309     else:   310         argstr = ""   311    312     # Substitute the first element of the instruction, which may not be an   313     # operation at all.   314    315     if subs.has_key(op):   316         substituted.add(op)   317    318         # Break accessor initialisation into initialisation and value-yielding   319         # parts:   320    321         if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):   322             ops = []   323             ops += a[0].get_init_instructions()   324             ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))   325             return ", ".join(map(str, ops)), substituted   326    327         op = subs[op]   328    329     elif not args:   330         op = "&%s" % encode_path(op)   331    332     return "%s%s" % (op, argstr), substituted   333    334 def encode_access_instruction_arg(arg, subs, op, context_index):   335    336     """   337     Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the   338     operation to which the argument belongs, and 'context_index' to indicate any   339     affected context storage.   340    341     Return a tuple containing the encoded form of 'arg' along with a collection   342     of any substituted values.   343     """   344    345     if isinstance(arg, tuple):   346         encoded, substituted = encode_access_instruction(arg, subs, context_index)   347         return attribute_to_reference(op, arg[0], encoded, substituted)   348    349     # Special values only need replacing, not encoding.   350    351     elif subs.has_key(arg):   352    353         # Handle values modified by storage details.   354    355         if arg in context_values:   356             encoded = "%s(%s)" % (subs.get(arg), context_index)   357         else:   358             encoded = subs.get(arg)   359    360         substituted = set([arg])   361         return attribute_to_reference(op, arg, encoded, substituted)   362    363     # Convert static references to the appropriate type.   364    365     elif op and op in reference_acting_ops and \   366          arg not in attribute_producing_variables:   367    368         return "&%s" % encode_path(arg), set()   369    370     # Other values may need encoding.   371    372     else:   373         return encode_path(arg), set()   374    375 def attribute_to_reference(op, arg, encoded, substituted):   376    377     # Convert attribute results to references where required.   378    379     if op and op in reference_acting_ops and (   380        arg in attribute_producing_ops or   381        arg in attribute_producing_variables):   382    383         return "__VALUE(%s)" % encoded, substituted   384     else:   385         return encoded, substituted   386    387 def encode_function_pointer(path):   388    389     "Encode 'path' as a reference to an output program function."   390    391     return "__fn_%s" % encode_path(path)   392    393 def encode_instantiator_pointer(path):   394    395     "Encode 'path' as a reference to an output program instantiator."   396    397     return "__new_%s" % encode_path(path)   398    399 def encode_instructions(instructions):   400    401     "Encode 'instructions' as a sequence."   402    403     if len(instructions) == 1:   404         return instructions[0]   405     else:   406         return "(\n%s\n)" % ",\n".join(instructions)   407    408 def encode_literal_constant(n):   409    410     "Encode a name for the literal constant with the number 'n'."   411    412     return "__const%s" % n   413    414 def encode_literal_constant_size(value):   415    416     "Encode a size for the literal constant with the given 'value'."   417    418     if isinstance(value, basestring):   419         return len(value)   420     else:   421         return 0   422    423 def encode_literal_constant_member(value):   424    425     "Encode the member name for the 'value' in the final program."   426    427     return "%svalue" % value.__class__.__name__   428    429 def encode_literal_constant_value(value):   430    431     "Encode the given 'value' in the final program."   432    433     if isinstance(value, (int, float)):   434         return str(value)   435     else:   436         l = []   437    438         # Encode characters including non-ASCII ones.   439    440         for c in str(value):   441             if c == '"': l.append('\\"')   442             elif c == '\n': l.append('\\n')   443             elif c == '\t': l.append('\\t')   444             elif c == '\r': l.append('\\r')   445             elif c == '\\': l.append('\\\\')   446             elif 0x20 <= ord(c) < 0x80: l.append(c)   447             else: l.append("\\x%02x" % ord(c))   448    449         return '"%s"' % "".join(l)   450    451 def encode_literal_data_initialiser(style):   452    453     """   454     Encode a reference to a function populating the data for a literal having   455     the given 'style' ("mapping" or "sequence").   456     """   457    458     return "__newdata_%s" % style   459    460 def encode_literal_instantiator(path):   461    462     """   463     Encode a reference to an instantiator for a literal having the given 'path'.   464     """   465    466     return "__newliteral_%s" % encode_path(path)   467    468 def encode_literal_reference(n):   469    470     "Encode a reference to a literal constant with the number 'n'."   471    472     return "__constvalue%s" % n   473    474    475    476 # Track all encoded paths, detecting and avoiding conflicts.   477    478 all_encoded_paths = {}   479    480 def encode_path(path):   481    482     "Encode 'path' as an output program object, translating special symbols."   483    484     if path in reserved_words:   485         return "__%s" % path   486     else:   487         part_encoded = path.replace("#", "__").replace("$", "__")   488    489         if "." not in path:   490             return part_encoded   491    492         encoded = part_encoded.replace(".", "_")   493    494         # Test for a conflict with the encoding of a different path, re-encoding   495         # if necessary.   496    497         previous = all_encoded_paths.get(encoded)   498         replacement = "_"   499    500         while previous:   501             if path == previous:   502                 return encoded   503             replacement += "_"   504             encoded = part_encoded.replace(".", replacement)   505             previous = all_encoded_paths.get(encoded)   506    507         # Store any new or re-encoded path.   508    509         all_encoded_paths[encoded] = path   510         return encoded   511    512 def encode_code(name):   513    514     "Encode 'name' as an attribute code indicator."   515    516     return "__ATTRCODE(%s)" % encode_path(name)   517    518 def encode_pcode(name):   519    520     "Encode 'name' as an parameter code indicator."   521    522     return "__PARAMCODE(%s)" % encode_path(name)   523    524 def encode_pos(name):   525    526     "Encode 'name' as an attribute position indicator."   527    528     return "__ATTRPOS(%s)" % encode_path(name)   529    530 def encode_ppos(name):   531    532     "Encode 'name' as an parameter position indicator."   533    534     return "__PARAMPOS(%s)" % encode_path(name)   535    536 def encode_predefined_reference(path):   537    538     "Encode a reference to a predefined constant value for 'path'."   539    540     return "__predefined_%s" % encode_path(path)   541    542 def encode_size(kind, path=None):   543    544     """   545     Encode a structure size reference for the given 'kind' of structure, with   546     'path' indicating a specific structure name.   547     """   548    549     return "__%ssize%s" % (structure_size_prefixes.get(kind, kind), path and "_%s" % encode_path(path) or "")   550    551 def encode_symbol(symbol_type, path=None):   552    553     "Encode a symbol with the given 'symbol_type' and optional 'path'."   554    555     return "__%s%s" % (symbol_type, path and "_%s" % encode_path(path) or "")   556    557 def encode_tablename(kind, path):   558    559     """   560     Encode a table reference for the given 'kind' of table structure, indicating   561     a 'path' for the specific object concerned.   562     """   563    564     return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))   565    566 def encode_type_attribute(path):   567    568     "Encode the special type attribute for 'path'."   569    570     return "#%s" % path   571    572 def decode_type_attribute(s):   573    574     "Decode the special type attribute 's'."   575    576     return s[1:]   577    578 def is_type_attribute(s):   579    580     "Return whether 's' is a type attribute name."   581    582     return s.startswith("#")   583    584    585    586 # A mapping from kinds to structure size reference prefixes.   587    588 structure_size_prefixes = {   589     "<class>" : "c",   590     "<module>" : "m",   591     "<instance>" : "i"   592     }   593    594 # A mapping from kinds to table name prefixes.   595    596 table_name_prefixes = {   597     "<class>" : "Class",   598     "<function>" : "Function",   599     "<module>" : "Module",   600     "<instance>" : "Instance"   601     }   602    603    604    605 # Output language reserved words.   606    607 reserved_words = [   608     "break", "char", "const", "continue",   609     "default", "double", "else",   610     "float", "for",   611     "if", "int", "long",   612     "NULL",   613     "return", "struct",   614     "typedef",   615     "void", "while",   616     ]   617    618 # vim: tabstop=4 expandtab shiftwidth=4