Lichen

encoders.py

835:4bf5180fbfee
2018-07-02 Paul Boddie Added missing sequence length check when unpacking sequences.
     1 #!/usr/bin/env python     2      3 """     4 Encoder functions, producing representations of program objects.     5      6 Copyright (C) 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from common import first, InstructionSequence    23     24     25     26 # Value digest computation.    27     28 from base64 import b64encode    29 from hashlib import sha1    30     31 def digest(values):    32     m = sha1()    33     for value in values:    34         m.update(str(value))    35     return b64encode(m.digest()).replace("+", "__").replace("/", "_").rstrip("=")    36     37     38     39 # Output encoding and decoding for the summary files.    40     41 def encode_attrnames(attrnames):    42     43     "Encode the 'attrnames' representing usage."    44     45     return ", ".join(attrnames) or "{}"    46     47 def encode_constrained(constrained):    48     49     "Encode the 'constrained' status for program summaries."    50     51     return constrained and "constrained" or "deduced"    52     53 def encode_usage(usage):    54     55     "Encode attribute details from 'usage'."    
56     57     all_attrnames = []    58     for t in usage:    59         attrname, invocation, assignment = t    60         all_attrnames.append("%s%s" % (attrname, invocation and "!" or assignment and "=" or ""))    61     return ", ".join(all_attrnames) or "{}"    62     63 def decode_usage(s):    64     65     "Decode attribute details from 's'."    66     67     all_attrnames = set()    68     for attrname_str in s.split(", "):    69         all_attrnames.add((attrname_str.rstrip("!="), attrname_str.endswith("!"), attrname_str.endswith("=")))    70     71     all_attrnames = list(all_attrnames)    72     all_attrnames.sort()    73     return tuple(all_attrnames)    74     75 def encode_access_location(t):    76     77     "Encode the access location 't'."    78     79     return "%s:%s:%s:%d" % (t.path, t.name or "{}", t.attrnames or "{}", t.access_number)    80     81 def encode_alias_location(t, invocation=False):    82     83     "Encode the alias location 't'."    84     85     return "%s:%s:%s%s%s%s" % (t.path, t.name or "{}", t.attrnames or "{}",    86         t.version is not None and ":=%d" % t.version or "",    87         t.access_number is not None and ":#%d" % t.access_number or "",    88         invocation and "!" or "")    89     90 def decode_alias_location(s):    91     92     "Decode the alias location 's'."    93     94     path, name, rest = s.split(":", 2)    95     attrnames = version = access_number = None    96     invocation = rest.endswith("!")    97     98     t = rest.rstrip("!").split(":#")    99     if len(t) > 1:   100         rest = t[0]; access_number = int(t[1])   101    102     t = rest.split(":=")   103     if len(t) > 1:   104         attrnames = t[0]; version = int(t[1])   105     else:   106         attrnames = rest   107    108     return path, name, attrnames, version, access_number, invocation   109    110 def encode_location(t):   111    112     "Encode the general location 't' in a concise form."   
113    114     if t.name is not None and t.version is not None:   115         return "%s:%s:%d" % (t.path, t.name, t.version)   116     elif t.name is not None:   117         return "%s:%s" % (t.path, t.name)   118     else:   119         return "%s::%s" % (t.path, t.attrnames)   120    121 def encode_modifiers(modifiers):   122    123     "Encode assignment and invocation details from 'modifiers'."   124    125     all_modifiers = []   126     for t in modifiers:   127         all_modifiers.append(encode_modifier_term(t))   128     return "".join(all_modifiers)   129    130 def encode_modifier_term(t):   131    132     "Encode modifier 't' representing an assignment or an invocation."   133    134     assignment, invocation = t   135     if assignment:   136         return "="   137     elif invocation is not None:   138         arguments, keywords = invocation   139         return "(%d;%s)" % (arguments, ",".join(keywords))   140     else:   141         return "_"   142    143 def decode_modifiers(s):   144    145     "Decode 's' containing modifiers."   146    147     i = 0   148     end = len(s)   149    150     modifiers = []   151    152     while i < end:   153         if s[i] == "=":   154             modifiers.append((True, None))   155             i += 1   156         elif s[i] == "(":   157             j = s.index(";", i)   158             arguments = int(s[i+1:j])   159             i = j   160             j = s.index(")", i)   161             keywords = s[i+1:j]   162             keywords = keywords and keywords.split(",") or []   163             modifiers.append((False, (arguments, keywords)))   164             i = j + 1   165         else:   166             modifiers.append((False, None))   167             i += 1   168    169     return modifiers   170    171    172    173 # Test generation functions.   
174    175 def get_kinds(all_types):   176    177     """    178     Return object kind details for 'all_types', being a collection of   179     references for program types.   180     """   181    182     return map(lambda ref: ref.get_kind(), all_types)   183    184 def test_label_for_kind(kind):   185    186     "Return the label used for 'kind' in test details."   187    188     return kind == "<instance>" and "instance" or "type"   189    190 def test_label_for_type(ref):   191    192     "Return the label used for 'ref' in test details."   193    194     return test_label_for_kind(ref.get_kind())   195    196    197    198 # Instruction representation encoding.   199    200 def encode_instruction(instruction):   201    202     """   203     Encode the 'instruction' - a sequence starting with an operation and   204     followed by arguments, each of which may be an instruction sequence or a   205     plain value - to produce a function call string representation.   206     """   207    208     op = instruction[0]   209     args = instruction[1:]   210    211     if args:   212         a = []   213         for arg in args:   214             if isinstance(arg, tuple):   215                 a.append(encode_instruction(arg))   216             else:   217                 a.append(arg or "{}")   218         argstr = "(%s)" % ", ".join(a)   219         return "%s%s" % (op, argstr)   220     else:   221         return op   222    223    224    225 # Output program encoding.   

# Operations loading attributes.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

# Operations loading or storing attributes.

attribute_ops = attribute_loading_ops + (
    "__store_via_class", "__store_via_object",
    )

# Checked operations loading attributes.

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

# Checked operations loading or storing attributes.

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

# Operations whose second argument is encoded as a type attribute name.

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

# Operations whose second argument is encoded as the address of a type.

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

# Operations whose final argument is encoded as the address of an object.

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",
    )

# Special values whose substitutions are parameterised using the context index.

context_values = (
    "<context>",
    )

# Operations acquiring the context index as their first argument.

context_ops = (
    "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",
    )

# Operations acquiring the local context array as their final argument.

context_op_functions = (
    "<test_context_revert>", "<test_context_static>",
    )

# Operations whose initial argument is treated as a reference, along with the
# operations and special values whose results are converted for such usage.

reference_acting_ops = attribute_ops + checked_ops + type_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops

attribute_producing_variables = (
    "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"
    )

def encode_access_instruction(instruction, subs, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments. Only the first argument is encoded with knowledge
    # of the operation, permitting its conversion to a reference.

    a = []
    if args:
        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

    # Modify certain arguments.

    # Convert type name arguments.

    if op in typename_ops:
        a[1] = encode_path(encode_type_attribute(args[1]))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add context storage information to certain operations.

    if op in context_ops:
        a.insert(0, context_index)

    # Add the local context array to certain operations.

    if op in context_op_functions:
        a.append("__tmp_contexts")

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Without arguments or a substitution, the element is encoded as the
    # address of the object it names.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and 'context_index' to indicate any
    affected context storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    # Nested instructions are encoded recursively.

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, context_index)
        return attribute_to_reference(op, arg[0], encoded, substituted)

    # Special values only need replacing, not encoding.

    elif arg in subs:

        # Handle values modified by storage details.

        if arg in context_values:
            encoded = "%s(%s)" % (subs.get(arg), context_index)
        else:
            encoded = subs.get(arg)

        substituted = set([arg])
        return attribute_to_reference(op, arg, encoded, substituted)

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and \
         arg not in attribute_producing_variables:

        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def attribute_to_reference(op, arg, encoded, substituted):

    """
    Return a tuple containing the 'encoded' form of 'arg' together with the
    'substituted' collection. The encoded form is wrapped using "__VALUE" where
    'op' is an operation acting on references and 'arg' is an operation or
    special value producing attributes.
    """

    # Convert attribute results to references where required.

    if op and op in reference_acting_ops and (
       arg in attribute_producing_ops or
       arg in attribute_producing_variables):

        return "__VALUE(%s)" % encoded, substituted
    else:
        return encoded, substituted

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence. A single instruction is returned
    unchanged, whereas other numbers of instructions are separated by commas
    and newlines and enclosed in parentheses.
    """

    if len(instructions) == 1:
        return instructions[0]
    else:
        return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%s" % n

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', this being
    the length of a string value and zero for any other kind of value.
    """

    if isinstance(value, basestring):
        return len(value)
    else:
        return 0

def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being the
    name of the value's class followed by "value".
    """

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program. Integers and floating point
    numbers are encoded as their string form. Other values are encoded as a
    double-quoted string with quotes, backslashes, newlines, tabs and carriage
    returns escaped, and with characters outside the range 0x20 to 0x7f
    encoded as hexadecimal escapes.
    """

    if isinstance(value, (int, float)):
        return str(value)
    else:
        l = []

        # Encode characters including non-ASCII ones.

        for c in str(value):
            if c == '"':
                l.append('\\"')
            elif c == '\n':
                l.append('\\n')
            elif c == '\t':
                l.append('\\t')
            elif c == '\r':
                l.append('\\r')
            elif c == '\\':
                l.append('\\\\')
            elif 0x20 <= ord(c) < 0x80:
                l.append(c)
            else:
                l.append("\\x%02x" % ord(c))

        return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%s" % n



# Track all encoded paths, detecting and avoiding conflicts.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". In other paths, "#" and "$" are
    replaced by "__", and "." is replaced by "_". Where the encoded form of a
    dotted path has already been recorded for a different path, additional
    underscores are used in place of each "." until an unused encoding is
    found. Encodings of dotted paths are recorded in 'all_encoded_paths'.
    """

    if path in reserved_words:
        return "__%s" % path
    else:
        part_encoded = path.replace("#", "__").replace("$", "__")

        # Paths without dots are not recorded or tested for conflicts.

        if "." not in path:
            return part_encoded

        encoded = part_encoded.replace(".", "_")

        # Test for a conflict with the encoding of a different path, re-encoding
        # if necessary.

        previous = all_encoded_paths.get(encoded)
        replacement = "_"

        while previous:
            if path == previous:
                return encoded
            replacement += "_"
            encoded = part_encoded.replace(".", replacement)
            previous = all_encoded_paths.get(encoded)

        # Store any new or re-encoded path.

        all_encoded_paths[encoded] = path
        return encoded

def encode_code(name):

    "Encode 'name' as an attribute code indicator."

    return "__ATTRCODE(%s)" % encode_path(name)

def encode_pcode(name):

    "Encode 'name' as a parameter code indicator."

    return "__PARAMCODE(%s)" % encode_path(name)

def encode_pos(name):

    "Encode 'name' as an attribute position indicator."

    return "__ATTRPOS(%s)" % encode_path(name)

def encode_ppos(name):

    "Encode 'name' as a parameter position indicator."

    return "__PARAMPOS(%s)" % encode_path(name)

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    The 'kind' is abbreviated using 'structure_size_prefixes' where an entry is
    defined for it and is otherwise used directly.
    """

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%ssize%s" % (structure_size_prefixes.get(kind, kind), suffix)

def encode_symbol(symbol_type, path=None):

    "Encode a symbol with the given 'symbol_type' and optional 'path'."

    suffix = "_%s" % encode_path(path) if path else ""
    return "__%s%s" % (symbol_type, suffix)

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned.

    A KeyError is raised if 'kind' has no entry in 'table_name_prefixes'.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path' by prefixing it with '#'."

    return "#%s" % path

def decode_type_attribute(s):

    "Decode the special type attribute 's' by removing its first character."

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name, meaning it starts with '#'."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i"
    }

# A mapping from kinds to table name prefixes.

table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance"
    }



# Output language reserved words. Paths matching these are given a "__" prefix
# by encode_path.

reserved_words = [
    "break", "char", "const", "continue",
    "default", "double", "else",
    "float", "for",
    "if", "int", "long",
    "NULL",
    "return", "struct",
    "typedef",
    "void", "while",
    ]

# vim: tabstop=4 expandtab shiftwidth=4