Lichen

encoders.py

808:d0cef6095b28
2017-04-11 Paul Boddie Used string formatting instead of the buffer class for string representations.
     1 #!/usr/bin/env python     2      3 """     4 Encoder functions, producing representations of program objects.     5      6 Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from common import first, InstructionSequence    23     24     25     26 # Value digest computation.    27     28 from base64 import b64encode    29 from hashlib import sha1    30     31 def digest(values):    32     m = sha1()    33     for value in values:    34         m.update(str(value))    35     return b64encode(m.digest()).replace("+", "__").replace("/", "_").rstrip("=")    36     37     38     39 # Output encoding and decoding for the summary files.    40     41 def encode_attrnames(attrnames):    42     43     "Encode the 'attrnames' representing usage."    44     45     return ", ".join(attrnames) or "{}"    46     47 def encode_constrained(constrained):    48     49     "Encode the 'constrained' status for program summaries."    50     51     return constrained and "constrained" or "deduced"    52     53 def encode_usage(usage):    54     55     "Encode attribute details from 'usage'."    56     57     all_attrnames = []    58     for t in usage:    59         attrname, invocation, assignment = t    60         all_attrnames.append("%s%s" % (attrname, invocation and "!" 
or assignment and "=" or ""))    61     return ", ".join(all_attrnames) or "{}"    62     63 def decode_usage(s):    64     65     "Decode attribute details from 's'."    66     67     all_attrnames = set()    68     for attrname_str in s.split(", "):    69         all_attrnames.add((attrname_str.rstrip("!="), attrname_str.endswith("!"), attrname_str.endswith("=")))    70     71     all_attrnames = list(all_attrnames)    72     all_attrnames.sort()    73     return tuple(all_attrnames)    74     75 def encode_access_location(t):    76     77     "Encode the access location 't'."    78     79     return "%s:%s:%s:%d" % (t.path, t.name or "{}", t.attrnames or "{}", t.access_number)    80     81 def encode_alias_location(t, invocation=False):    82     83     "Encode the alias location 't'."    84     85     return "%s:%s:%s%s%s%s" % (t.path, t.name or "{}", t.attrnames or "{}",    86         t.version is not None and ":=%d" % t.version or "",    87         t.access_number is not None and ":#%d" % t.access_number or "",    88         invocation and "!" or "")    89     90 def encode_location(t):    91     92     "Encode the general location 't' in a concise form."    93     94     if t.name is not None and t.version is not None:    95         return "%s:%s:%d" % (t.path, t.name, t.version)    96     elif t.name is not None:    97         return "%s:%s" % (t.path, t.name)    98     else:    99         return "%s::%s" % (t.path, t.attrnames)   100    101 def encode_modifiers(modifiers):   102    103     "Encode assignment and invocation details from 'modifiers'."   104    105     all_modifiers = []   106     for t in modifiers:   107         all_modifiers.append(encode_modifier_term(t))   108     return "".join(all_modifiers)   109    110 def encode_modifier_term(t):   111    112     "Encode modifier 't' representing an assignment or an invocation."   
113    114     assignment, invocation = t   115     if assignment:   116         return "="   117     elif invocation is not None:   118         arguments, keywords = invocation   119         return "(%d;%s)" % (arguments, ",".join(keywords))   120     else:   121         return "_"   122    123 def decode_modifiers(s):   124    125     "Decode 's' containing modifiers."   126    127     i = 0   128     end = len(s)   129    130     modifiers = []   131    132     while i < end:   133         if s[i] == "=":   134             modifiers.append((True, None))   135             i += 1   136         elif s[i] == "(":   137             j = s.index(";", i)   138             arguments = int(s[i+1:j])   139             i = j   140             j = s.index(")", i)   141             keywords = s[i+1:j]   142             keywords = keywords and keywords.split(",") or []   143             modifiers.append((False, (arguments, keywords)))   144             i = j + 1   145         else:   146             modifiers.append((False, None))   147             i += 1   148    149     return modifiers   150    151    152    153 # Test generation functions.   154    155 def get_kinds(all_types):   156    157     """    158     Return object kind details for 'all_types', being a collection of   159     references for program types.   160     """   161    162     return map(lambda ref: ref.get_kind(), all_types)   163    164 def test_label_for_kind(kind):   165    166     "Return the label used for 'kind' in test details."   167    168     return kind == "<instance>" and "instance" or "type"   169    170 def test_label_for_type(ref):   171    172     "Return the label used for 'ref' in test details."   173    174     return test_label_for_kind(ref.get_kind())   175    176    177    178 # Instruction representation encoding.   

def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    Tuple arguments are encoded recursively and empty plain values are
    written as "{}". An instruction without arguments is returned as the
    operation alone.
    """

    op = instruction[0]
    args = instruction[1:]

    if args:
        a = []
        for arg in args:
            if isinstance(arg, tuple):
                a.append(encode_instruction(arg))
            else:
                a.append(arg or "{}")
        argstr = "(%s)" % ", ".join(a)
        return "%s%s" % (op, argstr)
    else:
        return op



# Output program encoding.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",
    )

context_values = (
    "<context>",
    )

context_ops = (
    "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",
    )

context_op_functions = (
    "<test_context_revert>", "<test_context_static>",
    )

reference_acting_ops = attribute_ops + checked_ops + type_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops

attribute_producing_variables = (
    "<accessor>", "<context>", "<name>", "<private_context>", "<target_accessor>"
    )

def encode_access_instruction(instruction, subs, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments. Only the first argument is encoded with knowledge
    # of the operation; later arguments are encoded without it.

    a = []
    if args:
        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

    # Modify certain arguments.

    # Convert type name arguments.

    if op in typename_ops:
        a[1] = encode_path(encode_type_attribute(args[1]))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add context storage information to certain operations.

    if op in context_ops:
        a.insert(0, context_index)

    # Add the local context array to certain operations.

    if op in context_op_functions:
        a.append("__tmp_contexts")

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Unsubstituted values without arguments are encoded as addresses.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted

def encode_access_instruction_arg(arg, subs, op, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and 'context_index' to indicate any
    affected context storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, context_index)
        return attribute_to_reference(op, arg[0], encoded, substituted)

    # Special values only need replacing, not encoding.

    elif arg in subs:

        # Handle values modified by storage details.

        if arg in context_values:
            encoded = "%s(%s)" % (subs.get(arg), context_index)
        else:
            encoded = subs.get(arg)

        substituted = set([arg])
        return attribute_to_reference(op, arg, encoded, substituted)

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and \
         arg not in attribute_producing_variables:

        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()

def attribute_to_reference(op, arg, encoded, substituted):

    """
    Convert attribute results to references where required: if 'op' acts on
    references and 'arg' is an attribute-producing operation or variable, wrap
    'encoded' in "__VALUE(...)".

    Return a tuple of the possibly wrapped 'encoded' string and the given
    'substituted' collection.
    """

    if op and op in reference_acting_ops and (
       arg in attribute_producing_ops or
       arg in attribute_producing_variables):

        return "__VALUE(%s)" % encoded, substituted
    else:
        return encoded, substituted

def encode_function_pointer(path):

    "Encode 'path' as a reference to an output program function."

    return "__fn_%s" % encode_path(path)

def encode_instantiator_pointer(path):

    "Encode 'path' as a reference to an output program instantiator."

    return "__new_%s" % encode_path(path)

def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence. A single instruction is returned
    unchanged; otherwise, the instructions are joined with commas inside
    parentheses, one per line.
    """

    if len(instructions) == 1:
        return instructions[0]
    else:
        return "(\n%s\n)" % ",\n".join(instructions)

def encode_literal_constant(n):

    "Encode a name for the literal constant with the number 'n'."

    return "__const%s" % n

def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value': the length
    of a string value, or zero for any other kind of value.
    """

    if isinstance(value, basestring):
        return len(value)
    else:
        return 0

def encode_literal_constant_member(value):

    "Encode the member name for the 'value' in the final program."

    return "%svalue" % value.__class__.__name__

def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program. Numbers are written using
    their string form; other values are written as double-quoted string
    literals with special and non-printable characters escaped.
    """

    if isinstance(value, (int, float)):
        return str(value)
    else:
        l = []

        # Encode characters including non-ASCII ones.

        for c in str(value):
            if c == '"': l.append('\\"')
            elif c == '\n': l.append('\\n')
            elif c == '\t': l.append('\\t')
            elif c == '\r': l.append('\\r')
            elif c == '\\': l.append('\\\\')
            elif 0x20 <= ord(c) < 0x80: l.append(c)

            # Use fixed-width octal escapes: a hexadecimal escape has no
            # length limit in the output language and would absorb any
            # following character that happens to be a hexadecimal digit,
            # whereas an octal escape ends after three digits.

            else: l.append("\\%03o" % ord(c))

        return '"%s"' % "".join(l)

def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    return "__newdata_%s" % style

def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path'.
    """

    return "__newliteral_%s" % encode_path(path)

def encode_literal_reference(n):

    "Encode a reference to a literal constant with the number 'n'."

    return "__constvalue%s" % n



# Track all encoded paths, detecting and avoiding conflicts.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" become "__"
    and "." becomes "_". Where the encoding of a dotted path has already been
    recorded for a different path, longer runs of "_" are tried as the
    replacement for "." until an unused or matching encoding is found, and the
    result is recorded in 'all_encoded_paths'.
    """

    if path in reserved_words:
        return "__%s" % path
    else:
        part_encoded = path.replace("#", "__").replace("$", "__")

        # Paths without dots are not recorded or checked for conflicts.

        if "." not in path:
            return part_encoded

        encoded = part_encoded.replace(".", "_")

        # Test for a conflict with the encoding of a different path, re-encoding
        # if necessary.

        previous = all_encoded_paths.get(encoded)
        replacement = "_"

        while previous:
            if path == previous:
                return encoded
            replacement += "_"
            encoded = part_encoded.replace(".", replacement)
            previous = all_encoded_paths.get(encoded)

        # Store any new or re-encoded path.

        all_encoded_paths[encoded] = path
        return encoded

def encode_code(name):

    "Encode 'name' as an attribute code indicator."

    return "__ATTRCODE(%s)" % encode_path(name)

def encode_pcode(name):

    "Encode 'name' as a parameter code indicator."

    return "__PARAMCODE(%s)" % encode_path(name)

def encode_pos(name):

    "Encode 'name' as an attribute position indicator."

    return "__ATTRPOS(%s)" % encode_path(name)

def encode_ppos(name):

    "Encode 'name' as a parameter position indicator."

    return "__PARAMPOS(%s)" % encode_path(name)

def encode_predefined_reference(path):

    "Encode a reference to a predefined constant value for 'path'."

    return "__predefined_%s" % encode_path(path)

def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    Kinds without an entry in 'structure_size_prefixes' are used directly as
    the prefix.
    """

    return "__%ssize%s" % (structure_size_prefixes.get(kind, kind), path and "_%s" % encode_path(path) or "")

def encode_symbol(symbol_type, path=None):

    "Encode a symbol with the given 'symbol_type' and optional 'path'."

    return "__%s%s" % (symbol_type, path and "_%s" % encode_path(path) or "")

def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned.

    A KeyError is raised for any 'kind' absent from 'table_name_prefixes'.
    """

    return "__%sTable_%s" % (table_name_prefixes[kind], encode_path(path))

def encode_type_attribute(path):

    "Encode the special type attribute for 'path'."

    return "#%s" % path

def decode_type_attribute(s):

    "Decode the special type attribute 's'."

    return s[1:]

def is_type_attribute(s):

    "Return whether 's' is a type attribute name."

    return s.startswith("#")



# A mapping from kinds to structure size reference prefixes.

# Kinds not listed here are used unchanged as the prefix by encode_size.

structure_size_prefixes = {
    "<class>": "c",
    "<module>": "m",
    "<instance>": "i",
}

# A mapping from kinds to table name prefixes.

table_name_prefixes = {
    "<class>": "Class",
    "<function>": "Function",
    "<module>": "Module",
    "<instance>": "Instance",
}



# Output language reserved words.

reserved_words = [
    "break",
    "char",
    "const",
    "continue",
    "default",
    "double",
    "else",
    "float",
    "for",
    "if",
    "int",
    "long",
    "NULL",
    "return",
    "struct",
    "typedef",
    "void",
    "while",
]

# vim: tabstop=4 expandtab shiftwidth=4