Lichen

Annotated encoders.py

611:d1d907801d42
2017-02-23 Paul Boddie Replaced list comprehension usage. method-wrapper-for-context
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Encoder functions, producing representations of program objects.
paul@0 5
paul@498 6
Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>
paul@0 7
paul@0 8
This program is free software; you can redistribute it and/or modify it under
paul@0 9
the terms of the GNU General Public License as published by the Free Software
paul@0 10
Foundation; either version 3 of the License, or (at your option) any later
paul@0 11
version.
paul@0 12
paul@0 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 16
details.
paul@0 17
paul@0 18
You should have received a copy of the GNU General Public License along with
paul@0 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 20
"""
paul@0 21
paul@498 22
from common import first, InstructionSequence
paul@56 23
paul@609 24
paul@609 25
paul@609 26
# Value digest computation.
paul@609 27
paul@609 28
from base64 import b64encode
paul@609 29
from hashlib import sha1
paul@609 30
paul@609 31
def digest(values):

    """
    Return a compact textual digest of the given 'values'.

    Each value is converted to its string form and fed, in order, into a SHA-1
    hash. The binary digest is then base64-encoded and adjusted so that only
    letters, digits and underscores remain: "+" becomes "__", "/" becomes "_",
    and trailing "=" padding is removed.
    """

    m = sha1()
    for value in values:
        data = str(value)

        # Hash objects only accept byte strings: plain strings already are
        # byte strings on Python 2, whereas on Python 3 they need encoding.

        if not isinstance(data, bytes):
            data = data.encode("utf-8")
        m.update(data)

    encoded = b64encode(m.digest())

    # On Python 3, the base64 result is a byte string needing conversion
    # before the textual replacements can be applied.

    if not isinstance(encoded, str):
        encoded = encoded.decode("ascii")

    return encoded.replace("+", "__").replace("/", "_").rstrip("=")
paul@609 36
paul@609 37
paul@609 38
paul@0 39
# Output encoding and decoding for the summary files.
paul@0 40
paul@0 41
def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage as a comma-separated string,
    with "{}" being produced where the joined result would otherwise be empty.
    """

    encoded = ", ".join(attrnames)
    if encoded:
        return encoded
    return "{}"
paul@0 46
paul@0 47
def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries, returning
    "constrained" for a true status and "deduced" otherwise.
    """

    if constrained:
        return "constrained"
    return "deduced"
paul@0 52
paul@0 53
def encode_usage(usage):

    """
    Encode attribute details from 'usage', this being a collection of
    (attribute name, invocation flag, assignment flag) tuples.

    Each name is suffixed with "!" if invoked or, failing that, with "=" if
    assigned. The encoded names are joined using ", ", with "{}" being
    produced where the joined result would otherwise be empty.
    """

    all_attrnames = []
    for attrname, invocation, assignment in usage:

        # Invocation takes precedence over assignment.

        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""
        all_attrnames.append("%s%s" % (attrname, suffix))

    encoded = ", ".join(all_attrnames)
    if encoded:
        return encoded
    return "{}"
paul@0 62
paul@88 63
def decode_usage(s):

    """
    Decode attribute details from 's', a string of ", "-separated attribute
    names, each optionally suffixed with "!" (invocation) or "=" (assignment).

    Return a sorted tuple of distinct (attribute name, invocation flag,
    assignment flag) tuples.
    """

    # NOTE(review): the "{}" form produced by encode_usage for empty usage is
    # NOTE(review): decoded here as an attribute named "{}" - confirm that
    # NOTE(review): callers handle the empty case before decoding.

    all_attrnames = set()
    for attrname_str in s.split(", "):
        all_attrnames.add((
            attrname_str.rstrip("!="),
            attrname_str.endswith("!"),
            attrname_str.endswith("="),
            ))

    return tuple(sorted(all_attrnames))
paul@88 74
paul@0 75
def encode_access_location(t):

    """
    Encode the access location 't', a (path, name, attribute name, version)
    tuple, as "path name attrname:version", with "{}" being used in place of
    an absent name.
    """

    path, name, attrname, version = t
    if not name:
        name = "{}"
    return "%s %s %s:%d" % (path, name, attrname, version)
paul@0 81
paul@0 82
def encode_location(t):

    """
    Encode the general location 't', a (path, name, attribute name, version)
    tuple, in a concise form.

    With a name and a version, "path name:version" is produced; with only a
    name, "path name" is produced; without a name, "path :attrname" is
    produced.
    """

    path, name, attrname, version = t

    # Without a name, only the attribute name can identify the location.

    if name is None:
        return "%s :%s" % (path, attrname)

    if version is None:
        return "%s %s" % (path, name)

    return "%s %s:%d" % (path, name, version)
paul@0 93
paul@0 94
def encode_modifiers(modifiers):

    """
    Encode assignment and invocation details from 'modifiers', concatenating
    the encoded form of each modifier term without any separator.
    """

    encoded_terms = []
    for term in modifiers:
        encoded_terms.append(encode_modifier_term(term))
    return "".join(encoded_terms)
paul@0 102
paul@0 103
def encode_modifier_term(t):

    """
    Encode modifier 't', an (assignment, invocation) tuple, representing an
    assignment or an invocation.

    An assignment is encoded as "="; an invocation, given as an (arguments,
    keywords) tuple, is encoded as "(arguments;keyword,keyword...)"; anything
    else is encoded as "_".
    """

    assignment, invocation = t

    if assignment:
        return "="

    if invocation is None:
        return "_"

    arguments, keywords = invocation
    return "(%d;%s)" % (arguments, ",".join(keywords))
paul@0 115
paul@553 116
def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of (assignment,
    invocation) tuples.

    A "=" character yields (True, None); a "(arguments;keywords)" group yields
    (False, (arguments, keywords)) with the arguments as an integer and the
    keywords as a list; any other character yields (False, None).

    A ValueError is raised for a group lacking ";" or ")" or having a
    non-integer argument count.
    """

    modifiers = []
    end = len(s)
    i = 0

    while i < end:
        c = s[i]

        # Assignment.

        if c == "=":
            modifiers.append((True, None))
            i += 1

        # Invocation: an argument count, then any keywords.

        elif c == "(":
            separator = s.index(";", i)
            arguments = int(s[i+1:separator])
            close = s.index(")", separator)
            keywords = s[separator+1:close]
            if keywords:
                keywords = keywords.split(",")
            else:
                keywords = []
            modifiers.append((False, (arguments, keywords)))
            i = close + 1

        # Neither assignment nor invocation.

        else:
            modifiers.append((False, None))
            i += 1

    return modifiers
paul@0 143
paul@56 144
paul@56 145
paul@56 146
# Test generation functions.
paul@56 147
paul@56 148
def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types.

    The result is a list containing the result of calling the get_kind method
    of each reference, in the order given.
    """

    # A list is built explicitly so that the same kind of result is obtained
    # regardless of whether map produces a list or an iterator.

    kinds = []
    for ref in all_types:
        kinds.append(ref.get_kind())
    return kinds
paul@56 156
paul@237 157
def test_label_for_kind(kind):
paul@56 158
paul@237 159
    "Return the label used for 'kind' in test details."
paul@56 160
paul@237 161
    return kind == "<instance>" and "instance" or "type"
paul@56 162
paul@237 163
def test_label_for_type(ref):

    """
    Return the label used for 'ref' in test details, this being the label
    for the kind reported by the reference's get_kind method.
    """

    kind = ref.get_kind()
    return test_label_for_kind(kind)
paul@56 168
paul@56 169
paul@56 170
paul@94 171
# Instruction representation encoding.
paul@94 172
paul@94 173
def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    Tuple arguments are encoded recursively as instructions. Other arguments
    are used as given, with "{}" replacing any false value. An instruction
    without arguments is encoded as the operation alone.
    """

    op = instruction[0]
    args = instruction[1:]

    if not args:
        return op

    encoded_args = []
    for arg in args:
        if isinstance(arg, tuple):
            encoded_args.append(encode_instruction(arg))
        else:
            encoded_args.append(arg or "{}")

    return "%s(%s)" % (op, ", ".join(encoded_args))
paul@94 195
paul@94 196
paul@94 197
paul@0 198
# Output program encoding.
paul@0 199
paul@153 200
# Operations loading attributes without checks.

attribute_loading_ops = (
    "__load_via_class", "__load_via_object", "__get_class_and_load",
    )

# Operations whose attribute name argument is converted to a position symbol
# by encode_access_instruction.

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

# Operations loading attributes with checks.

checked_loading_ops = (
    "__check_and_load_via_class", "__check_and_load_via_object", "__check_and_load_via_any",
    )

# Operations whose attribute name argument is converted to position and code
# symbols by encode_access_instruction.

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class", "__check_and_store_via_object", "__check_and_store_via_any",
    )

# Operations whose type name argument is converted to position and code
# symbols for the corresponding type attribute.

typename_ops = (
    "__test_common_instance", "__test_common_object", "__test_common_type",
    )

# Operations whose type argument is replaced by its address.

type_ops = (
    "__test_specific_instance", "__test_specific_object", "__test_specific_type",
    )

# Operations whose final argument is replaced by its address.

static_ops = (
    "__load_static_ignore", "__load_static_replace", "__load_static_test", "<test_context_static>",
    )

# Substitutable values whose substitutions are applied to the context index.

context_values = (
    "<context>",
    )

# Operations acquiring the context index as their first argument.

context_ops = (
    "<context>", "<set_context>", "<test_context_revert>", "<test_context_static>",
    )

# Operations acquiring the local context array as their final argument.

context_op_functions = (
    "<test_context_revert>", "<test_context_static>",
    )

# Operations having arguments converted to references, and operations
# producing attributes from which references may be obtained.

reference_acting_ops = attribute_ops + checked_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops
paul@153 242
paul@591 243
def encode_access_instruction(instruction, subs, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments. Only the first argument is encoded with knowledge
    # of the operation, this permitting its conversion to a reference.

    a = []
    converting_op = op
    for arg in args:
        s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)
        substituted.update(_substituted)
        a.append(s)
        converting_op = None

    # Modify certain arguments.

    # Convert attribute name arguments to position symbols.

    if op in attribute_ops:
        a[1] = encode_symbol("pos", a[1])

    # Convert attribute name arguments to position and code symbols.

    elif op in checked_ops:
        arg = a[1]
        a[1] = encode_symbol("pos", arg)
        a.insert(2, encode_symbol("code", arg))

    # Convert type name arguments to position and code symbols. Here, the
    # original argument is used, not its encoded form.

    elif op in typename_ops:
        arg = encode_type_attribute(args[1])
        a[1] = encode_symbol("pos", arg)
        a.insert(2, encode_symbol("code", arg))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add context storage information to certain operations.

    if op in context_ops:
        a.insert(0, context_index)

    # Add the local context array to certain operations.

    if op in context_op_functions:
        a.append("__tmp_contexts")

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Without arguments or a substitution, the element is treated as a path
    # whose address is obtained.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted
paul@113 344
paul@591 345
def encode_access_instruction_arg(arg, subs, op, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and 'context_index' to indicate any
    affected context storage.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    # Tuples are nested instructions needing encoding in their own right.

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, context_index)

        # Convert attribute results to references where required.

        if op and op in reference_acting_ops and arg[0] in attribute_producing_ops:
            return "%s.value" % encoded, substituted

        return encoded, substituted

    # Special values only need replacing, not encoding.

    if arg in subs:

        # Handle values modified by storage details.

        if arg in context_values:
            return "%s(%s)" % (subs.get(arg), context_index), set([arg])

        return subs.get(arg), set([arg])

    # Convert static references to the appropriate type.

    if op and op in reference_acting_ops and arg != "<accessor>":
        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    return encode_path(arg), set()
paul@113 386
paul@0 387
def encode_function_pointer(path):

    """
    Encode 'path' as a reference to an output program function, prefixing the
    encoded path with "__fn_".
    """

    encoded = encode_path(path)
    return "__fn_%s" % encoded
paul@0 392
paul@0 393
def encode_instantiator_pointer(path):

    """
    Encode 'path' as a reference to an output program instantiator, prefixing
    the encoded path with "__new_".
    """

    encoded = encode_path(path)
    return "__new_%s" % encoded
paul@0 398
paul@491 399
def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence.

    A single instruction is returned as it is. Otherwise, the instructions are
    separated by commas and newlines, and the result is enclosed in
    parentheses.
    """

    if len(instructions) == 1:
        return instructions[0]

    return "(\n%s\n)" % ",\n".join(instructions)
paul@491 407
paul@136 408
def encode_literal_constant(n):

    """
    Encode a name for the literal constant with the number 'n', this being
    the string form of 'n' prefixed with "__const".
    """

    # A one-element tuple prevents a tuple 'n' from being interpreted as a
    # collection of formatting arguments.

    return "__const%s" % (n,)
paul@136 413
paul@378 414
def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value', this being
    the length of a string value and zero for any other value.
    """

    # Byte and Unicode string types are both tested. On Python 2, these are
    # the types covered by basestring; on Python 3, they are the same type.

    if isinstance(value, (str, type(u""))):
        return len(value)

    return 0
paul@378 422
paul@136 423
def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being
    the name of the value's class followed by "value".
    """

    typename = value.__class__.__name__
    return "%svalue" % typename
paul@136 428
paul@136 429
def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integers and floating point numbers are encoded using their string form.
    Other values are converted to strings and encoded as double-quoted string
    literals, with quotes, backslashes, newlines, tabs and carriage returns
    being escaped symbolically, and with other characters outside the range
    0x20 to 0x7f being escaped numerically.
    """

    if isinstance(value, (int, float)):
        return str(value)

    symbolic = {
        '"' : '\\"',
        '\n' : '\\n',
        '\t' : '\\t',
        '\r' : '\\r',
        '\\' : '\\\\',
        }

    l = []

    # Encode characters including non-ASCII ones.

    for c in str(value):
        escaped = symbolic.get(c)
        if escaped is not None:
            l.append(escaped)
        elif 0x20 <= ord(c) < 0x80:
            l.append(c)

        # Use three-digit octal escapes: hexadecimal escapes in C have no
        # length limit and would absorb any following hexadecimal digit
        # characters, whereas octal escapes end after three digits.
        # NOTE(review): this assumes character values below 0x200, as is the
        # NOTE(review): case for Python 2 byte strings - confirm for Unicode.

        else:
            l.append("\\%03o" % ord(c))

    return '"%s"' % "".join(l)
paul@136 450
paul@283 451
def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence"), this being the style prefixed
    with "__newdata_".
    """

    return "__newdata_%s" % (style,)
paul@283 459
paul@159 460
def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given 'path',
    prefixing the encoded path with "__newliteral_".
    """

    encoded = encode_path(path)
    return "__newliteral_%s" % encoded
paul@159 467
paul@136 468
def encode_literal_reference(n):

    """
    Encode a reference to a literal constant with the number 'n', this being
    the string form of 'n' prefixed with "__constvalue".
    """

    # A one-element tuple prevents a tuple 'n' from being interpreted as a
    # collection of formatting arguments.

    return "__constvalue%s" % (n,)
paul@136 473
paul@512 474
paul@512 475
paul@340 476
# Track all encoded paths, detecting and avoiding conflicts.
paul@340 477
paul@340 478
all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" characters
    are replaced with "__" and, for paths containing ".", each "." is replaced
    with "_" or with a longer run of underscores if the shorter encoding has
    already been recorded for a different path. Encodings of paths containing
    "." are recorded in the module-level all_encoded_paths mapping.
    """

    if path in reserved_words:
        return "__%s" % path

    part_encoded = path.replace("#", "__").replace("$", "__")

    # Paths without separators are not recorded or tested for conflicts.

    if "." not in path:
        return part_encoded

    replacement = "_"
    encoded = part_encoded.replace(".", replacement)

    # Test for a conflict with the encoding of a different path, re-encoding
    # with a longer separator replacement if necessary.

    previous = all_encoded_paths.get(encoded)

    while previous:

        # An existing encoding of the same path can be reused.

        if path == previous:
            return encoded

        replacement += "_"
        encoded = part_encoded.replace(".", replacement)
        previous = all_encoded_paths.get(encoded)

    # Store any new or re-encoded path.

    all_encoded_paths[encoded] = path
    return encoded
paul@0 511
paul@136 512
def encode_predefined_reference(path):

    """
    Encode a reference to a predefined constant value for 'path', prefixing
    the encoded path with "__predefined_".
    """

    encoded = encode_path(path)
    return "__predefined_%s" % encoded
paul@136 517
paul@150 518
def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    The 'kind' is translated using structure_size_prefixes, being used as it
    is if no translation is defined. Any 'path' is encoded and appended after
    an underscore.
    """

    prefix = structure_size_prefixes.get(kind, kind)

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%ssize%s" % (prefix, suffix)
paul@150 526
paul@0 527
def encode_symbol(symbol_type, path=None):

    """
    Encode a symbol with the given 'symbol_type' and optional 'path'.

    The result is the symbol type prefixed with "__" and, if a path is given,
    followed by an underscore and the encoded path.
    """

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%s%s" % (symbol_type, suffix)
paul@0 532
paul@150 533
def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned.

    The 'kind' must be a key in table_name_prefixes, with a KeyError being
    raised otherwise.
    """

    prefix = table_name_prefixes[kind]
    return "__%sTable_%s" % (prefix, encode_path(path))
paul@150 541
paul@131 542
def encode_type_attribute(path):

    """
    Encode the special type attribute for 'path', this being the path
    prefixed with "#".
    """

    return "#%s" % (path,)
paul@131 547
paul@318 548
def decode_type_attribute(s):

    """
    Decode the special type attribute 's', returning everything after its
    first character. No test is made for that character being "#".
    """

    return s[1:]
paul@318 553
paul@318 554
def is_type_attribute(s):

    """
    Return whether 's' is a type attribute name, this being indicated by a
    leading "#" character.
    """

    return s.startswith("#")
paul@318 559
paul@56 560
paul@56 561
paul@150 562
# A mapping from kinds to structure size reference prefixes.
paul@150 563
paul@150 564
# Kinds without an entry are used unchanged by encode_size.

structure_size_prefixes = {
    "<class>" : "c",
    "<module>" : "m",
    "<instance>" : "i",
    }
paul@150 569
paul@150 570
# A mapping from kinds to table name prefixes.
paul@150 571
paul@150 572
# Every kind given to encode_tablename needs an entry here.

table_name_prefixes = {
    "<class>" : "Class",
    "<function>" : "Function",
    "<module>" : "Module",
    "<instance>" : "Instance",
    }
paul@150 578
paul@150 579
paul@150 580
paul@0 581
# Output language reserved words.
paul@0 582
paul@0 583
# Paths matching these words are prefixed with "__" by encode_path.
# NOTE(review): not every C keyword is listed (for example, "switch", "static"
# NOTE(review): and "union" are absent) - confirm whether such names can reach
# NOTE(review): the output program unmodified.

reserved_words = [
    "break", "char", "const", "continue",
    "default", "double", "else",
    "float", "for",
    "if", "int", "long",
    "NULL",
    "return", "struct",
    "typedef",
    "void", "while",
    ]
paul@0 593
paul@0 594
# vim: tabstop=4 expandtab shiftwidth=4