Lichen

Annotated encoders.py

601:adcdaeb19307
2017-02-19 Paul Boddie Fixed the context test and set operation to replace the local context with any applicable attribute context. Added a test of method rebinding that requires this fix. method-wrapper-for-context
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Encoder functions, producing representations of program objects.
paul@0 5
paul@498 6
Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>
paul@0 7
paul@0 8
This program is free software; you can redistribute it and/or modify it under
paul@0 9
the terms of the GNU General Public License as published by the Free Software
paul@0 10
Foundation; either version 3 of the License, or (at your option) any later
paul@0 11
version.
paul@0 12
paul@0 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 16
details.
paul@0 17
paul@0 18
You should have received a copy of the GNU General Public License along with
paul@0 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 20
"""
paul@0 21
paul@498 22
from common import first, InstructionSequence
paul@56 23
paul@0 24
# Output encoding and decoding for the summary files.
paul@0 25
paul@0 26
def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage as a comma-separated string,
    employing "{}" where the joined result would otherwise be empty.
    """

    encoded = ", ".join(attrnames)
    if not encoded:
        return "{}"
    return encoded
paul@0 31
paul@0 32
def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries: a true status
    yields "constrained" and any false status yields "deduced".
    """

    if constrained:
        return "constrained"
    else:
        return "deduced"
paul@0 37
paul@0 38
def encode_usage(usage):

    """
    Encode attribute details from 'usage', this being a collection of
    (attribute name, invocation, assignment) tuples. Invoked names are given
    a "!" suffix, assigned names a "=" suffix (invocation taking precedence),
    and the absence of any details is encoded as "{}".
    """

    encoded = []

    for attrname, invocation, assignment in usage:
        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""
        encoded.append("%s%s" % (attrname, suffix))

    return ", ".join(encoded) or "{}"
paul@0 47
paul@88 48
def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of
    (attribute name, invocation, assignment) tuples.

    Each comma-separated name in 's' may carry a "!" suffix indicating
    invocation or a "=" suffix indicating assignment. The "{}" placeholder
    produced by encode_usage for empty usage, as well as an empty string, is
    decoded as an empty tuple.
    """

    # Without this test, the placeholder would be decoded as an attribute
    # literally named "{}", breaking the round trip with encode_usage.

    if not s or s == "{}":
        return ()

    all_attrnames = set()

    for attrname_str in s.split(", "):
        all_attrnames.add((
            attrname_str.rstrip("!="),
            attrname_str.endswith("!"),
            attrname_str.endswith("="),
            ))

    # Sort the details to give a predictable result for the unordered set.

    return tuple(sorted(all_attrnames))
paul@88 59
paul@0 60
def encode_access_location(t):

    """
    Encode the access location 't', a (path, name, attribute name, version)
    tuple, substituting "{}" for an absent or empty name.
    """

    path, name, attrname, version = t

    if not name:
        name = "{}"

    return "%s %s %s:%d" % (path, name, attrname, version)
paul@0 66
paul@0 67
def encode_location(t):

    """
    Encode the general location 't', a (path, name, attribute name, version)
    tuple, in a concise form. The name and any version are shown where a name
    is given; otherwise, only the attribute name is shown.
    """

    path, name, attrname, version = t

    # Locations without names are described using the attribute name.

    if name is None:
        return "%s :%s" % (path, attrname)

    # Unversioned names are shown alone.

    if version is None:
        return "%s %s" % (path, name)

    return "%s %s:%d" % (path, name, version)
paul@0 78
paul@0 79
def encode_modifiers(modifiers):

    """
    Encode assignment and invocation details from 'modifiers', concatenating
    the encoded form of each modifier term without any separator.
    """

    return "".join([encode_modifier_term(t) for t in modifiers])
paul@0 87
paul@0 88
def encode_modifier_term(t):

    """
    Encode modifier 't', an (assignment, invocation) tuple, representing an
    assignment as "=", an invocation as "(<arguments>;<keywords>)" with the
    keywords separated by commas, and anything else as "_".
    """

    assignment, invocation = t

    if assignment:
        return "="

    if invocation is None:
        return "_"

    arguments, keywords = invocation
    return "(%d;%s)" % (arguments, ",".join(keywords))
paul@0 100
paul@553 101
def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of (assignment,
    invocation) tuples. A "=" character denotes an assignment, a
    "(<arguments>;<keywords>)" group denotes an invocation with an argument
    count and comma-separated keyword names, and any other character denotes
    a modifier that is neither.
    """

    modifiers = []
    pos = 0
    length = len(s)

    while pos < length:
        c = s[pos]

        # Assignment.

        if c == "=":
            modifiers.append((True, None))
            pos += 1

        # Invocation: the argument count precedes the semicolon and the
        # keywords follow it.

        elif c == "(":
            separator = s.index(";", pos)
            arguments = int(s[pos+1:separator])
            closing = s.index(")", separator)
            keyword_str = s[separator+1:closing]

            if keyword_str:
                keywords = keyword_str.split(",")
            else:
                keywords = []

            modifiers.append((False, (arguments, keywords)))
            pos = closing + 1

        # Neither assignment nor invocation.

        else:
            modifiers.append((False, None))
            pos += 1

    return modifiers
paul@0 128
paul@56 129
paul@56 130
paul@56 131
# Test generation functions.
paul@56 132
paul@56 133
def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types, by obtaining the kind of each reference in
    turn.
    """

    return [ref.get_kind() for ref in all_types]
paul@56 141
paul@237 142
def test_label_for_kind(kind):
paul@56 143
paul@237 144
    "Return the label used for 'kind' in test details."
paul@56 145
paul@237 146
    return kind == "<instance>" and "instance" or "type"
paul@56 147
paul@237 148
def test_label_for_type(ref):

    """
    Return the label used for 'ref' in test details, this being the label
    for the kind of the reference.
    """

    kind = ref.get_kind()
    return test_label_for_kind(kind)
paul@56 153
paul@56 154
paul@56 155
paul@94 156
# Instruction representation encoding.
paul@94 157
paul@94 158
def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    An instruction without arguments is encoded as the operation alone. Tuple
    arguments are encoded recursively, and empty plain values are shown as
    "{}".
    """

    op = instruction[0]
    args = instruction[1:]

    # Operations without arguments are not shown as calls.

    if not args:
        return op

    encoded_args = []

    for arg in args:
        if isinstance(arg, tuple):
            encoded_args.append(encode_instruction(arg))
        else:
            encoded_args.append(arg or "{}")

    return "%s(%s)" % (op, ", ".join(encoded_args))
paul@94 180
paul@94 181
paul@94 182
paul@0 183
# Output program encoding.
paul@0 184
paul@153 185
# Operations loading attributes.

attribute_loading_ops = (
    "__load_via_class",
    "__load_via_object",
    "__get_class_and_load",
    )

# All plain attribute operations: the loading operations plus storage. The
# attribute name argument of these is converted to a position symbol by
# encode_access_instruction.

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

# Checked operations loading attributes.

checked_loading_ops = (
    "__check_and_load_via_class",
    "__check_and_load_via_object",
    "__check_and_load_via_any",
    )

# All checked attribute operations: the attribute name argument of these is
# converted to position and code symbols by encode_access_instruction.

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class",
    "__check_and_store_via_object",
    "__check_and_store_via_any",
    )

# Operations whose type name argument is converted to position and code
# symbols.

typename_ops = (
    "__test_common_instance",
    "__test_common_object",
    "__test_common_type",
    )

# Operations whose type argument is replaced by its address.

type_ops = (
    "__test_specific_instance",
    "__test_specific_object",
    "__test_specific_type",
    )

# Operations whose final argument, a static object, is replaced by its
# address.

static_ops = (
    "__load_static_ignore",
    "__load_static_replace",
    "__load_static_test",
    "<test_context_static>",
    )

# Special values qualified by the context storage index when substituted.

context_values = (
    "<context>",
    )

# Operations acquiring the context storage index as their first argument.

context_ops = (
    "<context>",
    "<set_context>",
    "<test_context_revert>",
    "<test_context_static>",
    )

# Operations acting on references, together with the operations producing
# attributes whose values must be extracted when supplied to the former.

reference_acting_ops = attribute_ops + checked_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops
paul@153 223
paul@591 224
def encode_access_instruction(instruction, subs, context_index):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    The 'context_index' parameter defines the position in local context storage
    for the referenced context or affected by a context operation.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Encode the arguments. Only the first argument is encoded with knowledge
    # of the operation, permitting conversions specific to that operation.

    a = []
    if args:
        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op, context_index)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

    # Modify certain arguments.

    # Convert attribute name arguments to position symbols.

    if op in attribute_ops:
        arg = a[1]
        a[1] = encode_symbol("pos", arg)

    # Convert attribute name arguments to position and code symbols.

    elif op in checked_ops:
        arg = a[1]
        a[1] = encode_symbol("pos", arg)
        a.insert(2, encode_symbol("code", arg))

    # Convert type name arguments to position and code symbols, using the
    # original, unencoded argument to form the type attribute name.

    elif op in typename_ops:
        arg = encode_type_attribute(args[1])
        a[1] = encode_symbol("pos", arg)
        a.insert(2, encode_symbol("code", arg))

    # Obtain addresses of type arguments.

    elif op in type_ops:
        a[1] = "&%s" % a[1]

    # Obtain addresses of static objects.

    elif op in static_ops:
        a[-1] = "&%s" % a[-1]

    # Add context storage information to certain operations. This is tested
    # separately since an operation may also be a static operation.

    if op in context_ops:
        a.insert(0, context_index)

    # Define any argument string.

    if a:
        argstr = "(%s)" % ", ".join(map(str, a))
    else:
        argstr = ""

    # Substitute the first element of the instruction, which may not be an
    # operation at all. The membership test is used instead of the deprecated
    # has_key method.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Plain values without arguments are encoded as object addresses.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted
paul@113 320
paul@591 321
def encode_access_instruction_arg(arg, subs, op, context_index):

    """
    Encode 'arg' using 'subs' to define substitutions, 'op' to indicate the
    operation to which the argument belongs, and 'context_index' to indicate any
    affected context storage.

    Where 'op' is None, no conversions specific to an operation are applied.

    Return a tuple containing the encoded form of 'arg' along with a collection
    of any substituted values.
    """

    # Nested instructions are encoded recursively.

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs, context_index)

        # Convert attribute results to references where required.

        if op and op in reference_acting_ops and arg[0] in attribute_producing_ops:
            return "%s.value" % encoded, substituted
        else:
            return encoded, substituted

    # Special values only need replacing, not encoding. The membership test is
    # used instead of the deprecated has_key method.

    elif arg in subs:

        # Handle values modified by storage details.

        if arg in context_values:
            return "%s(%s)" % (subs.get(arg), context_index), set([arg])
        else:
            return subs.get(arg), set([arg])

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and arg != "<accessor>":
        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    else:
        return encode_path(arg), set()
paul@113 362
paul@0 363
def encode_function_pointer(path):

    """
    Encode 'path' as a reference to an output program function, prefixing the
    encoded path with "__fn_".
    """

    encoded = encode_path(path)
    return "__fn_%s" % encoded
paul@0 368
paul@0 369
def encode_instantiator_pointer(path):

    """
    Encode 'path' as a reference to an output program instantiator, prefixing
    the encoded path with "__new_".
    """

    encoded = encode_path(path)
    return "__new_%s" % encoded
paul@0 374
paul@491 375
def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence: a single instruction is returned
    unchanged, whereas any other number of instructions is presented as a
    parenthesised, comma-separated group with one instruction per line.
    """

    if len(instructions) != 1:
        return "(\n%s\n)" % ",\n".join(instructions)

    return instructions[0]
paul@491 383
paul@136 384
def encode_literal_constant(n):

    """
    Encode a name for the literal constant with the number 'n', this being
    "__const" followed by the number.
    """

    number = "%d" % n
    return "__const" + number
paul@136 389
paul@378 390
def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value': the length
    of a string value, or zero for any other kind of value.
    """

    if not isinstance(value, basestring):
        return 0

    return len(value)
paul@378 398
paul@136 399
def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being
    the name of the value's class followed by "value".
    """

    typename = value.__class__.__name__
    return typename + "value"
paul@136 404
paul@136 405
def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integers and floating point numbers are encoded using their string
    representation. Any other value is converted to a string and encoded as a
    double-quoted string literal, with quotes, backslashes and common control
    characters escaped symbolically, and with all other characters outside the
    range 0x20 to 0x7f escaped numerically.
    """

    if isinstance(value, (int, float)):
        return str(value)

    # Characters having symbolic escape sequences.

    escapes = {
        '"' : '\\"',
        '\n' : '\\n',
        '\t' : '\\t',
        '\r' : '\\r',
        '\\' : '\\\\',
        }

    l = []

    # Encode characters including non-ASCII ones.

    for c in str(value):
        if c in escapes:
            l.append(escapes[c])
        elif 0x20 <= ord(c) < 0x80:
            l.append(c)

        # Use fixed-width octal escapes since hexadecimal escapes in C have no
        # length limit and would absorb any following hexadecimal digit
        # characters. Three digits suffice for byte values.

        else:
            l.append("\\%03o" % ord(c))

    return '"%s"' % "".join(l)
paul@136 426
paul@283 427
def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence"), this being the style prefixed
    with "__newdata_".
    """

    name = "__newdata_%s" % style
    return name
paul@283 435
paul@159 436
def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given
    'path', prefixing the encoded path with "__newliteral_".
    """

    encoded = encode_path(path)
    return "__newliteral_%s" % encoded
paul@159 443
paul@136 444
def encode_literal_reference(n):

    """
    Encode a reference to a literal constant with the number 'n', this being
    "__constvalue" followed by the number.
    """

    number = "%d" % n
    return "__constvalue" + number
paul@136 449
paul@512 450
paul@512 451
paul@340 452
# Track all encoded paths, detecting and avoiding conflicts.
paul@340 453
paul@340 454
all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". Otherwise, "#" and "$" characters
    are replaced with "__" and "." characters with "_", with longer runs of
    underscores replacing "." where the encoding has already been recorded
    for a different path.
    """

    # Reserved words are merely prefixed and are not recorded.

    if path in reserved_words:
        return "__%s" % path

    symbols_encoded = path.replace("#", "__").replace("$", "__")

    # Paths without separators are not subject to conflict detection.

    if "." not in path:
        return symbols_encoded

    # Try successively longer separator replacements until an encoding is
    # found that is either unused or already recorded for this path.

    separator = "_"

    while True:
        candidate = symbols_encoded.replace(".", separator)
        owner = all_encoded_paths.get(candidate)

        # Store any new or re-encoded path.

        if not owner:
            all_encoded_paths[candidate] = path
            return candidate

        # Reuse the encoding previously recorded for this path.

        if owner == path:
            return candidate

        separator += "_"
paul@0 487
paul@136 488
def encode_predefined_reference(path):

    """
    Encode a reference to a predefined constant value for 'path', prefixing
    the encoded path with "__predefined_".
    """

    encoded = encode_path(path)
    return "__predefined_%s" % encoded
paul@136 493
paul@150 494
def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    Kinds without a defined size prefix are used directly as the prefix, and
    the encoded path is only appended where 'path' is given.
    """

    prefix = structure_size_prefixes.get(kind, kind)

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%ssize%s" % (prefix, suffix)
paul@150 502
paul@0 503
def encode_symbol(symbol_type, path=None):

    """
    Encode a symbol with the given 'symbol_type' and optional 'path', the
    encoded path being appended after an underscore only where 'path' is
    given.
    """

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%s%s" % (symbol_type, suffix)
paul@0 508
paul@150 509
def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned. The 'kind' must have a defined
    table name prefix.
    """

    prefix = table_name_prefixes[kind]
    return "__%sTable_%s" % (prefix, encode_path(path))
paul@150 517
paul@131 518
def encode_type_attribute(path):

    """
    Encode the special type attribute for 'path', this being the path
    prefixed with "#".
    """

    encoded = "#%s" % path
    return encoded
paul@131 523
paul@318 524
def decode_type_attribute(s):

    """
    Decode the special type attribute 's' by discarding its first character.
    """

    path = s[1:]
    return path
paul@318 529
paul@318 530
def is_type_attribute(s):

    """
    Return whether 's' is a type attribute name, such names starting with "#".
    """

    result = s.startswith("#")
    return result
paul@318 535
paul@56 536
paul@56 537
paul@150 538
# A mapping from kinds to structure size reference prefixes.
paul@150 539
paul@150 540
structure_size_prefixes = {
    "<class>": "c",
    "<module>": "m",
    "<instance>": "i",
    }

# A mapping from kinds to table name prefixes.

table_name_prefixes = {
    "<class>": "Class",
    "<function>": "Function",
    "<module>": "Module",
    "<instance>": "Instance",
    }
paul@150 554
paul@150 555
paul@150 556
paul@0 557
# Output language reserved words.
paul@0 558
paul@0 559
reserved_words = [
    "break",
    "char",
    "const",
    "continue",
    "default",
    "double",
    "else",
    "float",
    "for",
    "if",
    "int",
    "long",
    "NULL",
    "return",
    "struct",
    "typedef",
    "void",
    "while",
    ]
paul@0 569
paul@0 570
# vim: tabstop=4 expandtab shiftwidth=4