Lichen

Annotated encoders.py

583:aed28d04304d
2017-02-13 Paul Boddie Re-added size information to string instances as the __size__ attribute. This fixes problems introduced when using strlen on data likely to contain embedded nulls, which was the reason for having size information explicitly stored in the first place. attr-strvalue-without-size
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Encoder functions, producing representations of program objects.
paul@0 5
paul@498 6
Copyright (C) 2016, 2017 Paul Boddie <paul@boddie.org.uk>
paul@0 7
paul@0 8
This program is free software; you can redistribute it and/or modify it under
paul@0 9
the terms of the GNU General Public License as published by the Free Software
paul@0 10
Foundation; either version 3 of the License, or (at your option) any later
paul@0 11
version.
paul@0 12
paul@0 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 16
details.
paul@0 17
paul@0 18
You should have received a copy of the GNU General Public License along with
paul@0 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 20
"""
paul@0 21
paul@498 22
from common import first, InstructionSequence
paul@56 23
paul@0 24
# Output encoding and decoding for the summary files.
paul@0 25
paul@0 26
def encode_attrnames(attrnames):

    """
    Encode the 'attrnames' representing usage as a comma-separated string,
    yielding "{}" where the result would otherwise be empty.
    """

    encoded = ", ".join(attrnames)
    if not encoded:
        return "{}"
    return encoded
paul@0 31
paul@0 32
def encode_constrained(constrained):

    """
    Encode the 'constrained' status for program summaries, yielding
    "constrained" for a true status and "deduced" otherwise.
    """

    if constrained:
        return "constrained"
    return "deduced"
paul@0 37
paul@0 38
def encode_usage(usage):

    """
    Encode attribute details from 'usage', a collection of (attribute name,
    invocation flag, assignment flag) tuples. Invoked names acquire a "!"
    suffix, assigned names a "=" suffix, and the terms are joined using commas.
    Where the result would otherwise be empty, "{}" is returned.
    """

    terms = []

    for attrname, invocation, assignment in usage:

        # The invocation marker takes precedence over the assignment marker.

        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""

        terms.append("%s%s" % (attrname, suffix))

    encoded = ", ".join(terms)
    if not encoded:
        return "{}"
    return encoded
paul@0 47
paul@88 48
def decode_usage(s):

    """
    Decode attribute details from 's', a string of comma-separated attribute
    names, each optionally suffixed with "!" (invocation) or "=" (assignment).

    Return a sorted tuple of distinct (attribute name, invocation flag,
    assignment flag) tuples.

    The "{}" marker emitted by encode_usage for empty usage, as well as the
    empty string, is decoded as an empty tuple so that encoded usage is
    restored faithfully instead of yielding a spurious attribute name.
    """

    if not s or s == "{}":
        return ()

    all_attrnames = set()
    for attrname_str in s.split(", "):
        all_attrnames.add((
            attrname_str.rstrip("!="),
            attrname_str.endswith("!"),
            attrname_str.endswith("=")
            ))

    return tuple(sorted(all_attrnames))
paul@88 59
paul@0 60
def encode_access_location(t):

    """
    Encode the access location 't', a tuple providing a path, name, attribute
    name and version. An absent or empty name is encoded as "{}".
    """

    path, name, attrname, version = t

    if not name:
        name = "{}"

    return "%s %s %s:%d" % (path, name, attrname, version)
paul@0 66
paul@0 67
def encode_location(t):

    """
    Encode the general location 't' in a concise form. Where a name is present,
    the path and name are given, followed by the version if one is present.
    Where the name is absent, the path and attribute name are given instead.
    """

    path, name, attrname, version = t

    # Locations without names are described by their attribute names.

    if name is None:
        return "%s :%s" % (path, attrname)

    # Named locations may lack version details.

    if version is None:
        return "%s %s" % (path, name)

    return "%s %s:%d" % (path, name, version)
paul@0 78
paul@0 79
def encode_modifiers(modifiers):

    """
    Encode assignment and invocation details from 'modifiers', concatenating
    the encoded form of each modifier term without any separator.
    """

    return "".join([encode_modifier_term(t) for t in modifiers])
paul@0 87
paul@0 88
def encode_modifier_term(t):

    """
    Encode modifier 't' representing an assignment or an invocation. The
    modifier is an (assignment flag, invocation details) tuple: assignments
    are encoded as "=", invocations as "(<arguments>;<keywords>)" with the
    keywords separated by commas, and anything else as "_".
    """

    assignment, invocation = t

    if assignment:
        return "="

    if invocation is None:
        return "_"

    arguments, keywords = invocation
    return "(%d;%s)" % (arguments, ",".join(keywords))
paul@0 100
paul@553 101
def decode_modifiers(s):

    """
    Decode 's' containing modifiers, returning a list of (assignment flag,
    invocation details) tuples. Invocation details are either None or an
    (argument count, keyword list) tuple.
    """

    modifiers = []
    pos = 0
    length = len(s)

    while pos < length:
        c = s[pos]

        # Assignments and plain terms each occupy a single character.

        if c != "(":
            modifiers.append((c == "=", None))
            pos += 1
            continue

        # Invocations have the form "(<arguments>;<keyword>,...)".

        separator = s.index(";", pos)
        arguments = int(s[pos+1:separator])

        closing = s.index(")", separator)
        keyword_str = s[separator+1:closing]

        if keyword_str:
            keywords = keyword_str.split(",")
        else:
            keywords = []

        modifiers.append((False, (arguments, keywords)))
        pos = closing + 1

    return modifiers
paul@0 128
paul@56 129
paul@56 130
paul@56 131
# Test generation functions.
paul@56 132
paul@56 133
def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types. Each kind is obtained by calling the
    get_kind method of the corresponding reference.
    """

    return [ref.get_kind() for ref in all_types]
paul@56 141
paul@237 142
def test_label_for_kind(kind):
paul@56 143
paul@237 144
    "Return the label used for 'kind' in test details."
paul@56 145
paul@237 146
    return kind == "<instance>" and "instance" or "type"
paul@56 147
paul@237 148
def test_label_for_type(ref):

    """
    Return the label used for 'ref' in test details, this being the label
    appropriate for the kind reported by the reference.
    """

    kind = ref.get_kind()
    return test_label_for_kind(kind)
paul@56 153
paul@56 154
paul@56 155
paul@94 156
# Instruction representation encoding.
paul@94 157
paul@94 158
def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    Instructions without arguments are encoded as the operation alone. Tuple
    arguments are encoded recursively, and absent or empty plain values are
    encoded as "{}".
    """

    op = instruction[0]
    args = instruction[1:]

    if not args:
        return op

    encoded_args = []

    for arg in args:
        if isinstance(arg, tuple):
            encoded_args.append(encode_instruction(arg))
        else:
            encoded_args.append(arg or "{}")

    return "%s(%s)" % (op, ", ".join(encoded_args))
paul@94 180
paul@94 181
paul@94 182
paul@0 183
# Output program encoding.
paul@0 184
paul@153 185
# Operations loading attributes.

attribute_loading_ops = (
    "__load_via_class",
    "__load_via_object",
    "__get_class_and_load",
    )

# Operations whose attribute name arguments become position symbols.

attribute_ops = attribute_loading_ops + ("__store_via_object",)

# Checked operations loading attributes.

checked_loading_ops = (
    "__check_and_load_via_class",
    "__check_and_load_via_object",
    "__check_and_load_via_any",
    )

# Operations whose attribute name arguments become position and code symbols.

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class",
    "__check_and_store_via_object",
    "__check_and_store_via_any",
    )

# Operations whose type name arguments become position and code symbols.

typename_ops = (
    "__test_common_instance",
    "__test_common_object",
    "__test_common_type",
    )

# Operations whose type arguments are referenced by address.

type_ops = (
    "__test_specific_instance",
    "__test_specific_object",
    "__test_specific_type",
    )

# Operations whose arguments are static objects referenced by address.

static_ops = ("__load_static",)

# Operations acting on references, and operations producing attributes.

reference_acting_ops = attribute_ops + checked_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops
paul@153 215
paul@113 216
def encode_access_instruction(instruction, subs):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    Return both the encoded instruction and a collection of substituted names.
    """

    op = instruction[0]
    args = instruction[1:]
    substituted = set()

    # Maintain the encoded arguments even for instructions without arguments
    # so that the accessor initialisation test below can always consult them.

    a = []

    if not args:
        argstr = ""

    else:
        # Encode the arguments. Only the first argument is encoded with
        # knowledge of the operation.

        converting_op = op
        for arg in args:
            s, _substituted = encode_access_instruction_arg(arg, subs, converting_op)
            substituted.update(_substituted)
            a.append(s)
            converting_op = None

        # Modify certain arguments.

        # Convert attribute name arguments to position symbols.

        if op in attribute_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)

        # Convert attribute name arguments to position and code symbols.

        elif op in checked_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Convert type name arguments to position and code symbols, using the
        # original (unencoded) type name argument.

        elif op in typename_ops:
            arg = encode_type_attribute(args[1])
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Obtain addresses of type arguments.

        elif op in type_ops:
            a[1] = "&%s" % a[1]

        # Obtain addresses of static objects.

        elif op in static_ops:
            a[0] = "&%s" % a[0]
            a[1] = "&%s" % a[1]

        # Encoded arguments need not be strings: substitutions may provide
        # other objects.

        argstr = "(%s)" % ", ".join(map(str, a))

    # Substitute the first element of the instruction, which may not be an
    # operation at all.

    if op in subs:
        substituted.add(op)

        # Break accessor initialisation into initialisation and value-yielding
        # parts:

        if op == "<set_accessor>" and a and isinstance(a[0], InstructionSequence):
            ops = []
            ops += a[0].get_init_instructions()
            ops.append("%s(%s)" % (subs[op], a[0].get_value_instruction()))
            return ", ".join(map(str, ops)), substituted

        op = subs[op]

    # Unsubstituted elements without arguments are referenced by address.

    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr), substituted
paul@113 303
paul@153 304
def encode_access_instruction_arg(arg, subs, op):

    """
    Encode 'arg' using 'subs' to define substitutions, returning a tuple
    containing the encoded form of 'arg' along with a collection of any
    substituted values.

    The 'op' parameter indicates the operation for which 'arg' is being
    encoded, or None if the operation is of no relevance to the argument.
    """

    # Nested instructions are encoded in their own right.

    if isinstance(arg, tuple):
        encoded, substituted = encode_access_instruction(arg, subs)

        # Convert attribute results to references where required.

        if op and op in reference_acting_ops and arg[0] in attribute_producing_ops:
            return "%s.value" % encoded, substituted

        return encoded, substituted

    # Special values only need replacing, not encoding.

    if arg in subs:
        return subs[arg], set([arg])

    # Convert static references to the appropriate type.

    if op and op in reference_acting_ops and arg != "<accessor>":
        return "&%s" % encode_path(arg), set()

    # Other values may need encoding.

    return encode_path(arg), set()
paul@113 336
paul@0 337
def encode_function_pointer(path):

    """
    Encode 'path' as a reference to an output program function, prefixing the
    encoded path with "__fn_".
    """

    encoded_path = encode_path(path)
    return "__fn_%s" % encoded_path
paul@0 342
paul@0 343
def encode_instantiator_pointer(path):

    """
    Encode 'path' as a reference to an output program instantiator, prefixing
    the encoded path with "__new_".
    """

    encoded_path = encode_path(path)
    return "__new_%s" % encoded_path
paul@0 348
paul@491 349
def encode_instructions(instructions):

    """
    Encode 'instructions' as a sequence. A single instruction is returned as
    it is; any other number of instructions is combined into a parenthesised,
    comma-separated sequence with each instruction on its own line.
    """

    if len(instructions) != 1:
        return "(\n%s\n)" % ",\n".join(instructions)

    return instructions[0]
paul@491 357
paul@136 358
def encode_literal_constant(n):

    """
    Encode a name for the literal constant with the number 'n', this being
    "__const" followed by the number.
    """

    name = "__const%d" % n
    return name
paul@136 363
paul@378 364
def encode_literal_constant_size(value):

    """
    Encode a size for the literal constant with the given 'value': the length
    of the value for strings, and zero for values of any other type.
    """

    if not isinstance(value, basestring):
        return 0

    return len(value)
paul@378 372
paul@136 373
def encode_literal_constant_member(value):

    """
    Encode the member name for the 'value' in the final program, this being
    the name of the value's class followed by "value".
    """

    class_name = value.__class__.__name__
    return "%svalue" % class_name
paul@136 378
paul@136 379
def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integers and floats are encoded using their string representations. Any
    other value is converted to a string and encoded as a double-quoted string
    literal, with quotes, backslashes, newlines, tabs and carriage returns
    written using their symbolic escapes, and with all other characters
    outside the printable ASCII range written as three-digit octal escapes.
    """

    if isinstance(value, (int, float)):
        return str(value)

    # Characters having symbolic escapes.

    escapes = {
        '"' : '\\"',
        '\n' : '\\n',
        '\t' : '\\t',
        '\r' : '\\r',
        '\\' : '\\\\',
        }

    l = []

    # Encode characters including non-ASCII ones.

    for c in str(value):
        if c in escapes:
            l.append(escapes[c])
        elif 0x20 <= ord(c) < 0x80:
            l.append(c)

        # Use fixed-width octal escapes instead of hexadecimal escapes. In C,
        # a hexadecimal escape absorbs every hexadecimal digit following it,
        # meaning that character 0x01 followed by "a" would be misread as the
        # single escape \x01a. Octal escapes end after three digits.
        # NOTE(review): assumes character values no greater than 0xff - confirm
        # should wider character values ever be presented here.

        else:
            l.append("\\%03o" % ord(c))

    return '"%s"' % "".join(l)
paul@136 400
paul@283 401
def encode_literal_data_initialiser(style):

    """
    Encode a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence"), this being "__newdata_"
    followed by the style.
    """

    reference = "__newdata_%s" % style
    return reference
paul@283 409
paul@159 410
def encode_literal_instantiator(path):

    """
    Encode a reference to an instantiator for a literal having the given
    'path', prefixing the encoded path with "__newliteral_".
    """

    encoded_path = encode_path(path)
    return "__newliteral_%s" % encoded_path
paul@159 417
paul@136 418
def encode_literal_reference(n):

    """
    Encode a reference to a literal constant with the number 'n', this being
    "__constvalue" followed by the number.
    """

    reference = "__constvalue%d" % n
    return reference
paul@136 423
paul@512 424
paul@512 425
paul@340 426
# Track all encoded paths, detecting and avoiding conflicts.
paul@340 427
paul@340 428
all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__". In other paths, "#" and "$" are
    replaced with "__", and "." is replaced with "_" or, where the result has
    already been recorded for a different path, with a longer run of
    underscores. Encodings of paths containing "." are recorded in
    all_encoded_paths.
    """

    if path in reserved_words:
        return "__%s" % path

    part_encoded = path.replace("#", "__").replace("$", "__")

    # Paths without separators are neither tested for conflicts nor recorded.

    if "." not in path:
        return part_encoded

    # Try separators starting with a single underscore until the encoding is
    # either unused or already recorded for this very path.

    separator = "_"

    while True:
        encoded = part_encoded.replace(".", separator)
        previous = all_encoded_paths.get(encoded)

        # Store any new or re-encoded path.

        if not previous:
            all_encoded_paths[encoded] = path
            return encoded

        if previous == path:
            return encoded

        # Re-encode upon a conflict with the encoding of a different path.
        # The first retry uses three underscores, as in the original code.

        if separator == "_":
            separator = "___"
        else:
            separator += "_"
paul@0 461
paul@136 462
def encode_predefined_reference(path):

    """
    Encode a reference to a predefined constant value for 'path', prefixing
    the encoded path with "__predefined_".
    """

    encoded_path = encode_path(path)
    return "__predefined_%s" % encoded_path
paul@136 467
paul@150 468
def encode_size(kind, path=None):

    """
    Encode a structure size reference for the given 'kind' of structure, with
    'path' indicating a specific structure name.

    Kinds without an entry in structure_size_prefixes are used directly as the
    prefix. Where 'path' is absent or empty, no structure name is included.
    """

    prefix = structure_size_prefixes.get(kind, kind)

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%ssize%s" % (prefix, suffix)
paul@150 476
paul@0 477
def encode_symbol(symbol_type, path=None):

    """
    Encode a symbol with the given 'symbol_type' and optional 'path'. Where
    'path' is absent or empty, the symbol consists only of "__" followed by
    the symbol type; otherwise, "_" and the encoded path are appended.
    """

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%s%s" % (symbol_type, suffix)
paul@0 482
paul@150 483
def encode_tablename(kind, path):

    """
    Encode a table reference for the given 'kind' of table structure, indicating
    a 'path' for the specific object concerned. The 'kind' must have an entry
    in table_name_prefixes.
    """

    prefix = table_name_prefixes[kind]
    encoded_path = encode_path(path)

    return "__%sTable_%s" % (prefix, encoded_path)
paul@150 491
paul@131 492
def encode_type_attribute(path):

    """
    Encode the special type attribute for 'path', this being the path prefixed
    with "#".
    """

    attribute = "#%s" % path
    return attribute
paul@131 497
paul@318 498
def decode_type_attribute(s):

    """
    Decode the special type attribute 's' by discarding its first character.
    """

    prefix_length = 1
    return s[prefix_length:]
paul@318 503
paul@318 504
def is_type_attribute(s):

    """
    Return whether 's' is a type attribute name, this being indicated by a
    leading "#" character.
    """

    return s[:1] == "#"
paul@318 509
paul@56 510
paul@56 511
paul@150 512
# A mapping from kinds to structure size reference prefixes.
paul@150 513
paul@150 514
structure_size_prefixes = dict([
    ("<class>", "c"),
    ("<module>", "m"),
    ("<instance>", "i"),
    ])
paul@150 519
paul@150 520
# A mapping from kinds to table name prefixes.
paul@150 521
paul@150 522
table_name_prefixes = dict([
    ("<class>", "Class"),
    ("<function>", "Function"),
    ("<module>", "Module"),
    ("<instance>", "Instance"),
    ])
paul@150 528
paul@150 529
paul@150 530
paul@0 531
# Output language reserved words.
paul@0 532
paul@0 533
# The C keywords are listed along with NULL. Words such as "switch", "static"
# and "register" are valid identifiers in source programs and would otherwise
# be written unchanged to the output program.

reserved_words = [
    "auto",
    "break",
    "case", "char", "const", "continue",
    "default", "do", "double",
    "else", "enum", "extern",
    "float", "for",
    "goto",
    "if", "inline", "int",
    "long",
    "NULL",
    "register", "restrict", "return",
    "short", "signed", "sizeof", "static", "struct", "switch",
    "typedef",
    "union", "unsigned",
    "void", "volatile",
    "while",
    ]
paul@0 543
paul@0 544
# vim: tabstop=4 expandtab shiftwidth=4