Lichen

Annotated encoders.py

343:e0879c83a439
2016-12-07 Paul Boddie Added support for reading to the end of a stream's input, fixing EOFError raising in fread by returning shorter amounts of data when EOF occurs, only raising an exception if no data was read before EOF occurred. Made the test input longer to exercise tests of reading remaining data.
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Encoder functions, producing representations of program objects.
paul@0 5
paul@0 6
Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
paul@0 7
paul@0 8
This program is free software; you can redistribute it and/or modify it under
paul@0 9
the terms of the GNU General Public License as published by the Free Software
paul@0 10
Foundation; either version 3 of the License, or (at your option) any later
paul@0 11
version.
paul@0 12
paul@0 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 16
details.
paul@0 17
paul@0 18
You should have received a copy of the GNU General Public License along with
paul@0 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 20
"""
paul@0 21
paul@56 22
from common import first
paul@56 23
paul@0 24
# Output encoding and decoding for the summary files.
paul@0 25
paul@0 26
def encode_attrnames(attrnames):

    """
    Return the usage 'attrnames' joined using commas, or "{}" where joining
    them produces an empty string.
    """

    joined = ", ".join(attrnames)
    if joined:
        return joined
    return "{}"
paul@0 31
paul@0 32
def encode_constrained(constrained):

    "Return the program summary label for the given 'constrained' status."

    if constrained:
        return "constrained"
    return "deduced"
paul@0 37
paul@0 38
def encode_usage(usage):

    """
    Encode attribute details from 'usage', a collection of (attribute name,
    invocation, assignment) tuples. Invoked names are suffixed with "!",
    otherwise assigned names are suffixed with "=", and the result is "{}"
    where nothing is produced.
    """

    encoded = []
    for attrname, invocation, assignment in usage:
        if invocation:
            suffix = "!"
        elif assignment:
            suffix = "="
        else:
            suffix = ""
        encoded.append("%s%s" % (attrname, suffix))

    joined = ", ".join(encoded)
    if not joined:
        return "{}"
    return joined
paul@0 47
paul@88 48
def decode_usage(s):

    """
    Decode attribute details from 's', returning a sorted tuple of distinct
    (attribute name, invocation, assignment) tuples, one for each
    comma-separated term in 's'.
    """

    terms = set()
    for term in s.split(", "):
        invoked = term.endswith("!")
        assigned = term.endswith("=")
        terms.add((term.rstrip("!="), invoked, assigned))

    return tuple(sorted(terms))
paul@88 59
paul@0 60
def encode_access_location(t):

    """
    Encode the access location 't', a (path, name, attribute name, version)
    tuple, writing "{}" in place of a missing name.
    """

    path, name, attrname, version = t
    if not name:
        name = "{}"
    return "%s %s %s:%d" % (path, name, attrname, version)
paul@0 66
paul@0 67
def encode_location(t):

    """
    Encode the general location 't', a (path, name, attribute name, version)
    tuple, in a concise form. A location without a name is described by its
    attribute name; otherwise the name is used, qualified by any version.
    """

    path, name, attrname, version = t

    if name is None:
        return "%s :%s" % (path, attrname)

    if version is None:
        return "%s %s" % (path, name)

    return "%s %s:%d" % (path, name, version)
paul@0 78
paul@0 79
def encode_modifiers(modifiers):

    """
    Encode assignment details from 'modifiers' as the concatenation of the
    encoded form of each modifier term.
    """

    return "".join([encode_modifier_term(t) for t in modifiers])
paul@0 87
paul@0 88
def encode_modifier_term(t):

    """
    Encode modifier 't', an (assignment, invocation) pair, as "=" for an
    assignment, "!" for an invocation, or "_" where neither applies.
    """

    assignment, invocation = t

    if assignment:
        return "="
    if invocation:
        return "!"
    return "_"
paul@0 94
paul@0 95
def decode_modifier_term(s):

    """
    Decode modifier term 's', returning an (assignment, invocation) pair of
    boolean values.
    """

    is_assignment = s == "="
    is_invocation = s == "!"
    return is_assignment, is_invocation
paul@0 100
paul@56 101
paul@56 102
paul@56 103
# Test generation functions.
paul@56 104
paul@56 105
def get_kinds(all_types):

    """
    Return object kind details for 'all_types', being a collection of
    references for program types.

    The result is a list containing the value returned by the get_kind method
    of each reference, in the order of iteration over 'all_types'. A list is
    always built explicitly instead of relying on map returning one.
    """

    return [ref.get_kind() for ref in all_types]
paul@56 113
paul@237 114
def test_label_for_kind(kind):
paul@56 115
paul@237 116
    "Return the label used for 'kind' in test details."
paul@56 117
paul@237 118
    return kind == "<instance>" and "instance" or "type"
paul@56 119
paul@237 120
def test_label_for_type(ref):

    "Return the test details label for the kind of the reference 'ref'."

    kind = ref.get_kind()
    return test_label_for_kind(kind)
paul@56 125
paul@56 126
paul@56 127
paul@94 128
# Instruction representation encoding.
paul@94 129
paul@94 130
def encode_instruction(instruction):

    """
    Encode the 'instruction' - a sequence whose first element is an operation
    and whose remaining elements are arguments - as a function call string.

    Tuple arguments are encoded recursively as instructions, whereas other
    arguments are used as they are, with "{}" replacing any false value. An
    instruction without arguments is encoded as the operation alone.
    """

    op, args = instruction[0], instruction[1:]

    # Without arguments, no call notation is produced.

    if not args:
        return op

    encoded_args = []
    for arg in args:
        if not isinstance(arg, tuple):
            encoded_args.append(arg or "{}")
        else:
            encoded_args.append(encode_instruction(arg))

    argstr = "(%s)" % ", ".join(encoded_args)
    return "%s%s" % (op, argstr)
paul@94 152
paul@94 153
paul@94 154
paul@0 155
# Output program encoding.
paul@0 156
paul@153 157
# Operations loading attributes.

attribute_loading_ops = (
    "__load_via_class",
    "__load_via_object",
    "__get_class_and_load",
    )

# Attribute operations: the loading operations plus the storing operation.

attribute_ops = attribute_loading_ops + (
    "__store_via_object",
    )

# Operations checking and then loading attributes.

checked_loading_ops = (
    "__check_and_load_via_class",
    "__check_and_load_via_object",
    "__check_and_load_via_any",
    )

# Checked operations: the checked loading operations plus the checked storing
# operations.

checked_ops = checked_loading_ops + (
    "__check_and_store_via_class",
    "__check_and_store_via_object",
    "__check_and_store_via_any",
    )

# Operations taking a type name argument.

typename_ops = (
    "__test_common_instance",
    "__test_common_object",
    "__test_common_type",
    )

# Operations involving static objects.

static_ops = (
    "__load_static",
    )

# Combined collections consulted when encoding instruction arguments.

reference_acting_ops = attribute_ops + checked_ops + typename_ops
attribute_producing_ops = attribute_loading_ops + checked_loading_ops
paul@153 183
paul@113 184
def encode_access_instruction(instruction, subs):

    """
    Encode the 'instruction' - a sequence starting with an operation and
    followed by arguments, each of which may be an instruction sequence or a
    plain value - to produce a function call string representation.

    The 'subs' parameter defines a mapping of substitutions for special values
    used in instructions.

    Where the instruction has no arguments, no call notation is produced: the
    first element is replaced using 'subs' if present there, and is otherwise
    encoded as a path prefixed with "&".
    """

    op = instruction[0]
    args = instruction[1:]

    if not args:
        argstr = ""

    else:
        # Encode the arguments. Only the first argument is encoded with
        # knowledge of the operation; the others are encoded without it.

        a = []
        converting_op = op
        for arg in args:
            a.append(encode_access_instruction_arg(arg, subs, converting_op))
            converting_op = None

        # Modify certain arguments.

        # Convert attribute name arguments to position symbols.

        if op in attribute_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)

        # Convert attribute name arguments to position and code symbols.

        elif op in checked_ops:
            arg = a[1]
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Convert type name arguments to position and code symbols. The
        # original, unencoded argument is used to make the type attribute.

        elif op in typename_ops:
            arg = encode_type_attribute(args[1])
            a[1] = encode_symbol("pos", arg)
            a.insert(2, encode_symbol("code", arg))

        # Obtain addresses of static objects.

        elif op in static_ops:
            a[0] = "&%s" % a[0]
            a[1] = "&%s" % a[1]

        argstr = "(%s)" % ", ".join(a)

    # Substitute the first element of the instruction, which may not be an
    # operation at all. The membership operator is used instead of the
    # deprecated has_key method.

    if op in subs:
        op = subs[op]
    elif not args:
        op = "&%s" % encode_path(op)

    return "%s%s" % (op, argstr)
paul@113 249
paul@153 250
def encode_access_instruction_arg(arg, subs, op):

    """
    Encode 'arg' using 'subs' to define substitutions.

    The 'op' parameter indicates the operation for which 'arg' is being
    encoded, or a false value where the operation is not to affect the
    encoding.

    A tuple 'arg' is encoded as an instruction, with ".value" appended where
    'op' is a reference-acting operation and the instruction is an
    attribute-producing operation. Any other 'arg' found in 'subs' is replaced
    by its substitution. Otherwise, 'arg' is encoded as a path, prefixed with
    "&" where 'op' is a reference-acting operation and 'arg' is not
    "<accessor>".
    """

    if isinstance(arg, tuple):
        encoded = encode_access_instruction(arg, subs)

        # Convert attribute results to references where required.

        if op and op in reference_acting_ops and arg[0] in attribute_producing_ops:
            return "%s.value" % encoded
        else:
            return encoded

    # Special values only need replacing, not encoding. The membership
    # operator is used instead of the deprecated has_key method.

    elif arg in subs:
        return subs[arg]

    # Convert static references to the appropriate type.

    elif op and op in reference_acting_ops and arg != "<accessor>":
        return "&%s" % encode_path(arg)

    # Other values may need encoding.

    else:
        return encode_path(arg)
paul@113 278
paul@126 279
def encode_bound_reference(path):

    "Return the bound method name for 'path', based on the encoded path."

    encoded = encode_path(path)
    return "__bound_%s" % encoded
paul@126 284
paul@0 285
def encode_function_pointer(path):

    """
    Return a reference to the output program function for 'path', based on the
    encoded path.
    """

    encoded = encode_path(path)
    return "__fn_%s" % encoded
paul@0 290
paul@149 291
def encode_initialiser_pointer(path):

    """
    Return a reference to an initialiser function structure for 'path', being
    the encoded path of the __init__ attribute of 'path'.
    """

    initialiser_path = "%s.__init__" % path
    return encode_path(initialiser_path)
paul@149 296
paul@0 297
def encode_instantiator_pointer(path):

    """
    Return a reference to the output program instantiator for 'path', based on
    the encoded path.
    """

    encoded = encode_path(path)
    return "__new_%s" % encoded
paul@0 302
paul@136 303
def encode_literal_constant(n):

    "Return the name of the literal constant numbered 'n'."

    name = "__const%d" % n
    return name
paul@136 308
paul@136 309
def encode_literal_constant_member(value):

    """
    Return the member name for 'value' in the final program, formed from the
    name of the class of 'value'.
    """

    class_name = value.__class__.__name__
    return "%svalue" % class_name
paul@136 314
paul@136 315
def encode_literal_constant_value(value):

    """
    Encode the given 'value' in the final program.

    Integer and floating point values are encoded using their string
    representation. Any other value is converted to a string and enclosed in
    double quotes, with backslashes, double quotes, newlines, tabs and carriage
    returns written as backslash escape sequences.
    """

    if isinstance(value, (int, float)):
        return str(value)

    # Escape backslashes first so that the backslashes introduced by the
    # other escape sequences are not themselves escaped.

    escaped = str(value)
    for plain, escape in (
        ("\\", "\\\\"),
        ('"', '\\"'),
        ("\n", "\\n"),
        ("\t", "\\t"),
        ("\r", "\\r"),
        ):
        escaped = escaped.replace(plain, escape)

    return '"%s"' % escaped
paul@136 323
paul@283 324
def encode_literal_data_initialiser(style):

    """
    Return a reference to a function populating the data for a literal having
    the given 'style' ("mapping" or "sequence").
    """

    reference = "__newdata_%s" % style
    return reference
paul@283 332
paul@159 333
def encode_literal_instantiator(path):

    """
    Return a reference to an instantiator for a literal having the given
    'path', based on the encoded path.
    """

    encoded = encode_path(path)
    return "__newliteral_%s" % encoded
paul@159 340
paul@136 341
def encode_literal_reference(n):

    "Return a reference to the literal constant numbered 'n'."

    reference = "__constvalue%d" % n
    return reference
paul@136 346
paul@340 347
# Track all encoded paths, detecting and avoiding conflicts. This maps each
# encoded form to the path for which it was produced.

all_encoded_paths = {}

def encode_path(path):

    """
    Encode 'path' as an output program object, translating special symbols.

    Reserved words are prefixed with "__" and are not recorded. For other
    paths, "#" and "$" become "__" and each "." becomes "_". Where the result
    has already been produced for a different path, each "." is instead
    replaced by progressively longer runs of underscores until an unused
    encoding (or the one previously recorded for 'path') is found.

    A conflicting path without any "." cannot be changed by such replacement,
    and so progressively longer runs of underscores are appended to it
    instead, guaranteeing that the search for an encoding terminates.
    """

    if path in reserved_words:
        return "__%s" % path

    part_encoded = path.replace("#", "__").replace("$", "__")
    encoded = part_encoded.replace(".", "_")
    has_dots = "." in part_encoded

    # Test for a conflict with the encoding of a different path, re-encoding
    # if necessary.

    previous = all_encoded_paths.get(encoded)
    replacement = "_"

    while previous is not None:
        if path == previous:
            return encoded

        replacement += "_"

        # Each candidate is longer than the last, and so an unused candidate
        # is eventually found.

        if has_dots:
            encoded = part_encoded.replace(".", replacement)
        else:
            encoded = part_encoded + replacement

        previous = all_encoded_paths.get(encoded)

    # Store any new or re-encoded path.

    all_encoded_paths[encoded] = path
    return encoded
paul@0 378
paul@136 379
def encode_predefined_reference(path):

    """
    Return a reference to a predefined constant value for 'path', based on the
    encoded path.
    """

    encoded = encode_path(path)
    return "__predefined_%s" % encoded
paul@136 384
paul@150 385
def encode_size(kind, path=None):

    """
    Return a structure size reference for the given 'kind' of structure, using
    the size prefix defined for 'kind' or else 'kind' itself, and with any
    given 'path' indicating a specific structure name.
    """

    prefix = structure_size_prefixes.get(kind, kind)

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%ssize%s" % (prefix, suffix)
paul@150 393
paul@0 394
def encode_symbol(symbol_type, path=None):

    """
    Return a symbol of the given 'symbol_type', qualified by the encoded form
    of any given 'path'.
    """

    if path:
        suffix = "_%s" % encode_path(path)
    else:
        suffix = ""

    return "__%s%s" % (symbol_type, suffix)
paul@0 399
paul@150 400
def encode_tablename(kind, path):

    """
    Return a table reference for the given 'kind' of table structure, which
    must have a table name prefix defined, with 'path' indicating the specific
    object concerned.
    """

    prefix = table_name_prefixes[kind]
    return "__%sTable_%s" % (prefix, encode_path(path))
paul@150 408
paul@131 409
def encode_type_attribute(path):

    "Return the special type attribute name for 'path', marked using '#'."

    attrname = "#%s" % path
    return attrname
paul@131 414
paul@318 415
def decode_type_attribute(s):

    """
    Return the special type attribute 's' without its first character, which
    is not itself tested.
    """

    # A single leading character is discarded.

    marker_length = 1
    return s[marker_length:]
paul@318 420
paul@318 421
def is_type_attribute(s):

    "Return whether 's' is a type attribute name, being one starting with '#'."

    return s[:1] == "#"
paul@318 426
paul@56 427
paul@56 428
paul@150 429
# A mapping from kinds to structure size reference prefixes.
paul@150 430
paul@150 431
structure_size_prefixes = dict([
    ("<class>", "c"),
    ("<module>", "m"),
    ("<instance>", "i"),
    ])
paul@150 436
paul@150 437
# A mapping from kinds to table name prefixes.
paul@150 438
paul@150 439
table_name_prefixes = dict([
    ("<class>", "Class"),
    ("<function>", "Function"),
    ("<module>", "Module"),
    ("<instance>", "Instance"),
    ])
paul@150 445
paul@150 446
paul@150 447
paul@0 448
# Output language reserved words.
paul@0 449
paul@0 450
# The C keywords are listed together with NULL. Paths found in this list are
# prefixed by encode_path so that they do not appear as they are in the
# output program.

reserved_words = [
    "auto", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern",
    "float", "for", "goto",
    "if", "inline", "int", "long",
    "NULL",
    "register", "restrict", "return",
    "short", "signed", "sizeof", "static", "struct", "switch",
    "typedef",
    "union", "unsigned",
    "void", "volatile", "while",
    ]
paul@0 460
paul@0 461
# vim: tabstop=4 expandtab shiftwidth=4