1 #!/usr/bin/env python 2 3 """ 4 Java class file decoder. Specification found at the following URL: 5 http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html 6 7 Copyright (C) 2004, 2005, 2006, 2011 Paul Boddie <paul@boddie.org.uk> 8 Copyright (C) 2010 Braden Thomas <bradenthomas@me.com> 9 Copyright (C) 2011 David Drysdale <dmd@lurklurk.org> 10 11 This program is free software; you can redistribute it and/or modify it under 12 the terms of the GNU Lesser General Public License as published by the Free 13 Software Foundation; either version 3 of the License, or (at your option) any 14 later version. 15 16 This program is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 19 details. 20 21 You should have received a copy of the GNU Lesser General Public License along 22 with this program. If not, see <http://www.gnu.org/licenses/>. 23 """ 24 25 import struct # for general decoding of class files 26 27 # Utility functions. 

def u1(data):
    """Decode an unsigned big-endian 8-bit integer from the start of 'data'."""
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    """Decode an unsigned big-endian 16-bit integer from the start of 'data'."""
    return struct.unpack(">H", data[0:2])[0]

def s2(data):
    """Decode a signed big-endian 16-bit integer from the start of 'data'."""
    return struct.unpack(">h", data[0:2])[0]

def u4(data):
    """Decode an unsigned big-endian 32-bit integer from the start of 'data'."""
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    """Decode a signed big-endian 32-bit integer from the start of 'data'."""
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    """Decode a signed big-endian 64-bit integer from the start of 'data'."""
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    """Decode a big-endian 32-bit float from the start of 'data'."""
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    """Decode a big-endian 64-bit float from the start of 'data'."""
    return struct.unpack(">d", data[0:8])[0]

def su1(value):
    """Encode 'value' as an unsigned 8-bit integer."""
    return struct.pack(">B", value)

def su2(value):
    """Encode 'value' as an unsigned big-endian 16-bit integer."""
    return struct.pack(">H", value)

def ss2(value):
    """Encode 'value' as a signed big-endian 16-bit integer."""
    return struct.pack(">h", value)

def su4(value):
    """Encode 'value' as an unsigned big-endian 32-bit integer."""
    return struct.pack(">L", value)

def ss4(value):
    """Encode 'value' as a signed big-endian 32-bit integer."""
    return struct.pack(">l", value)

def ss8(value):
    """Encode 'value' as a signed big-endian 64-bit integer."""
    return struct.pack(">q", value)

def sf4(value):
    """Encode 'value' as a big-endian 32-bit float."""
    return struct.pack(">f", value)

def sf8(value):
    """Encode 'value' as a big-endian 64-bit float."""
    return struct.pack(">d", value)

# Useful tables and constants.

# Maps the first character of a field descriptor to a Python type name.
descriptor_base_type_mapping = {
    "B" : "int",
    "C" : "str",
    "D" : "float",
    "F" : "float",
    "I" : "int",
    "J" : "int",
    "L" : "object",
    "S" : "int",
    "Z" : "bool",
    "[" : "list"
    }

# Maps the Python type names above to the default value used for that type.
type_names_to_default_values = {
    "int" : 0,
    "str" : u"",
    "float" : 0.0,
    "object" : None,
    "bool" : 0, # NOTE: Should be False.
    "list" : []
    }

def get_default_for_type(type_name):

    """
    Return the default value for the given 'type_name', or None if the name is
    not present in 'type_names_to_default_values'.

    List defaults are returned as a fresh copy so that a caller mutating the
    result cannot alter the default seen by every later caller.
    """

    value = type_names_to_default_values.get(type_name)
    if isinstance(value, list):
        return list(value)
    return value

# Access flag bit masks. SUPER and SYNCHRONIZED deliberately share a value.
PUBLIC, PRIVATE, PROTECTED, STATIC, FINAL, SUPER, SYNCHRONIZED, VOLATILE, TRANSIENT, NATIVE, INTERFACE, ABSTRACT, STRICT = \
    0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800

def has_flags(flags, desired):

    """
    Return whether every flag in the 'desired' sequence is set in 'flags'.
    An empty 'desired' sequence is always satisfied.
    """

    # Combine the desired flags with a plain loop rather than the 'reduce'
    # built-in, which is not available as a built-in on all Python versions.
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags

# Useful mix-ins.

class PythonMethodUtils:

    """
    A mix-in deriving Python-friendly names for methods. The class it is mixed
    into must provide 'get_name' and a 'get_descriptor' method returning a
    (parameters, return type) method descriptor.
    """

    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_unqualified_python_name(self):

        """
        Return the method name, with "<init>" and "<clinit>" translated to
        "__init__" and "__clinit__" respectively.
        """

        name = self.get_name()
        if str(name) == "<init>":
            return "__init__"
        elif str(name) == "<clinit>":
            return "__clinit__"
        else:
            return str(name)

    def get_python_name(self):

        """
        Return the unqualified name followed by an encoding of the parameter
        types. "__clinit__" is returned without any such suffix.
        """

        name = self.get_unqualified_python_name()
        if name == "__clinit__":
            return name
        return name + self.symbol_sep + self._get_descriptor_as_name()

    def _get_descriptor_as_name(self):
        "Return the parameter types encoded as names joined by 'symbol_sep'."
        l = []
        for descriptor_type in self.get_descriptor()[0]:
            l.append(self._get_type_as_name(descriptor_type))
        return self.symbol_sep.join(l)

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the (base type, object type, array type) tuple
        'descriptor_type'. The suffix 's' accumulates one 'array_sep' for each
        array dimension encountered.
        """

        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type.replace("/", self.type_sep) + s
        elif base_type == "[":
            return self._get_type_as_name(array_type, s + self.array_sep)
        else:
            return self.base_seps[0] + base_type + self.base_seps[1] + s

class PythonNameUtils:

    """
    A mix-in deriving dotted Python names. The class it is mixed into must
    provide 'get_name'.
    """

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". Names starting with "[" are
        parsed as field descriptors and the innermost element type is used.
        """

        # NOTE: This may not be comprehensive.
        if not str(self.get_name()).startswith("["):
            return str(self.get_name()).replace("/", ".")
        else:
            return self._get_type_name(
                get_field_descriptor(
                    str(self.get_name())
                    )
                ).replace("/", ".")

    def _get_type_name(self, descriptor_type):
        "Return the type name found beneath any array levels of 'descriptor_type'."
        base_type, object_type, array_type = descriptor_type
        if base_type == "L":
            return object_type
        elif base_type == "[":
            return self._get_type_name(array_type)
        else:
            return descriptor_base_type_mapping[base_type]

class NameUtils:

    "A mix-in resolving 'name_index' against the class file's constants."

    def get_name(self):
        "Return the constant referenced by 'name_index', or None if it is zero."
        if self.name_index != 0:
            # Indexes are one-based; the constants list is zero-based.
            return self.class_file.constants[self.name_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    A mix-in resolving 'name_and_type_index' and 'class_index' against the
    class file's constants.
    """

    def get_name(self):
        "Return the name from the referenced constant, or None for index zero."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_name()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_field_descriptor(self):
        "Return the field descriptor from the referenced constant, or None."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_field_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_method_descriptor(self):
        "Return the method descriptor from the referenced constant, or None."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1].get_method_descriptor()
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_class(self):
        "Return the constant referenced by 'class_index'."
        return self.class_file.constants[self.class_index - 1]

# Symbol parsing.

def get_method_descriptor(s):

    """
    Parse the method descriptor string 's', returning a tuple of the form
    (parameter types, return type). Each type is a (base type, object type,
    array type) tuple; the return type is None for a "V" (void) return.

    Raises AssertionError if 's' does not start with "(", and IndexError if
    the descriptor ends prematurely.
    """

    # Raised explicitly (not via 'assert') so the check survives python -O,
    # whilst keeping the exception type that an 'assert' would have produced.
    if s[0] != "(":
        raise AssertionError("method descriptor does not start with '(': %r" % (s,))
    params = []
    s = s[1:]
    while s[0] != ")":
        parameter_descriptor, s = _get_parameter_descriptor(s)
        params.append(parameter_descriptor)
    # s[0] is now ")", making s[1] the start of the return type.
    if s[1] != "V":
        return_type, s = _get_field_type(s[1:])
    else:
        return_type = None
    return params, return_type

def get_field_descriptor(s):
    "Parse the field descriptor string 's', returning a type tuple."
    return _get_field_type(s)[0]

def _get_parameter_descriptor(s):
    "Parse one parameter type from 's', returning (type tuple, remainder)."
    return _get_field_type(s)

def _get_component_type(s):
    "Parse an array component type from 's', returning (type tuple, remainder)."
    return _get_field_type(s)

def _get_field_type(s):

    """
    Parse one type from the start of 's', returning a tuple of the form
    ((base type, object type, array type), remainder).
    """

    base_type, s = _get_base_type(s)
    object_type = None
    array_type = None
    if base_type == "L":
        object_type, s = _get_object_type(s)
    elif base_type == "[":
        array_type, s = _get_array_type(s)
    return (base_type, object_type, array_type), s

def _get_base_type(s):
    "Return (first character, remainder) of 's', or (None, s) if 's' is empty."
    if len(s) > 0:
        return s[0], s[1:]
    else:
        return None, s

def _get_object_type(s):

    """
    Return (class name, remainder) where the class name is the text of 's' up
    to the first ";", or (None, s) if 's' is empty.

    Raises AssertionError if a non-empty 's' contains no ";".
    """

    if len(s) > 0:
        s_end = s.find(";")
        # Without this check a missing ";" would yield -1 and silently slice
        # the wrong text. It is raised explicitly so that python -O cannot
        # strip it, keeping the exception type an 'assert' would have used.
        if s_end == -1:
            raise AssertionError("object type is not terminated by ';': %r" % (s,))
        return s[:s_end], s[s_end+1:]
    else:
        return None, s

def _get_array_type(s):
    "Return (component type tuple, remainder), or (None, s) if 's' is empty."
    if len(s) > 0:
        return _get_component_type(s)
    else:
        return None, s

# Constant information.

class ClassInfo(NameUtils, PythonNameUtils):

    "A class constant holding the index of its name."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded form of this constant, excluding any tag."
        return su2(self.name_index)

class RefInfo(NameAndTypeUtils):

    "A reference constant holding a class index and a name-and-type index."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

    def serialize(self):
        "Return the encoded form of this constant, excluding any tag."
        return su2(self.class_index)+su2(self.name_and_type_index)

class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference to a field."

    def get_descriptor(self):
        "Return the field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference to a method."

    def get_descriptor(self):
        "Return the method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(MethodRefInfo):

    "A reference to an interface method."

    pass

class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant pairing a name index with a descriptor index."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def serialize(self):
        "Return the encoded form of this constant, excluding any tag."
        return su2(self.name_index)+su2(self.descriptor_index)

    def get_field_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Return the referenced descriptor parsed as a method descriptor."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "A constant holding a length-prefixed sequence of bytes."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def serialize(self):
        "Return the encoded form of this constant, excluding any tag."
        return su2(self.length)+self.bytes

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        # NOTE(review): the bytes are decoded as standard UTF-8; confirm that
        # NOTE(review): this is adequate for the encoding used in class files.
        return unicode(self.bytes, "utf-8")

    def get_value(self):
        "Return the undecoded bytes of this constant."
        return str(self)

class StringInfo:

    "A string constant holding the index of the constant providing its text."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded form of this constant, excluding any tag."
        return su2(self.string_index)

    def __str__(self):
        return str(self.class_file.constants[self.string_index - 1])

    def __unicode__(self):
        return unicode(self.class_file.constants[self.string_index - 1])

    def get_value(self):
        "Return the text of the referenced constant."
        return str(self)

class SmallNumInfo:

    "A numeric constant stored in four bytes."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

    def serialize(self):
        "Return the stored bytes unchanged."
        return self.bytes

class IntegerInfo(SmallNumInfo):

    "A signed 32-bit integer constant."

    def get_value(self):
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A 32-bit floating point constant."

    def get_value(self):
        return f4(self.bytes)

class LargeNumInfo:

    "A numeric constant stored as two four-byte halves."

    def init(self, data, class_file):
        "Read this constant from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.high_bytes = data[0:4]
        self.low_bytes = data[4:8]
        return data[8:]

    def serialize(self):
        "Return the stored bytes unchanged, high half first."
        return self.high_bytes+self.low_bytes


class LongInfo(LargeNumInfo):

    "A signed 64-bit integer constant."

    def get_value(self):
        return s8(self.high_bytes + self.low_bytes)

class DoubleInfo(LargeNumInfo):

    "A 64-bit floating point constant."

    def get_value(self):
        return f8(self.high_bytes + self.low_bytes)

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils):

    "Common details of fields and methods: flags, name, descriptor, attributes."

    def init(self, data, class_file):
        "Read this item from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

    def serialize(self):
        "Return the encoded form of this item including its attributes."
        od = su2(self.access_flags)+su2(self.name_index)+su2(self.descriptor_index)
        od += self.class_file._serialize_attributes(self.attributes)
        return od

class FieldInfo(ItemInfo, PythonNameUtils):

    "A field of a class."

    def get_descriptor(self):
        "Return the parsed field descriptor of this field."
        return get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo, PythonMethodUtils):

    "A method of a class."

    def get_descriptor(self):
        "Return the parsed method descriptor of this method."
        return get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    A generic attribute whose content is retained as undecoded bytes. The
    'init' methods of attributes read from the attribute length onwards and
    return the unconsumed remainder of the data.
    """

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        # Record the class file as every specialised attribute class does.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

    def serialize(self):
        "Return the encoded attribute length followed by the retained bytes."
        return su4(self.attribute_length)+self.info

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the index of a source file name."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        return data[6:]

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        return su4(self.attribute_length)+su2(self.name_index)

class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant value."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The content must be exactly the two-byte index read above.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value of the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        return su4(self.attribute_length)+su2(self.constant_value_index)

class CodeAttributeInfo(AttributeInfo):

    "An attribute holding bytecode, an exception table and nested attributes."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        od = su4(self.attribute_length)+su2(self.max_stack)+su2(self.max_locals)+su4(self.code_length)+self.code
        od += su2(self.exception_table_length)
        for e in self.exception_table:
            od += e.serialize()
        od += self.class_file._serialize_attributes(self.attributes)
        return od

class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of exception constant indexes."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        od = su4(self.attribute_length)+su2(self.number_of_exceptions)
        for ei in self.exception_index_table:
            od += su2(ei)
        return od

class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of inner class details."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        od = su4(self.attribute_length)+su2(self.number_of_classes)
        for c in self.classes:
            od += c.serialize()
        return od

class SyntheticAttributeInfo(AttributeInfo):

    "The Synthetic attribute, handled as a generic attribute."

    pass

class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of line number details."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        od = su4(self.attribute_length)+su2(self.line_number_table_length)
        for ln in self.line_number_table:
            od += ln.serialize()
        return od

class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of local variable details."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        od = su4(self.attribute_length)+su2(self.local_variable_table_length)
        for lv in self.local_variable_table:
            od += lv.serialize()
        return od

class LocalVariableTypeAttributeInfo(AttributeInfo):

    """
    An attribute holding a table of local variable type details. Entries are
    read using LocalVariableInfo.
    """

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        local_variable_type_table_length = u2(data[4:6])
        data = data[6:]
        self.local_variable_type_table = []
        for i in range(0, local_variable_type_table_length):
            local_variable = LocalVariableInfo()
            data = local_variable.init(data, self.class_file)
            self.local_variable_type_table.append(local_variable)
        return data

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        od = su4(self.attribute_length)+su2(len(self.local_variable_type_table))
        od += "".join([lv.serialize() for lv in self.local_variable_type_table])
        return od

class DeprecatedAttributeInfo(AttributeInfo):

    "The Deprecated attribute, handled as a generic attribute."

    pass

class VerificationTypeInfo(object):

    """
    A verification type identified by a one-byte tag. Instances are created
    with the tag obtained by lookahead; 'init' then consumes that tag.
    """

    def __init__(self, tag):
        self.tag = tag

    def init(self, data, class_file):
        "Consume the tag from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        tag = u1(data[0:1])
        # The data must be the same data that selected this class.
        assert(tag == self.tag)
        return data[1:]

    def serialize(self):
        "Return the encoded tag."
        return su1(self.tag)

class TopVariableInfo(VerificationTypeInfo):
    TAG = 0

class IntegerVariableInfo(VerificationTypeInfo):
    TAG = 1

class FloatVariableInfo(VerificationTypeInfo):
    TAG = 2

class DoubleVariableInfo(VerificationTypeInfo):
    TAG = 3

class LongVariableInfo(VerificationTypeInfo):
    TAG = 4

class NullVariableInfo(VerificationTypeInfo):
    TAG = 5

class UninitializedThisVariableInfo(VerificationTypeInfo):
    TAG = 6

class ObjectVariableInfo(VerificationTypeInfo):

    "A verification type whose tag is followed by a constant pool index."

    TAG = 7

    def init(self, data, class_file):
        "Read the tag and index from 'data', returning the remainder."
        data = super(ObjectVariableInfo, self).init(data, class_file)
        self.cpool_index = u2(data)
        return data[2:]

    def serialize(self):
        "Return the encoded tag and index."
        return super(ObjectVariableInfo, self).serialize() + su2(self.cpool_index)

class UninitializedVariableInfo(VerificationTypeInfo):

    "A verification type whose tag is followed by a two-byte offset."

    TAG = 8

    def init(self, data, class_file):
        "Read the tag and offset from 'data', returning the remainder."
        data = super(UninitializedVariableInfo, self).init(data, class_file)
        self.offset = u2(data)
        return data[2:]

    def serialize(self):
        "Return the encoded tag and offset."
        return super(UninitializedVariableInfo, self).serialize() + su2(self.offset)

VARIABLE_INFO_CLASSES = (TopVariableInfo, IntegerVariableInfo, FloatVariableInfo, DoubleVariableInfo,
                         LongVariableInfo, NullVariableInfo, UninitializedThisVariableInfo,
                         ObjectVariableInfo, UninitializedVariableInfo)
# Maps each tag value to the class handling it.
VARIABLE_INFO_TAG_MAP = dict([(cls.TAG, cls) for cls in VARIABLE_INFO_CLASSES])

# Exception.

class UnknownVariableInfo(Exception):

    "Raised when a verification type tag is not recognised."

    def __init__(self, tag):
        self.tag = tag

    def __str__(self):
        return repr(self.tag)

def create_verification_type_info(data):

    """
    Return a new, unpopulated verification type object of the class selected
    by the tag at the start of 'data'. Raises UnknownVariableInfo for a tag
    with no corresponding class.
    """

    # Does not consume data, just does lookahead.
    tag = u1(data[0:1])
    if tag in VARIABLE_INFO_TAG_MAP:
        return VARIABLE_INFO_TAG_MAP[tag](tag)
    else:
        raise UnknownVariableInfo(tag)

class StackMapFrame(object):

    """
    A stack map frame identified by a one-byte frame type. Instances are
    created with the type obtained by lookahead; 'init' then consumes it.
    Subclasses declare the inclusive range of frame types they handle using
    TYPE_LOWER and TYPE_UPPER.
    """

    def __init__(self, frame_type):
        self.frame_type = frame_type

    def init(self, data, class_file):
        "Consume the frame type from 'data', returning the remainder."
        self.class_file = class_file
        frame_type = u1(data[0:1])
        # The data must be the same data that selected this class.
        assert(frame_type == self.frame_type)
        return data[1:]

    def serialize(self):
        "Return the encoded frame type."
        return su1(self.frame_type)

class SameFrame(StackMapFrame):
    TYPE_LOWER = 0
    TYPE_UPPER = 63

class SameLocals1StackItemFrame(StackMapFrame):

    "A frame holding one stack item, with the offset implied by its type."

    TYPE_LOWER = 64
    TYPE_UPPER = 127

    def init(self, data, class_file):
        "Read this frame from 'data', returning the unconsumed remainder."
        data = super(SameLocals1StackItemFrame, self).init(data, class_file)
        self.offset_delta = self.frame_type - 64
        self.stack = [create_verification_type_info(data)]
        return self.stack[0].init(data, class_file)

    def serialize(self):
        "Return the encoded frame type and stack item."
        return super(SameLocals1StackItemFrame, self).serialize()+self.stack[0].serialize()

class SameLocals1StackItemFrameExtended(StackMapFrame):

    "A frame holding an explicit offset and one stack item."

    TYPE_LOWER = 247
    TYPE_UPPER = 247

    def init(self, data, class_file):
        "Read this frame from 'data', returning the unconsumed remainder."
        data = super(SameLocals1StackItemFrameExtended, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        data = data[2:]
        self.stack = [create_verification_type_info(data)]
        return self.stack[0].init(data, class_file)

    def serialize(self):
        "Return the encoded frame type, offset and stack item."
        return super(SameLocals1StackItemFrameExtended, self).serialize()+su2(self.offset_delta)+self.stack[0].serialize()

class ChopFrame(StackMapFrame):

    "A frame holding only an explicit offset."

    TYPE_LOWER = 248
    TYPE_UPPER = 250

    def init(self, data, class_file):
        "Read this frame from 'data', returning the unconsumed remainder."
        data = super(ChopFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded frame type and offset."
        return super(ChopFrame, self).serialize()+su2(self.offset_delta)

class SameFrameExtended(StackMapFrame):

    "A frame holding only an explicit offset."

    TYPE_LOWER = 251
    TYPE_UPPER = 251

    def init(self, data, class_file):
        "Read this frame from 'data', returning the unconsumed remainder."
        data = super(SameFrameExtended, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded frame type and offset."
        return super(SameFrameExtended, self).serialize()+su2(self.offset_delta)

class AppendFrame(StackMapFrame):

    "A frame holding an offset and a number of locals implied by its type."

    TYPE_LOWER = 252
    TYPE_UPPER = 254

    def init(self, data, class_file):
        "Read this frame from 'data', returning the unconsumed remainder."
        data = super(AppendFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        data = data[2:]
        # Frame types 252 to 254 provide 1 to 3 locals respectively.
        num_locals = self.frame_type - 251
        self.locals = []
        for ii in range(num_locals):
            info = create_verification_type_info(data)
            data = info.init(data, class_file)
            self.locals.append(info)
        return data

    def serialize(self):
        "Return the encoded frame type, offset and locals."
        od = super(AppendFrame, self).serialize()+su2(self.offset_delta)
        od += "".join([l.serialize() for l in self.locals])
        return od

class FullFrame(StackMapFrame):

    "A frame holding an offset with explicit tables of locals and stack items."

    TYPE_LOWER = 255
    TYPE_UPPER = 255

    def init(self, data, class_file):
        "Read this frame from 'data', returning the unconsumed remainder."
        data = super(FullFrame, self).init(data, class_file)
        self.offset_delta = u2(data[0:2])
        num_locals = u2(data[2:4])
        data = data[4:]
        self.locals = []
        for ii in range(num_locals):
            info = create_verification_type_info(data)
            data = info.init(data, class_file)
            self.locals.append(info)
        num_stack_items = u2(data[0:2])
        data = data[2:]
        self.stack = []
        for ii in range(num_stack_items):
            stack_item = create_verification_type_info(data)
            data = stack_item.init(data, class_file)
            self.stack.append(stack_item)
        return data

    def serialize(self):
        "Return the encoded frame type, offset, locals and stack items."
        od = super(FullFrame, self).serialize()+su2(self.offset_delta)+su2(len(self.locals))
        od += "".join([l.serialize() for l in self.locals])
        od += su2(len(self.stack))
        od += "".join([s.serialize() for s in self.stack])
        return od

FRAME_CLASSES = (SameFrame, SameLocals1StackItemFrame, SameLocals1StackItemFrameExtended,
                 ChopFrame, SameFrameExtended, AppendFrame, FullFrame)

# Exception.

class UnknownStackFrame(Exception):

    "Raised when a frame type is not handled by any of the frame classes."

    def __init__(self, frame_type):
        self.frame_type = frame_type

    def __str__(self):
        return repr(self.frame_type)

def create_stack_frame(data):

    """
    Return a new, unpopulated frame object of the class whose range of frame
    types includes the type at the start of 'data'. Raises UnknownStackFrame
    if no class handles the type.
    """

    # Does not consume data, just does lookahead.
    frame_type = u1(data[0:1])
    for cls in FRAME_CLASSES:
        if frame_type >= cls.TYPE_LOWER and frame_type <= cls.TYPE_UPPER:
            return cls(frame_type)
    raise UnknownStackFrame(frame_type)

class StackMapTableAttributeInfo(AttributeInfo):

    "An attribute holding a table of stack map frames."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        num_entries = u2(data[4:6])
        self.entries = []
        data = data[6:]
        for i in range(0, num_entries):
            frame = create_stack_frame(data)
            data = frame.init(data, class_file)
            self.entries.append(frame)
        return data

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        od = su4(self.attribute_length)+su2(len(self.entries))
        od += "".join([e.serialize() for e in self.entries])
        return od

class EnclosingMethodAttributeInfo(AttributeInfo):

    "An attribute holding a class index and a method index."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.class_index = u2(data[4:6])
        self.method_index = u2(data[6:8])
        return data[8:]

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        return su4(self.attribute_length)+su2(self.class_index)+su2(self.method_index)

class SignatureAttributeInfo(AttributeInfo):

    "An attribute holding a signature index."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.signature_index = u2(data[4:6])
        return data[6:]

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        return su4(self.attribute_length)+su2(self.signature_index)

class SourceDebugExtensionAttributeInfo(AttributeInfo):

    "An attribute holding debug extension data as undecoded bytes."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.debug_extension = data[4:(4 + self.attribute_length)]
        return data[(4+ self.attribute_length):]

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        return su4(self.attribute_length)+self.debug_extension

class ElementValue(object):

    """
    An annotation element value identified by a one-character tag. Instances
    are created with the tag obtained by lookahead; 'init' then consumes it.
    """

    def __init__(self, tag):
        self.tag = tag

    def init(self, data, class_file):
        "Consume the tag from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        tag = chr(u1(data[0:1]))
        # The data must be the same data that selected this class.
        assert(tag == self.tag)
        return data[1:]

    def serialize(self):
        "Return the encoded tag."
        return su1(ord(self.tag))

class ConstValue(ElementValue):

    "An element value holding a constant value index."

    def init(self, data, class_file):
        "Read this value from 'data', returning the unconsumed remainder."
        data = super(ConstValue, self).init(data, class_file)
        self.const_value_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded tag and index."
        return super(ConstValue, self).serialize()+su2(self.const_value_index)

class EnumConstValue(ElementValue):

    "An element value holding a type name index and a constant name index."

    def init(self, data, class_file):
        "Read this value from 'data', returning the unconsumed remainder."
        data = super(EnumConstValue, self).init(data, class_file)
        self.type_name_index = u2(data[0:2])
        self.const_name_index = u2(data[2:4])
        return data[4:]

    def serialize(self):
        "Return the encoded tag and indexes."
        return super(EnumConstValue, self).serialize()+su2(self.type_name_index)+su2(self.const_name_index)

class ClassInfoValue(ElementValue):

    "An element value holding a class info index."

    def init(self, data, class_file):
        "Read this value from 'data', returning the unconsumed remainder."
        data = super(ClassInfoValue, self).init(data, class_file)
        self.class_info_index = u2(data[0:2])
        return data[2:]

    def serialize(self):
        "Return the encoded tag and index."
        return super(ClassInfoValue, self).serialize()+su2(self.class_info_index)

class AnnotationValue(ElementValue):

    "An element value holding a nested annotation."

    def init(self, data, class_file):
        "Read this value from 'data', returning the unconsumed remainder."
        data = super(AnnotationValue, self).init(data, class_file)
        self.annotation_value = Annotation()
        return self.annotation_value.init(data, class_file)

    def serialize(self):
        "Return the encoded tag and nested annotation."
        return super(AnnotationValue, self).serialize()+self.annotation_value.serialize()

class ArrayValue(ElementValue):

    "An element value holding a sequence of element values."

    def init(self, data, class_file):
        "Read this value from 'data', returning the unconsumed remainder."
        data = super(ArrayValue, self).init(data, class_file)
        num_values = u2(data[0:2])
        data = data[2:]
        self.values = []
        for ii in range(num_values):
            element_value = create_element_value(data)
            data = element_value.init(data, class_file)
            self.values.append(element_value)
        return data

    def serialize(self):
        "Return the encoded tag, value count and values."
        od = super(ArrayValue, self).serialize()+su2(len(self.values))
        od += "".join([v.serialize() for v in self.values])
        return od

# Exception.

class UnknownElementValue(Exception):

    "Raised when an element value tag is not recognised."

    def __init__(self, tag):
        self.tag = tag

    def __str__(self):
        return repr(self.tag)

def create_element_value(data):

    """
    Return a new, unpopulated element value object of the class selected by
    the tag at the start of 'data', without consuming any data. Raises
    UnknownElementValue for an unrecognised tag.
    """

    tag = chr(u1(data[0:1]))
    if tag in ('B', 'C', 'D', 'F', 'I', 'J', 'S', 'Z', 's'):
        return ConstValue(tag)
    elif tag == 'e':
        return EnumConstValue(tag)
    elif tag == 'c':
        return ClassInfoValue(tag)
    elif tag == '@':
        return AnnotationValue(tag)
    elif tag == '[':
        return ArrayValue(tag)
    else:
        raise UnknownElementValue(tag)

class Annotation(object):

    """
    An annotation holding a type index and a list of (element name index,
    element value) pairs.
    """

    def init(self, data, class_file):
        "Read this annotation from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.type_index = u2(data[0:2])
        num_element_value_pairs = u2(data[2:4])
        data = data[4:]
        self.element_value_pairs = []
        for ii in range(num_element_value_pairs):
            element_name_index = u2(data[0:2])
            data = data[2:]
            element_value = create_element_value(data)
            data = element_value.init(data, class_file)
            self.element_value_pairs.append((element_name_index, element_value))
        return data

    def serialize(self):
        "Return the encoded form of this annotation."
        od = su2(self.type_index)+su2(len(self.element_value_pairs))
        od += "".join([su2(evp[0])+evp[1].serialize() for evp in self.element_value_pairs])
        return od

class RuntimeAnnotationsAttributeInfo(AttributeInfo):

    "An attribute holding a table of annotations."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        num_annotations = u2(data[4:6])
        data = data[6:]
        self.annotations = []
        for ii in range(num_annotations):
            annotation = Annotation()
            data = annotation.init(data, class_file)
            self.annotations.append(annotation)
        return data

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        od = su4(self.attribute_length)+su2(len(self.annotations))
        od += "".join([a.serialize() for a in self.annotations])
        return od

class RuntimeVisibleAnnotationsAttributeInfo(RuntimeAnnotationsAttributeInfo):
    pass

class RuntimeInvisibleAnnotationsAttributeInfo(RuntimeAnnotationsAttributeInfo):
    pass

class RuntimeParameterAnnotationsAttributeInfo(AttributeInfo):

    "An attribute holding a list of annotations for each of a set of parameters."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # The parameter count occupies a single byte, unlike other counts.
        num_parameters = u1(data[4:5])
        data = data[5:]
        self.parameter_annotations = []
        for ii in range(num_parameters):
            num_annotations = u2(data[0:2])
            data = data[2:]
            annotations = []
            for jj in range(num_annotations):
                annotation = Annotation()
                data = annotation.init(data, class_file)
                annotations.append(annotation)
            self.parameter_annotations.append(annotations)
        return data

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        od = su4(self.attribute_length)+su1(len(self.parameter_annotations))
        for pa in self.parameter_annotations:
            od += su2(len(pa))
            od += "".join([a.serialize() for a in pa])
        return od

class RuntimeVisibleParameterAnnotationsAttributeInfo(RuntimeParameterAnnotationsAttributeInfo):
    pass

class RuntimeInvisibleParameterAnnotationsAttributeInfo(RuntimeParameterAnnotationsAttributeInfo):
    pass

class AnnotationDefaultAttributeInfo(AttributeInfo):

    "An attribute holding a single element value."

    def init(self, data, class_file):
        "Read this attribute from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        data = data[4:]
        self.default_value = create_element_value(data)
        return self.default_value.init(data, class_file)

    def serialize(self):
        "Return the encoded form of this attribute from its length onwards."
        return su4(self.attribute_length)+self.default_value.serialize()

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An exception table entry: four unsigned 16-bit values."

    def init(self, data):
        "Read this entry from 'data', returning the unconsumed remainder."
        (self.start_pc,
         self.end_pc,
         self.handler_pc,
         self.catch_type) = struct.unpack(">HHHH", data[0:8])
        return data[8:]

    def serialize(self):
        "Return the eight-byte encoded form of this entry."
        return struct.pack(">HHHH", self.start_pc, self.end_pc,
                           self.handler_pc, self.catch_type)

class InnerClassInfo(NameUtils):

    "An inner class table entry: four unsigned 16-bit values."

    def init(self, data, class_file):
        "Read this entry from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        (self.inner_class_info_index,
         self.outer_class_info_index,
         self.inner_name_index,
         self.inner_class_access_flags) = struct.unpack(">HHHH", data[0:8])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index
        return data[8:]

    def serialize(self):
        "Return the eight-byte encoded form of this entry."
        return struct.pack(">HHHH", self.inner_class_info_index,
                           self.outer_class_info_index, self.name_index,
                           self.inner_class_access_flags)

class LineNumberInfo:

    "A line number table entry: two unsigned 16-bit values."

    def init(self, data):
        "Read this entry from 'data', returning the unconsumed remainder."
        self.start_pc, self.line_number = struct.unpack(">HH", data[0:4])
        return data[4:]

    def serialize(self):
        "Return the four-byte encoded form of this entry."
        return struct.pack(">HH", self.start_pc, self.line_number)

class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A local variable table entry: five unsigned 16-bit values."

    def init(self, data, class_file):
        "Read this entry from 'data', returning the unconsumed remainder."
        self.class_file = class_file
        (self.start_pc,
         self.length,
         self.name_index,
         self.descriptor_index,
         self.index) = struct.unpack(">HHHHH", data[0:10])
        return data[10:]

    def get_descriptor(self):
        "Return the referenced descriptor parsed as a field descriptor."
        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return get_field_descriptor(unicode(descriptor))

    def serialize(self):
        "Return the ten-byte encoded form of this entry."
        return struct.pack(">HHHHH", self.start_pc, self.length,
                           self.name_index, self.descriptor_index, self.index)

# Exceptions.

class UnknownTag(Exception):

    "An exception carrying an unrecognised tag value."

    def __init__(self, tag):
        self.tag = tag

    def __str__(self):
        return "%r" % (self.tag,)

class UnknownAttribute(Exception):

    "An exception carrying the name of an unrecognised attribute."

    def __init__(self, name):
        self.name = name

# A mapping from attribute names to the classes used to represent them.

ATTR_NAMES_TO_CLASS = {
    "SourceFile": SourceFileAttributeInfo,
    "ConstantValue": ConstantValueAttributeInfo,
    "Code": CodeAttributeInfo,
    "Exceptions": ExceptionsAttributeInfo,
    "InnerClasses": InnerClassesAttributeInfo,
    "Synthetic": SyntheticAttributeInfo,
    "LineNumberTable": LineNumberAttributeInfo,
    "LocalVariableTable": LocalVariableAttributeInfo,
    "Deprecated": DeprecatedAttributeInfo,
    # Java SE 1.6, class file >= 50.0, VMSpec v3 s4.7.4
    "StackMapTable": StackMapTableAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.7
    "EnclosingMethod": EnclosingMethodAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.9
    "Signature": SignatureAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.11
    "SourceDebugExtension": SourceDebugExtensionAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.14
    "LocalVariableTypeTable": LocalVariableTypeAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.16
    "RuntimeVisibleAnnotations": RuntimeVisibleAnnotationsAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.17
    "RuntimeInvisibleAnnotations": RuntimeInvisibleAnnotationsAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.18
    "RuntimeVisibleParameterAnnotations": RuntimeVisibleParameterAnnotationsAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.19
    "RuntimeInvisibleParameterAnnotations": RuntimeInvisibleParameterAnnotationsAttributeInfo,
    # Java SE 1.5, class file >= 49.0, VMSpec v3 s4.7.20
    "AnnotationDefault": AnnotationDefaultAttributeInfo,
}

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        UnknownAttribute is raised, carrying the value found, if 's' does not
        begin with the magic number 0xCAFEBABE.
        """

        # Cache mapping attribute classes to the constant pool indexes of
        # their names; built on demand by _serialize_attributes.
        self.attribute_class_to_index = None

        magic = u4(s[0:])
        if magic != 0xCAFEBABE:
            # NOTE: The exception type is retained for existing callers.
            raise UnknownAttribute(magic)
        self.minorv, self.majorv = u2(s[4:]), u2(s[6:])

        # Each reader consumes one structure and returns the remaining data.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def serialize(self):

        """
        Return the class file as a string, writing the structures in the order
        in which they are read by the initialiser.
        """

        return "".join([
            su4(0xCAFEBABE),
            su2(self.minorv),
            su2(self.majorv),
            self._serialize_constants(),
            self._serialize_access_flags(),
            self._serialize_this_class(),
            self._serialize_super_class(),
            self._serialize_interfaces(),
            self._serialize_fields(),
            self._serialize_methods(),
            self._serialize_attributes(self.attributes),
            ])

    def _encode_const(self, c):

        """
        Return the tag byte and serialised form of the constant 'c', or an
        empty string if 'c' is not a recognised constant (such as the None
        placeholder which follows a "large" entry).
        """

        # The order matters: a subclass must be tested before its base class.
        tagged_classes = (
            (Utf8Info, 1),
            (IntegerInfo, 3),
            (FloatInfo, 4),
            (LongInfo, 5),
            (DoubleInfo, 6),
            (ClassInfo, 7),
            (StringInfo, 8),
            (FieldRefInfo, 9),
            (InterfaceMethodRefInfo, 11), # check subclass first
            (MethodRefInfo, 10),
            (NameAndTypeInfo, 12),
            )
        for cls, tag in tagged_classes:
            if isinstance(c, cls):
                return su1(tag) + c.serialize()
        return ''

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning a tuple containing
        the constant object and the remaining data. UnknownTag is raised for
        an unrecognised tag byte.
        """

        classes_by_tag = {
            1 : Utf8Info,
            3 : IntegerInfo,
            4 : FloatInfo,
            5 : LongInfo,
            6 : DoubleInfo,
            7 : ClassInfo,
            8 : StringInfo,
            9 : FieldRefInfo,
            10 : MethodRefInfo,
            11 : InterfaceMethodRefInfo,
            12 : NameAndTypeInfo,
            }
        tag = u1(s[0:1])
        cls = classes_by_tag.get(tag)
        if cls is None:
            raise UnknownTag(tag)

        # Initialise the constant object.

        const = cls()
        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode constants from 's' until the one-based index reaches 'count',
        returning a tuple containing the list of constants and the remaining
        data.
        """

        constants = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            constants.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                constants.append(None)
                i += 1
            i += 1
        return constants, s

    def _get_items_from_table(self, cls, number, s):

        """
        Create 'number' instances of 'cls', initialising each from 's' in
        turn, and return a tuple containing the list of instances and the
        remaining data.
        """

        items = []
        for i in range(0, number):
            item = cls()
            s = item.init(s, self)
            items.append(item)
        return items, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', returning a tuple containing
        the attribute object and the remaining data. UnknownAttribute is
        raised if the attribute's name is not in ATTR_NAMES_TO_CLASS.
        """

        attribute_name_index = u2(s[0:2])
        # Constant pool indexes are one-based.
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = ATTR_NAMES_TO_CLASS.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode 'number' attributes from 's', returning a tuple containing the
        list of attributes and the remaining data.
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _serialize_constants(self):
        # The stored count is one more than the number of entries, which
        # includes the placeholders following "large" entries.
        encoded = [self._encode_const(c) for c in self.constants]
        return su2(len(self.constants) + 1) + "".join(encoded)

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _serialize_access_flags(self):
        return su2(self.access_flags)

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _serialize_this_class(self):
        return su2(self.constants.index(self.this_class) + 1)

    def _serialize_super_class(self):

        """
        Return the two-byte index of the superclass constant, or zero where
        no superclass was recorded.
        """

        # _get_super_class stores None for index zero; searching the constants
        # for None would either fail or find the placeholder which follows a
        # "large" entry.
        if self.super_class is None:
            return su2(0)
        return su2(self.constants.index(self.super_class) + 1)

    def _get_super_class(self, s):
        index = u2(s[0:2])
        if index != 0:
            return self.constants[index - 1], s[2:]
        else:
            return None, s[2:]

    def _get_interfaces(self, s):

        """
        Read the interface count and that many constant indexes from 's',
        returning a tuple containing the list of referenced constants and the
        remaining data.
        """

        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _serialize_interfaces(self):
        indexes = [su2(self.constants.index(interf) + 1) for interf in self.interfaces]
        return su2(len(self.interfaces)) + "".join(indexes)

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _serialize_fields(self):
        od = su2(len(self.fields))
        od += "".join([f.serialize() for f in self.fields])
        return od

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _serialize_attributes(self, attrs):

        """
        Return the serialised form of the attribute list 'attrs': a two-byte
        count and then, for each attribute, the two-byte constant pool index
        of its name followed by the attribute's own serialised form.

        UnknownAttribute is raised, carrying the attribute's class name, if
        the constant pool holds no name for an attribute's class.
        """

        od = su2(len(attrs))
        if len(attrs) == 0:
            return od
        if self.attribute_class_to_index is None:
            self.attribute_class_to_index = self._index_attribute_names()
        parts = [od]
        for attribute in attrs:
            parts.append(su2(self._find_attribute_name_index(attribute)))
            parts.append(attribute.serialize())
        return "".join(parts)

    def _index_attribute_names(self):

        """
        Return a dictionary mapping each attribute class whose name appears
        in the constant pool to the one-based index of that name.
        """

        indexes = {}
        index = 0
        for c in self.constants:
            index += 1
            if isinstance(c, Utf8Info) and str(c) in ATTR_NAMES_TO_CLASS:
                # A later occurrence of the same name replaces an earlier one.
                indexes[ATTR_NAMES_TO_CLASS[str(c)]] = index
        return indexes

    def _find_attribute_name_index(self, attribute):

        "Return the constant pool index of the name of the given 'attribute'."

        # Prefer the attribute's exact class so that the result does not
        # depend on dictionary ordering should one registered class derive
        # from another.
        name_index = self.attribute_class_to_index.get(attribute.__class__)
        if name_index is not None:
            return name_index
        for classtype, name_index in self.attribute_class_to_index.items():
            if isinstance(attribute, classtype):
                return name_index
        # Writing the attribute without a name index would corrupt the output.
        raise UnknownAttribute(attribute.__class__.__name__)

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

    def _serialize_methods(self):
        od = su2(len(self.methods))
        od += "".join([m.serialize() for m in self.methods])
        return od


if __name__ == "__main__":
    import sys

    # Round-trip check: decode the named class file and confirm that
    # serialising it reproduces the original data.
    f = open(sys.argv[1], "rb")
    try:
        in_data = f.read()
    finally:
        f.close()
    c = ClassFile(in_data)
    out_data = c.serialize()
    assert in_data == out_data, "serialised data differs from the input"

# vim: tabstop=4 expandtab shiftwidth=4