#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each helper decodes a single big-endian value from the front of 'data';
# any bytes beyond the width of the value are ignored.

def u1(data):
    "Return the unsigned 8-bit integer at the start of 'data'."
    value, = struct.unpack(">B", data[:1])
    return value

def u2(data):
    "Return the unsigned 16-bit integer at the start of 'data'."
    value, = struct.unpack(">H", data[:2])
    return value

def u4(data):
    "Return the unsigned 32-bit integer at the start of 'data'."
    value, = struct.unpack(">L", data[:4])
    return value

def s4(data):
    "Return the signed 32-bit integer at the start of 'data'."
    value, = struct.unpack(">l", data[:4])
    return value

def s8(data):
    "Return the signed 64-bit integer at the start of 'data'."
    value, = struct.unpack(">q", data[:8])
    return value

def f4(data):
    "Return the 32-bit float at the start of 'data'."
    value, = struct.unpack(">f", data[:4])
    return value

def f8(data):
    "Return the 64-bit float at the start of 'data'."
    value, = struct.unpack(">d", data[:8])
    return value

# Useful mix-ins.

class PythonNameUtils:

    "Mix-in mapping a Java member name onto its Python counterpart."

    def get_python_name(self):

        """
        Return "__init__" when the name reported by 'get_name' (supplied by
        the class this is mixed into) reads "<init>"; otherwise return that
        name unchanged.
        """

        java_name = self.get_name()
        if str(java_name) != "<init>":
            return java_name
        return "__init__"

class NameUtils(PythonNameUtils):

    "Mix-in for objects holding a 'name_index' into the constant table."

    def get_name(self):

        """
        Return the constant referred to by 'name_index' (a one-based index),
        or None if the index is zero.
        """

        # Some name indexes are zero to indicate special conditions.
        if self.name_index == 0:
            return None
        return self.class_file.constants[self.name_index - 1]

class NameAndTypeUtils(PythonNameUtils):

    """
    Mix-in for objects holding a 'name_and_type_index' into the constant
    table. Each accessor forwards to the same-named method of the constant
    found there, or returns None if the index is zero.
    """

    def get_name(self):
        "Return the name provided by the referenced constant."
        position = self.name_and_type_index
        # Some name indexes are zero to indicate special conditions.
        if position == 0:
            return None
        return self.class_file.constants[position - 1].get_name()

    def get_field_descriptor(self):
        "Return the field descriptor provided by the referenced constant."
        position = self.name_and_type_index
        # Some name indexes are zero to indicate special conditions.
        if position == 0:
            return None
        return self.class_file.constants[position - 1].get_field_descriptor()

    def get_method_descriptor(self):
        "Return the method descriptor provided by the referenced constant."
        position = self.name_and_type_index
        # Some name indexes are zero to indicate special conditions.
        if position == 0:
            return None
        return self.class_file.constants[position - 1].get_method_descriptor()

class DescriptorUtils:

    """
    Symbol parsing.

    A parsed type is a tuple (base_type, object_type, array_type): 'base_type'
    is the leading character of the type, 'object_type' is the class name when
    that character is "L", and 'array_type' is the parsed component type when
    it is "[". Apart from '_get_method_descriptor', every method returns a
    pair (result, remaining_text).
    """

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', which must open with "(", and return
        (params, return_type): a list of parsed parameter types and the
        parsed return type, the latter being None for a "V" return.
        """

        assert s[0] == "("
        remaining = s[1:]
        params = []
        while remaining[0] != ")":
            parsed, remaining = self._get_parameter_descriptor(remaining)
            params.append(parsed)
        # 'remaining' now begins with ")" and the return type follows it.
        if remaining[1] == "V":
            return_type = None
        else:
            return_type, remaining = self._get_field_type(remaining[1:])
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse one parameter type from the front of 's'."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse a field descriptor from the front of 's'."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse the component type of an array from the front of 's'."
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse one type from the front of 's', returning the tuple
        (base_type, object_type, array_type) together with the unparsed text.
        """

        object_type = array_type = None
        base_type, s = self._get_base_type(s)
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        "Split off the first character of 's'; yield None for empty text."
        if not s:
            return None, s
        return s[0], s[1:]

    def _get_object_type(self, s):

        """
        Split 's' at its first ";", returning the text before it (the class
        name) and the text after it. Empty text yields None.
        """

        if not s:
            return None, s
        terminator = s.find(";")
        assert terminator != -1
        return s[:terminator], s[terminator + 1:]

    def _get_array_type(self, s):
        "Parse an array's component type; yield None for empty text."
        if not s:
            return None, s
        return self._get_component_type(s)

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
# Python 3 has no separate 'unicode' type; alias it so that the descriptor
# accessors below behave the same way on both major versions.
try:
    unicode
except NameError:
    unicode = str

class ClassInfo(NameUtils):

    "Constant table entry (tag 7) referring to a class by name."

    def init(self, data, class_file):

        """
        Decode the entry from the front of 'data', remembering 'class_file'
        for later constant lookups. Return the data following the entry.
        """

        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "Common base of the field, method and interface method reference entries."

    def init(self, data, class_file):
        "Decode the two indexes of the entry; return the data following it."
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo):

    "Constant table entry (tag 9) referring to a field."

    def get_descriptor(self):
        "Return the parsed field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo):

    "Constant table entry (tag 10) referring to a method."

    def get_descriptor(self):
        "Return the parsed method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(RefInfo):

    "Constant table entry (tag 11) referring to an interface method."

    def get_descriptor(self):
        "Return the parsed method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class NameAndTypeInfo(NameUtils, DescriptorUtils):

    "Constant table entry (tag 12) pairing a name with a descriptor."

    def init(self, data, class_file):
        "Decode the two indexes of the entry; return the data following it."
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def get_field_descriptor(self):
        "Parse the referenced descriptor text as a field descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

    def get_method_descriptor(self):
        "Parse the referenced descriptor text as a method descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class Utf8Info:

    "Constant table entry (tag 1) holding length-prefixed text."

    def init(self, data, class_file):
        "Decode the length and the raw bytes; return the data following them."
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):

        """
        Return the text as the native string type: the raw bytes where 'str'
        is the byte string type (Python 2), the decoded text otherwise.
        """

        if str is bytes:
            return self.bytes
        return self.bytes.decode("utf-8")

    def __unicode__(self):
        "Return the bytes decoded as UTF-8."
        # NOTE(review): the class file specification describes a modified
        # UTF-8 encoding; plain UTF-8 decoding is retained here - confirm
        # whether the differences matter for the inputs in use.
        return self.bytes.decode("utf-8")

class StringInfo:

    "Constant table entry (tag 8) referring to the text of a string."

    def init(self, data, class_file):
        "Decode the string index; return the data following the entry."
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "Common base of the four-byte numeric entries; keeps the raw bytes."

    def init(self, data, class_file):
        "Store the four raw bytes; return the data following them."
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "Constant table entry (tag 3) holding a signed 32-bit integer."

    def get_value(self):
        "Return the stored bytes decoded as a signed integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "Constant table entry (tag 4) holding a 32-bit float."

    def get_value(self):
        "Return the stored bytes decoded as a float."
        return f4(self.bytes)

class LargeNumInfo:

    """
    Common base of the eight-byte numeric entries. The value is kept as two
    unsigned 32-bit halves, 'high_bytes' and 'low_bytes'.
    """

    def init(self, data, class_file):
        "Decode both halves; return the data following the entry."
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_raw_bytes(self):

        """
        Return the eight big-endian bytes of the entry. The halves are
        integers, so they have to be packed again before 's8' or 'f8' can
        reinterpret them; adding them together would only produce a number.
        """

        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):

    "Constant table entry (tag 5) holding a signed 64-bit integer."

    def get_value(self):
        "Return the two halves combined into a signed 64-bit integer."
        return s8(self._get_raw_bytes())

class DoubleInfo(LargeNumInfo):

    "Constant table entry (tag 6) holding a 64-bit float."

    def get_value(self):
        "Return the two halves combined into a 64-bit float."
        return f8(self._get_raw_bytes())

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    "Common base of the field and method records."

    def init(self, data, class_file):

        """
        Decode the flags, the name and descriptor indexes and the attribute
        table of the record. Return the data following the record.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo):

    "A field record."

    def get_descriptor(self):
        "Return the parsed field descriptor of this field."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):

    "A method record."

    def get_descriptor(self):
        "Return the parsed method descriptor of this method."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    Generic attribute whose payload is kept undecoded in 'info'. For every
    attribute class, 'data' starts at the attribute length field: the
    attribute name index has already been consumed by the caller.
    """

    def init(self, data, class_file):
        "Store the payload; return the data following the attribute."
        # Keep the owning class file, as all the other attribute classes do.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils):

    "The SourceFile attribute; 'get_name' yields the source file name."

    def init(self, data, class_file):
        "Decode the source file index; return the data following the attribute."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be handed back, otherwise the caller would
        # carry on parsing with None.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "The ConstantValue attribute, referring to a constant table entry."

    def init(self, data, class_file):
        "Decode the constant index; return the data following the attribute."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The payload is expected to be exactly the two-byte index.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the value reported by the referenced constant's 'get_value'."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    "The Code attribute: bytecode, exception table and nested attributes."

    def init(self, data, class_file):

        """
        Decode the stack and local limits, the bytecode (kept undecoded in
        'code'), the exception table and the nested attributes. Return the
        data following the nested attributes.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "The Exceptions attribute: a table of constant table indexes."

    def init(self, data, class_file):
        "Decode the index table; return the data following it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referred to by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "The InnerClasses attribute: a table of InnerClassInfo records."

    def init(self, data, class_file):
        "Decode the table; return the data following it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):

    "The LineNumberTable attribute: a table of LineNumberInfo records."

    def init(self, data, class_file):
        "Decode the table; return the data following it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "The LocalVariableTable attribute: a table of LocalVariableInfo records."

    def init(self, data, class_file):
        "Decode the table; return the data following it."
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # LocalVariableInfo resolves names through the class file, so it
            # has to be given one.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "One record of a Code attribute's exception table."

    def init(self, data):
        "Decode the four fields of the record; return the data following it."
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "One record of an InnerClasses attribute."

    def init(self, data, class_file):
        "Decode the four fields of the record; return the data following it."
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "One record of a LineNumberTable attribute."

    def init(self, data):
        "Decode the two fields of the record; return the data following it."
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, DescriptorUtils):

    """
    One record of a LocalVariableTable attribute. DescriptorUtils is mixed in
    because 'get_descriptor' relies on its '_get_field_descriptor'.
    """

    def init(self, data, class_file):
        "Decode the five fields of the record; return the data following it."
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor of the variable."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Constant table tags and the classes decoding the tagged entries.
    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo,
        }

    # Attribute names and the classes decoding the named attributes.
    _attribute_classes = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        The first eight bytes of 's' are skipped without being examined; the
        constant table is taken to start immediately after them.
        """

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the front of 's', returning the constant and
        the data following it. Raise UnknownTag, carrying the tag value, if
        the leading tag byte is not recognised.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)
        const = const_class()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode the constants of a table declared with the given 'count',
        returning the list of constants and the data following the table.
        """

        l = []
        # Have to skip certain entries specially.
        # Indexes are one-based, hence 'count' - 1 slots are filled.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' records of class 'cls'; return them and the rest of 's'."
        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' method records; return them and the rest of 's'."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' field records; return them and the rest of 's'."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the front of 's', returning the attribute and
        the data following it. Raise UnknownAttribute, carrying the attribute
        name, if no decoder is registered for that name.
        """

        attribute_name_index = u2(s[0:2])
        # Use the native string form of the name so that it compares equal to
        # the table keys on Python 2 and Python 3 alike.
        constant_name = str(self.constants[attribute_name_index - 1])
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes; return them and the rest of 's'."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Decode the count-prefixed constant table at the front of 's'."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Return the two-byte flags value at the front of 's' and the rest of 's'."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Return the constant indexed by the front of 's' and the rest of 's'."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant indexed by the front of 's' and the rest of 's'.
        A zero index yields None instead of a constant: indexing with
        0 - 1 would silently pick the last constant in the table.
        """

        index = u2(s[0:2])
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Decode the count-prefixed table of interface indexes at the front of 's'."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode the count-prefixed field table at the front of 's'."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode the count-prefixed attribute table at the front of 's'."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode the count-prefixed method table at the front of 's'."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary: read them as such and release the file again.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4