#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Text type used when converting constant pool entries to strings: 'unicode'
# on Python 2, 'str' on Python 3 (where the 'unicode' built-in does not exist).

try:
    _text_type = unicode
except NameError:
    _text_type = str

# Utility functions.
# Each function decodes a big-endian value from the start of the given byte
# string and ignores any trailing bytes. struct.error is raised by
# struct.unpack if the byte string is too short.

def u1(data):
    "Return the unsigned 8-bit integer found at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the unsigned 16-bit integer found at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def u4(data):
    "Return the unsigned 32-bit integer found at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    "Return the signed 32-bit integer found at the start of 'data'."
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    "Return the signed 64-bit integer found at the start of 'data'."
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    "Return the 32-bit float found at the start of 'data'."
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    "Return the 64-bit float found at the start of 'data'."
    return struct.unpack(">d", data[0:8])[0]

# Useful mix-ins.

class NameUtils:

    "Name lookup for objects providing 'name_index' and 'class_file'."

    def get_name(self):

        """
        Return the text of the constant referenced by 'name_index', or None if
        the index is zero.
        """

        if self.name_index != 0:
            # Constant pool indexes are one-based; the 'constants' list is not.
            return _text_type(self.class_file.constants[self.name_index - 1])
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class NameAndTypeUtils:

    """
    Lookups delegated to the constant referenced by 'name_and_type_index'.
    Each method returns None if that index is zero.
    """

    def _get_name_and_type(self):
        "Return the referenced constant, or None if the index is zero."
        if self.name_and_type_index != 0:
            return self.class_file.constants[self.name_and_type_index - 1]
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

    def get_name(self):
        "Return the result of get_name on the referenced constant."
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_name()

    def get_field_descriptor(self):
        "Return the result of get_field_descriptor on the referenced constant."
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_field_descriptor()

    def get_method_descriptor(self):
        "Return the result of get_method_descriptor on the referenced constant."
        name_and_type = self._get_name_and_type()
        if name_and_type is None:
            return None
        return name_and_type.get_method_descriptor()

class DescriptorUtils:

    """
    Symbol parsing.

    A parsed field type is a tuple (base_type, object_type, array_type) where
    'base_type' is the first character of the type, 'object_type' is the class
    name when the base type is "L" (otherwise None), and 'array_type' is the
    parsed component type when the base type is "[" (otherwise None).

    Apart from _get_method_descriptor, the methods return a pair consisting of
    the parsed value and the unconsumed remainder of the string.
    """

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', returning a tuple (params,
        return_type) where 'params' is a list of parsed field types and
        'return_type' is a parsed field type, or None for a "V" (void) return.
        An AssertionError is raised if 's' does not start with "(".
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here, s[0] is ")" and s[1] starts the return type.
        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type, s = None, s[1:]
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse one parameter type from the start of 's'."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse the field descriptor 's'."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse an array component type from the start of 's'."
        return self._get_field_type(s)

    def _get_field_type(self, s):
        "Parse one field type from the start of 's'."
        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        "Return the first character of 's' (None if 's' is empty)."
        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Return the class name at the start of 's', which ends at the first
        ";" (None if 's' is empty). An AssertionError is raised if a non-empty
        's' contains no ";".
        """

        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):
        "Return the parsed component type at the start of 's' (None if empty)."
        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
#
# Each 'init' method decodes the object from the start of 'data' (with any tag
# already removed), records the given 'class_file' for later lookups, and
# returns the data remaining after the decoded object.

class ClassInfo(NameUtils):

    "A class constant (tag 7)."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo(NameAndTypeUtils):

    "Common base of the field, method and interface method references."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo):

    "A field reference constant (tag 9)."

    def get_descriptor(self):
        "Return the parsed field descriptor of the referenced field."
        return RefInfo.get_field_descriptor(self)

class MethodRefInfo(RefInfo):

    "A method reference constant (tag 10)."

    def get_descriptor(self):
        "Return the parsed method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class InterfaceMethodRefInfo(RefInfo):

    "An interface method reference constant (tag 11)."

    def get_descriptor(self):
        "Return the parsed method descriptor of the referenced method."
        return RefInfo.get_method_descriptor(self)

class NameAndTypeInfo(NameUtils, DescriptorUtils):

    "A name and type constant (tag 12)."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def _get_descriptor_text(self):
        "Return the text of the constant referenced by 'descriptor_index'."
        return _text_type(self.class_file.constants[self.descriptor_index - 1])

    def get_field_descriptor(self):
        "Return the descriptor parsed as a field descriptor."
        return self._get_field_descriptor(self._get_descriptor_text())

    def get_method_descriptor(self):
        "Return the descriptor parsed as a method descriptor."
        return self._get_method_descriptor(self._get_descriptor_text())

class Utf8Info:

    "A UTF-8 string constant (tag 1). The raw bytes are kept in 'bytes'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        # On Python 2 the native string type is the byte string, so the raw
        # bytes are returned; on Python 3 __str__ must return text.
        if str is bytes:
            return self.bytes
        return self.bytes.decode("utf-8")

    def __unicode__(self):
        # NOTE: The bytes are decoded as standard UTF-8. Class files use a
        # NOTE: modified UTF-8 according to the specification, so unusual
        # NOTE: strings may fail to decode - to be confirmed.
        return self.bytes.decode("utf-8")

class StringInfo:

    "A string constant (tag 8) referring to a UTF-8 constant."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "Common base of the four-byte numeric constants; raw bytes in 'bytes'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):

    "An integer constant (tag 3)."

    def get_value(self):
        "Return the value as a signed 32-bit integer."
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):

    "A float constant (tag 4)."

    def get_value(self):
        "Return the value as a float."
        return f4(self.bytes)

class LargeNumInfo:

    """
    Common base of the eight-byte numeric constants. The two halves are kept
    as unsigned 32-bit integers in 'high_bytes' and 'low_bytes'.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_bytes(self):
        "Return the original eight bytes, rebuilt from the two halves."
        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):

    "A long constant (tag 5)."

    def get_value(self):
        "Return the value as a signed 64-bit integer."
        # The halves are integers, so they must be re-packed (not added)
        # before being reinterpreted as a single eight-byte value.
        return s8(self._get_bytes())

class DoubleInfo(LargeNumInfo):

    "A double constant (tag 6)."

    def get_value(self):
        "Return the value as a float."
        return f8(self._get_bytes())

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    "Common base of the field and method descriptions."

    def init(self, data, class_file):

        """
        Decode the item, including its attributes, from the start of 'data',
        returning the remaining data.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo):

    "A field of the class."

    def get_descriptor(self):
        "Return the parsed field descriptor."
        return self._get_field_descriptor(_text_type(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):

    "A method of the class."

    def get_descriptor(self):
        "Return the parsed method descriptor."
        return self._get_method_descriptor(_text_type(self.class_file.constants[self.descriptor_index - 1]))

class AttributeInfo:

    """
    Common base of the attributes. The 'init' methods are given the data
    following the attribute name index and return the data remaining after
    the attribute. This generic version keeps the undecoded contents in 'info'.
    """

    def init(self, data, class_file):
        # Recorded for consistency with the specific attribute classes.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils):

    "The SourceFile attribute; get_name returns the source file name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be returned for parsing to continue.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    "The ConstantValue attribute of a field."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The only content is the two-byte index decoded above.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        "Return the result of get_value on the referenced constant."
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    The Code attribute of a method, providing the raw bytecode in 'code', the
    'exception_table' (a list of ExceptionInfo objects) and nested
    'attributes'.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    "The Exceptions attribute of a method."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    "The InnerClasses attribute; 'classes' lists InnerClassInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):

    "The LineNumberTable attribute; the table lists LineNumberInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    "The LocalVariableTable attribute; the table lists LocalVariableInfo objects."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # The entries need the class file to look up names and descriptors.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.
# Each 'init' method decodes the object from the start of 'data' and returns
# the remaining data.

class ExceptionInfo:

    "An entry in the exception table of a Code attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the InnerClasses attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry in the LineNumberTable attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, DescriptorUtils):

    "An entry in the LocalVariableTable attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return the parsed field descriptor of the variable."
        return self._get_field_descriptor(_text_type(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    """
    A class representing a Java class file.

    The private methods taking a string 's' decode a structure from the start
    of 's' and return a pair consisting of the decoded value and the remaining
    data.
    """

    # Constant classes by constant pool tag.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo,
        }

    # Attribute classes by attribute name. The names are byte strings since
    # they are compared with the raw bytes of UTF-8 constants.

    _attribute_classes = {
        b"SourceFile" : SourceFileAttributeInfo,
        b"ConstantValue" : ConstantValueAttributeInfo,
        b"Code" : CodeAttributeInfo,
        b"Exceptions" : ExceptionsAttributeInfo,
        b"InnerClasses" : InnerClassesAttributeInfo,
        b"Synthetic" : SyntheticAttributeInfo,
        b"LineNumberTable" : LineNumberAttributeInfo,
        b"LocalVariableTable" : LocalVariableAttributeInfo,
        b"Deprecated" : DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        UnknownTag is raised for an unrecognised constant pool tag and
        UnknownAttribute for an unrecognised attribute name.
        """

        # The first eight bytes (the magic number and version numbers
        # according to the specification) are skipped without being checked.

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant starting with a tag byte at the start of 's',
        raising UnknownTag if the tag is not recognised.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)
        const = const_class()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode the constant pool having the given 'count', which is one more
        than the number of entries in the returned list.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' objects of the class 'cls'."
        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        "Decode 'number' MethodInfo objects."
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        "Decode 'number' FieldInfo objects."
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute starting with a name index at the start of 's',
        raising UnknownAttribute if the name is not recognised.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            # NOTE: The specification appears to require unrecognised
            # NOTE: attributes to be ignored; the generic AttributeInfo could
            # NOTE: skip them, but callers may rely on this exception.
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' attributes."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        "Decode the constant pool count and the constants following it."
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        "Decode the access flags as an integer."
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Decode a constant pool index, yielding the referenced constant."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Decode a constant pool index, yielding the referenced constant or None
        if the index is zero (meaning that there is no superclass).
        """

        index = u2(s[0:2])
        if index == 0:
            # Without this check, index -1 would select the last constant.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Decode the interface count and the referenced interface constants."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        "Decode the field count and the fields following it."
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        "Decode the attribute count and the attributes following it."
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        "Decode the method count and the methods following it."
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them as such and close the file.
    with open(sys.argv[1], "rb") as f:
        c = ClassFile(f.read())

# vim: tabstop=4 expandtab shiftwidth=4