#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html
"""

import struct # for general decoding of class files

# Utility functions.
# Each function decodes one big-endian value from the start of 'data' and
# ignores anything after the bytes it needs. struct.error is raised if 'data'
# is shorter than the value being decoded.

def u1(data):
    """Return the unsigned 8-bit integer held in the first byte of 'data'."""
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    """Return the unsigned 16-bit integer held in the first 2 bytes of 'data'."""
    return struct.unpack(">H", data[0:2])[0]

def u4(data):
    """Return the unsigned 32-bit integer held in the first 4 bytes of 'data'."""
    return struct.unpack(">L", data[0:4])[0]

def s4(data):
    """Return the signed 32-bit integer held in the first 4 bytes of 'data'."""
    return struct.unpack(">l", data[0:4])[0]

def s8(data):
    """Return the signed 64-bit integer held in the first 8 bytes of 'data'."""
    return struct.unpack(">q", data[0:8])[0]

def f4(data):
    """Return the 32-bit float held in the first 4 bytes of 'data'."""
    return struct.unpack(">f", data[0:4])[0]

def f8(data):
    """Return the 64-bit float held in the first 8 bytes of 'data'."""
    return struct.unpack(">d", data[0:8])[0]

# Useful mix-ins.

class NameUtils:

    """
    Mix-in providing name lookup for objects which define 'name_index' (a
    1-based constant pool index) and 'class_file' (an object whose 'constants'
    list holds the decoded constant pool).
    """

    def get_name(self):
        """
        Return the referenced constant converted to a Unicode string, or None
        if 'name_index' is zero.
        """
        if self.name_index != 0:
            # Constant pool indexes are 1-based; the 'constants' list is 0-based.
            return unicode(self.class_file.constants[self.name_index - 1])
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

# Constant information.
# Objects of these classes are not directly aware of the class they reside in.
class ClassInfo(NameUtils):

    """Constant pool entry (tag 7) holding the index of a class name."""

    def init(self, data, class_file):
        """
        Decode the entry from the start of 'data' (the tag byte having been
        removed by the caller) and return the unconsumed remainder.
        """
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo:

    """Common layout of the field, method and interface method references."""

    def init(self, data, class_file):
        """Decode the two 16-bit indexes and return the unconsumed data."""
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo):
    pass

class MethodRefInfo(RefInfo):
    pass

class InterfaceMethodRefInfo(RefInfo):
    pass

class NameAndTypeInfo(NameUtils):

    """Constant pool entry (tag 12) pairing a name with a descriptor."""

    def init(self, data, class_file):
        """Decode the two 16-bit indexes and return the unconsumed data."""
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

class Utf8Info:

    """Constant pool entry (tag 1) holding a length-prefixed byte string."""

    def init(self, data, class_file):
        """
        Decode the 16-bit length and that many following bytes, returning the
        unconsumed data.
        """
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        return self.bytes

    def __unicode__(self):
        return unicode(self.bytes, "utf-8")

class StringInfo:

    """Constant pool entry (tag 8) holding the index of the string's text."""

    def init(self, data, class_file):
        """Decode the 16-bit index and return the unconsumed data."""
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    """Common layout of the 4-byte numeric constants."""

    def init(self, data, class_file):
        """Store the raw 4 bytes undecoded and return the unconsumed data."""
        self.class_file = class_file
        self.bytes = data[0:4]
        return data[4:]

class IntegerInfo(SmallNumInfo):
    def get_value(self):
        """Return the constant as a signed 32-bit integer."""
        return s4(self.bytes)

class FloatInfo(SmallNumInfo):
    def get_value(self):
        """Return the constant as a 32-bit float."""
        return f4(self.bytes)

class LargeNumInfo:

    """
    Common layout of the 8-byte numeric constants, stored as two unsigned
    32-bit integers: 'high_bytes' and 'low_bytes'.
    """

    def init(self, data, class_file):
        """Decode both 32-bit halves and return the unconsumed data."""
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

    def _get_raw_bytes(self):
        """Return the original 8 big-endian bytes rebuilt from both halves."""
        # The halves are kept as integers, so they must be packed back into
        # bytes before s8/f8 can decode them; adding the integers together
        # would neither preserve the value nor be sliceable.
        return struct.pack(">LL", self.high_bytes, self.low_bytes)

class LongInfo(LargeNumInfo):
    def get_value(self):
        """Return the constant as a signed 64-bit integer."""
        return s8(self._get_raw_bytes())

class DoubleInfo(LargeNumInfo):
    def get_value(self):
        """Return the constant as a 64-bit float."""
        return f8(self._get_raw_bytes())

# Other information.
# Objects of these classes are generally aware of the class they reside in.
class ItemInfo(NameUtils):

    """
    Common layout of field and method entries: access flags, name index,
    descriptor index and a table of attributes.
    """

    def init(self, data, class_file):
        """
        Decode the entry from the start of 'data', including its attributes
        (decoded by the class file object), and return the unconsumed data.
        """
        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

    # Symbol parsing.
    # Apart from _get_method_descriptor, each parsing method returns a tuple
    # of (parsed value, remaining descriptor text).

    def _get_method_descriptor(self, s):
        """
        Parse the method descriptor 's', which must start with "(", and
        return a tuple (parameter types, return type). Each type is a
        (base type, object type, array type) tuple as produced by
        _get_field_type; the return type is None for a "V" (void) return.
        """
        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here s[0] is ")" and s[1] starts the return descriptor.
        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type = None
        return params, return_type

    def _get_parameter_descriptor(self, s):
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        return self._get_field_type(s)

    def _get_component_type(self, s):
        return self._get_field_type(s)

    def _get_field_type(self, s):
        """
        Parse one type from the start of 's' and return
        ((base type, object type, array type), remaining text).

        The base type is the first character of 's' (None if 's' is empty).
        The object type is only set for base type "L" and the array type is
        only set for base type "["; otherwise they are None.
        """
        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):
        """Split off the first character of 's', or return None for empty 's'."""
        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):
        """
        Split 's' at the first ";", returning the text before it (the class
        name) and the text after it. Returns None for empty 's'.
        """
        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):
        """Parse the component type of an array, or return None for empty 's'."""
        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

class FieldInfo(ItemInfo):
    def get_descriptor(self):
        """Return the parsed field descriptor along with any unparsed text."""
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):
    def get_descriptor(self):
        """Return the parsed (parameter types, return type) of the method."""
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))
class AttributeInfo:

    """
    Generic attribute: a 32-bit length followed by that many bytes of
    undecoded content, kept in 'info'.

    For every attribute class, 'init' receives the data following the 16-bit
    attribute name index (which the caller has already consumed) and returns
    the data remaining after the attribute.
    """

    def init(self, data, class_file):
        # Stored for consistency with the specialised attribute classes.
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils):

    """Attribute holding the index of the source file name."""

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The caller continues decoding from the returned data, so the
        # remainder after this attribute must be handed back.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):

    """Attribute holding the index of a constant giving a field's value."""

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        # The content is a single 16-bit index, so the length must be 2.
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

    def get_value(self):
        """Return the value of the referenced constant via its get_value method."""
        return self.class_file.constants[self.constant_value_index - 1].get_value()

class CodeAttributeInfo(AttributeInfo):

    """
    Attribute holding a method's bytecode ('code'), stack and local variable
    limits, exception table and nested attributes.
    """

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        # The nested attributes are the last part of this attribute, so the
        # data left after decoding them is also what follows the attribute.
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):

    """Attribute holding a table of constant pool indexes of exceptions."""

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        """Return the constant referenced by entry 'i' of the index table."""
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):

    """Attribute holding a table of InnerClassInfo entries in 'classes'."""

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):

    """Attribute holding a table of LineNumberInfo entries."""

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):

    """Attribute holding a table of LocalVariableInfo entries."""

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # LocalVariableInfo.init requires the class file, which it needs
            # in order to resolve its name and descriptor indexes.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.
class ExceptionInfo:

    """Exception table entry of a code attribute: four 16-bit fields."""

    def init(self, data):
        """Decode the entry from the start of 'data'; return the remainder."""
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    """Entry of an inner classes attribute: three indexes and access flags."""

    def init(self, data, class_file):
        """Decode the entry from the start of 'data'; return the remainder."""
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    """Entry of a line number table: a code offset and a line number."""

    def init(self, data):
        """Decode the entry from the start of 'data'; return the remainder."""
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils):

    """Entry of a local variable table: five 16-bit fields."""

    def init(self, data, class_file):
        """Decode the entry from the start of 'data'; return the remainder."""
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        """
        Return the parsed field descriptor of the variable, in the form
        produced by ItemInfo._get_field_descriptor.
        """
        descriptor = unicode(self.class_file.constants[self.descriptor_index - 1])
        # The descriptor parsing methods are defined on ItemInfo, not on the
        # NameUtils mix-in, and use no instance state; borrow them through a
        # throwaway instance rather than calling a method this class lacks.
        return ItemInfo()._get_field_descriptor(descriptor)

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Constant pool tag values and the classes which decode them.
    _CONSTANT_CLASSES = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo,
        }

    # Attribute names and the classes which decode them.
    _ATTRIBUTE_CLASSES = {
        "SourceFile" : SourceFileAttributeInfo,
        "ConstantValue" : ConstantValueAttributeInfo,
        "Code" : CodeAttributeInfo,
        "Exceptions" : ExceptionsAttributeInfo,
        "InnerClasses" : InnerClassesAttributeInfo,
        "Synthetic" : SyntheticAttributeInfo,
        "LineNumberTable" : LineNumberAttributeInfo,
        "LocalVariableTable" : LocalVariableAttributeInfo,
        "Deprecated" : DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.
        """

        # The first 8 bytes are skipped without being examined; the constant
        # pool is decoded from the data which follows them.
        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    # Each decoding method below returns a tuple of (decoded value or list,
    # remaining data).

    def _decode_const(self, s):

        """
        Decode a single constant from the start of 's', raising UnknownTag if
        the leading tag byte is not recognised.
        """

        tag = u1(s[0:1])
        cls = self._CONSTANT_CLASSES.get(tag)
        if cls is None:
            raise UnknownTag(tag)
        const = cls()

        # Initialise the constant object.

        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode the constants for a pool whose declared size is 'count'. Pool
        indexes start at 1, so at most 'count' - 1 list entries are produced.
        """

        l = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            c, s = self._decode_const(s)
            l.append(c)
            # Add a blank entry after "large" entries.
            if isinstance(c, LargeNumInfo):
                l.append(None)
                i += 1
            i += 1
        return l, s

    def _get_items_from_table(self, cls, number, s):
        "Decode 'number' consecutive items, each an instance of 'cls'."
        l = []
        for i in range(0, number):
            f = cls()
            s = f.init(s, self)
            l.append(f)
        return l, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode a single attribute from the start of 's', choosing the class
        from the attribute's name in the constant pool. UnknownAttribute is
        raised if the name is not recognised.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        cls = self._ATTRIBUTE_CLASSES.get(constant_name)
        if cls is None:
            raise UnknownAttribute(constant_name)
        attribute = cls()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):
        "Decode 'number' consecutive attributes."
        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        "Return the constant referenced by the 16-bit index starting 's'."
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return the constant referenced by the 16-bit index starting 's', or
        None if the index is zero, meaning that there is no super class.
        """

        index = u2(s[0:2])
        if index == 0:
            # Looking up index zero would wrongly select the last constant.
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):
        "Return the constants referenced by a counted table of indexes."
        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data, so avoid any text mode translation, and
    # make sure that the file is closed even if decoding fails.
    f = open(sys.argv[1], "rb")
    try:
        c = ClassFile(f.read())
    finally:
        f.close()

# vim: tabstop=4 expandtab shiftwidth=4