#!/usr/bin/env python

"""
Java class file decoder. Specification found at the following URL:
http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html

The decoder works on a byte string holding the complete contents of a class
file. Constant pool indexes found in the file are one-based, whereas the
'constants' list built by ClassFile is zero-based; hence the "index - 1"
lookups throughout this module.
"""

import struct

try:
    unicode
except NameError:
    # Python 3 has no separate unicode type: str is already Unicode text.
    unicode = str

# Utility functions.

def u1(data):
    "Return the unsigned 8-bit integer found at the start of 'data'."
    return struct.unpack(">B", data[0:1])[0]

def u2(data):
    "Return the big-endian unsigned 16-bit integer at the start of 'data'."
    return struct.unpack(">H", data[0:2])[0]

def u4(data):
    "Return the big-endian unsigned 32-bit integer at the start of 'data'."
    return struct.unpack(">L", data[0:4])[0]

# Useful mix-ins.

class NameUtils:

    "A mix-in for objects having 'name_index' and 'class_file' attributes."

    def get_name(self):

        """
        Return the text of the constant referenced by 'name_index', or None
        if the index is zero.
        """

        if self.name_index != 0:
            return unicode(self.class_file.constants[self.name_index - 1])
        else:
            # Some name indexes are zero to indicate special conditions.
            return None

class DescriptorUtils:

    """
    A mix-in providing field and method descriptor parsing.

    Each parsing method accepts a descriptor string and, except for
    '_get_method_descriptor', returns a (result, remaining string) tuple.
    A field type is represented as (base_type, object_type, array_type) where
    'object_type' is only set for "L" types and 'array_type' (itself a field
    type tuple describing the component) is only set for "[" types.
    """

    def _get_method_descriptor(self, s):

        """
        Parse the method descriptor 's', returning a tuple of the form
        (list of parameter field types, return field type). The return type
        is None where the descriptor declares "V" (void).
        """

        assert s[0] == "("
        params = []
        s = s[1:]
        while s[0] != ")":
            parameter_descriptor, s = self._get_parameter_descriptor(s)
            params.append(parameter_descriptor)
        # Here s[0] is ")" and s[1] starts the return descriptor.
        if s[1] != "V":
            return_type, s = self._get_field_type(s[1:])
        else:
            return_type, s = None, s[1:]
        return params, return_type

    def _get_parameter_descriptor(self, s):
        "Parse a single parameter descriptor at the start of 's'."
        return self._get_field_type(s)

    def _get_field_descriptor(self, s):
        "Parse the field descriptor at the start of 's'."
        return self._get_field_type(s)

    def _get_component_type(self, s):
        "Parse the array component type at the start of 's'."
        return self._get_field_type(s)

    def _get_field_type(self, s):

        """
        Parse the field type at the start of 's', returning a tuple of the
        form ((base_type, object_type, array_type), remaining string).
        """

        base_type, s = self._get_base_type(s)
        object_type = None
        array_type = None
        if base_type == "L":
            object_type, s = self._get_object_type(s)
        elif base_type == "[":
            array_type, s = self._get_array_type(s)
        return (base_type, object_type, array_type), s

    def _get_base_type(self, s):

        """
        Return (first character of 's', remaining string), or (None, s) where
        's' is empty.
        """

        if len(s) > 0:
            return s[0], s[1:]
        else:
            return None, s

    def _get_object_type(self, s):

        """
        Return (class name up to but excluding ";", string following the ";"),
        or (None, s) where 's' is empty. The leading "L" must already have
        been consumed.
        """

        if len(s) > 0:
            s_end = s.find(";")
            assert s_end != -1
            return s[:s_end], s[s_end+1:]
        else:
            return None, s

    def _get_array_type(self, s):

        """
        Return (component field type, remaining string), or (None, s) where
        's' is empty. The leading "[" must already have been consumed by the
        caller, so the component type starts at the very beginning of 's'.
        """

        if len(s) > 0:
            return self._get_component_type(s)
        else:
            return None, s

# Constant information.
# Each 'init' method receives the data following the constant's tag byte,
# records the decoded values, and returns the data following the constant.

class ClassInfo(NameUtils):

    "A class constant referring to the constant holding the class name."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        return data[2:]

class RefInfo:

    "The common form of field, method and interface method references."

    def init(self, data, class_file):
        self.class_file = class_file
        self.class_index = u2(data[0:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

class FieldRefInfo(RefInfo):
    pass

class MethodRefInfo(RefInfo):
    pass

class InterfaceMethodRefInfo(RefInfo):
    pass

class NameAndTypeInfo(NameUtils):

    "A constant pairing a name index with a descriptor index."

    def init(self, data, class_file):
        self.class_file = class_file
        self.name_index = u2(data[0:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

class Utf8Info:

    "A constant holding a length-prefixed sequence of UTF-8 encoded bytes."

    def init(self, data, class_file):
        self.class_file = class_file
        self.length = u2(data[0:2])
        self.bytes = data[2:2+self.length]
        return data[2+self.length:]

    def __str__(self):
        if str is bytes:
            # Python 2: str is the byte string type, so return the raw bytes.
            return self.bytes
        # Python 3: __str__ must return text, so decode the bytes.
        return self.__unicode__()

    def __unicode__(self):
        return self.bytes.decode("utf-8")

class StringInfo:

    "A string constant referring to the constant holding its text."

    def init(self, data, class_file):
        self.class_file = class_file
        self.string_index = u2(data[0:2])
        return data[2:]

class SmallNumInfo:

    "The common form of four-byte numeric constants."

    def init(self, data, class_file):
        self.class_file = class_file
        # NOTE: The value is kept as an undecoded unsigned 32-bit integer.
        self.bytes = u4(data[0:4])
        return data[4:]

class IntegerInfo(SmallNumInfo):
    pass

class FloatInfo(SmallNumInfo):
    pass

class LargeNumInfo:

    "The common form of eight-byte numeric constants."

    def init(self, data, class_file):
        self.class_file = class_file
        self.high_bytes = u4(data[0:4])
        self.low_bytes = u4(data[4:8])
        return data[8:]

class LongInfo(LargeNumInfo):
    pass

class DoubleInfo(LargeNumInfo):
    pass

# Other information.
# Objects of these classes are generally aware of the class they reside in.

class ItemInfo(NameUtils, DescriptorUtils):

    "The common form of field and method entries."

    def init(self, data, class_file):

        """
        Decode the entry at the start of 'data', including its attributes,
        and return the data following the entry.
        """

        self.class_file = class_file
        self.access_flags = u2(data[0:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, data = self.class_file._get_attributes(data[6:])
        return data

class FieldInfo(ItemInfo):
    def get_descriptor(self):
        "Return (field type, remaining string) for this field's descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

class MethodInfo(ItemInfo):
    def get_descriptor(self):
        "Return (parameter types, return type) for this method's descriptor."
        return self._get_method_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Attribute information.
# Each 'init' method receives the data following the attribute's name index
# and returns the data following the attribute.

class AttributeInfo:

    "A generic attribute whose content is kept undecoded in 'info'."

    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.info = data[4:4+self.attribute_length]
        return data[4+self.attribute_length:]

# NOTE: Decode the different attribute formats.

class SourceFileAttributeInfo(AttributeInfo, NameUtils):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.sourcefile_index = u2(data[4:6])
        # The remaining data must be returned for parsing to continue.
        return data[4+self.attribute_length:]

class ConstantValueAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.constant_value_index = u2(data[4:6])
        assert 4+self.attribute_length == 6
        return data[4+self.attribute_length:]

class CodeAttributeInfo(AttributeInfo):
    def init(self, data, class_file):

        """
        Decode the bytecode, exception table and nested attributes, returning
        the data following the attribute.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])
        end_of_code = 12+self.code_length
        self.code = data[12:end_of_code]
        self.exception_table_length = u2(data[end_of_code:end_of_code+2])
        self.exception_table = []
        data = data[end_of_code + 2:]
        for i in range(0, self.exception_table_length):
            exception = ExceptionInfo()
            data = exception.init(data)
            self.exception_table.append(exception)
        self.attributes, data = self.class_file._get_attributes(data)
        return data

class ExceptionsAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []
        index = 6
        for i in range(0, self.number_of_exceptions):
            self.exception_index_table.append(u2(data[index:index+2]))
            index += 2
        return data[index:]

    def get_exception(self, i):
        "Return the constant referenced by entry 'i' of the index table."
        exception_index = self.exception_index_table[i]
        return self.class_file.constants[exception_index - 1]

class InnerClassesAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        data = data[6:]
        for i in range(0, self.number_of_classes):
            inner_class = InnerClassInfo()
            data = inner_class.init(data, self.class_file)
            self.classes.append(inner_class)
        return data

class SyntheticAttributeInfo(AttributeInfo):
    pass

class LineNumberAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        data = data[6:]
        for i in range(0, self.line_number_table_length):
            line_number = LineNumberInfo()
            data = line_number.init(data)
            self.line_number_table.append(line_number)
        return data

class LocalVariableAttributeInfo(AttributeInfo):
    def init(self, data, class_file):
        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        data = data[6:]
        for i in range(0, self.local_variable_table_length):
            local_variable = LocalVariableInfo()
            # The entries need the class file in order to resolve names.
            data = local_variable.init(data, self.class_file)
            self.local_variable_table.append(local_variable)
        return data

class DeprecatedAttributeInfo(AttributeInfo):
    pass

# Child classes of the attribute information classes.

class ExceptionInfo:

    "An entry in the exception table of a code attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

class InnerClassInfo(NameUtils):

    "An entry in the table of an inner classes attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.inner_class_info_index = u2(data[0:2])
        self.outer_class_info_index = u2(data[2:4])
        # Permit the NameUtils mix-in.
        self.name_index = self.inner_name_index = u2(data[4:6])
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

class LineNumberInfo:

    "An entry in the table of a line number attribute."

    def init(self, data):
        self.start_pc = u2(data[0:2])
        self.line_number = u2(data[2:4])
        return data[4:]

class LocalVariableInfo(NameUtils, DescriptorUtils):

    "An entry in the table of a local variable attribute."

    def init(self, data, class_file):
        self.class_file = class_file
        self.start_pc = u2(data[0:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):
        "Return (field type, remaining string) for the variable's descriptor."
        return self._get_field_descriptor(unicode(self.class_file.constants[self.descriptor_index - 1]))

# Exceptions.

class UnknownTag(Exception):
    pass

class UnknownAttribute(Exception):
    pass

# Abstractions for the main structures.

class ClassFile:

    "A class representing a Java class file."

    # Constant pool tags mapped to the classes decoding the constants.

    _constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo,
        }

    # Attribute names (as found in the constant pool, hence byte strings)
    # mapped to the classes decoding the attributes.

    _attribute_classes = {
        b"SourceFile" : SourceFileAttributeInfo,
        b"ConstantValue" : ConstantValueAttributeInfo,
        b"Code" : CodeAttributeInfo,
        b"Exceptions" : ExceptionsAttributeInfo,
        b"InnerClasses" : InnerClassesAttributeInfo,
        b"Synthetic" : SyntheticAttributeInfo,
        b"LineNumberTable" : LineNumberAttributeInfo,
        b"LocalVariableTable" : LocalVariableAttributeInfo,
        b"Deprecated" : DeprecatedAttributeInfo,
        }

    def __init__(self, s):

        """
        Process the given string 's', populating the object with the class
        file's details.

        The first eight bytes of 's' (preceding the constant pool count) are
        skipped without being examined.
        """

        self.constants, s = self._get_constants(s[8:])
        self.access_flags, s = self._get_access_flags(s)
        self.this_class, s = self._get_this_class(s)
        self.super_class, s = self._get_super_class(s)
        self.interfaces, s = self._get_interfaces(s)
        self.fields, s = self._get_fields(s)
        self.methods, s = self._get_methods(s)
        self.attributes, s = self._get_attributes(s)

    def _decode_const(self, s):

        """
        Decode the constant at the start of 's', returning a tuple of the form
        (constant object, remaining data). Raise UnknownTag where the tag
        byte is not recognised.
        """

        tag = u1(s[0:1])
        const_class = self._constant_classes.get(tag)
        if const_class is None:
            raise UnknownTag(tag)

        # Initialise the constant object.

        const = const_class()
        s = const.init(s[1:], self)
        return const, s

    def _get_constants_from_table(self, count, s):

        """
        Decode the constants for a pool having the given 'count', returning a
        tuple of the form (list of constants, remaining data). The list
        position of a constant is its constant pool index minus one.
        """

        constants = []
        # Have to skip certain entries specially.
        i = 1
        while i < count:
            const, s = self._decode_const(s)
            constants.append(const)
            # Add a blank entry after "large" entries.
            if isinstance(const, LargeNumInfo):
                constants.append(None)
                i += 1
            i += 1
        return constants, s

    def _get_items_from_table(self, cls, number, s):

        """
        Decode 'number' entries from 's' using instances of 'cls', returning
        a tuple of the form (list of entries, remaining data).
        """

        items = []
        for i in range(0, number):
            item = cls()
            s = item.init(s, self)
            items.append(item)
        return items, s

    def _get_methods_from_table(self, number, s):
        return self._get_items_from_table(MethodInfo, number, s)

    def _get_fields_from_table(self, number, s):
        return self._get_items_from_table(FieldInfo, number, s)

    def _get_attribute_from_table(self, s):

        """
        Decode the attribute at the start of 's', returning a tuple of the
        form (attribute object, remaining data). Raise UnknownAttribute where
        the attribute name is not recognised.
        """

        attribute_name_index = u2(s[0:2])
        constant_name = self.constants[attribute_name_index - 1].bytes
        attribute_class = self._attribute_classes.get(constant_name)
        if attribute_class is None:
            raise UnknownAttribute(constant_name)
        attribute = attribute_class()
        s = attribute.init(s[2:], self)
        return attribute, s

    def _get_attributes_from_table(self, number, s):

        """
        Decode 'number' attributes from 's', returning a tuple of the form
        (list of attributes, remaining data).
        """

        attributes = []
        for i in range(0, number):
            attribute, s = self._get_attribute_from_table(s)
            attributes.append(attribute)
        return attributes, s

    def _get_constants(self, s):
        count = u2(s[0:2])
        return self._get_constants_from_table(count, s[2:])

    def _get_access_flags(self, s):
        return u2(s[0:2]), s[2:]

    def _get_this_class(self, s):
        index = u2(s[0:2])
        return self.constants[index - 1], s[2:]

    def _get_super_class(self, s):

        """
        Return a tuple of the form (superclass constant, remaining data). The
        constant is None where the index is zero, indicating a class without
        a superclass, rather than the result of a lookup using index -1.
        """

        index = u2(s[0:2])
        if index == 0:
            return None, s[2:]
        return self.constants[index - 1], s[2:]

    def _get_interfaces(self, s):

        """
        Return a tuple of the form (list of interface constants, remaining
        data).
        """

        interfaces = []
        number = u2(s[0:2])
        s = s[2:]
        for i in range(0, number):
            index = u2(s[0:2])
            interfaces.append(self.constants[index - 1])
            s = s[2:]
        return interfaces, s

    def _get_fields(self, s):
        number = u2(s[0:2])
        return self._get_fields_from_table(number, s[2:])

    def _get_attributes(self, s):
        number = u2(s[0:2])
        return self._get_attributes_from_table(number, s[2:])

    def _get_methods(self, s):
        number = u2(s[0:2])
        return self._get_methods_from_table(number, s[2:])

if __name__ == "__main__":
    import sys
    # Class files are binary data: read them as such and close the file.
    with open(sys.argv[1], "rb") as f:
        c = ClassFile(f.read())

# vim: tabstop=4 expandtab shiftwidth=4