paul@137 | 1 | #!/usr/bin/env python |
paul@137 | 2 | |
paul@137 | 3 | """ |
paul@137 | 4 | Java class file decoder. Specification found at the following URL: |
paul@137 | 5 | http://java.sun.com/docs/books/vmspec/2nd-edition/html/ClassFile.doc.html |
paul@186 | 6 | |
paul@186 | 7 | Copyright (C) 2004, 2005, 2006, 2011 Paul Boddie <paul@boddie.org.uk> |
paul@186 | 8 | Copyright (C) 2010 Braden Thomas <bradenthomas@me.com> |
paul@186 | 9 | |
paul@186 | 10 | This program is free software; you can redistribute it and/or modify it under |
paul@186 | 11 | the terms of the GNU Lesser General Public License as published by the Free |
paul@186 | 12 | Software Foundation; either version 3 of the License, or (at your option) any |
paul@186 | 13 | later version. |
paul@186 | 14 | |
paul@186 | 15 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@186 | 16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@186 | 17 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
paul@186 | 18 | details. |
paul@186 | 19 | |
paul@186 | 20 | You should have received a copy of the GNU Lesser General Public License along |
paul@186 | 21 | with this program. If not, see <http://www.gnu.org/licenses/>. |
paul@137 | 22 | """ |
paul@137 | 23 | |
paul@137 | 24 | import struct # for general decoding of class files |
paul@137 | 25 | |
paul@137 | 26 | # Utility functions. |
paul@137 | 27 | |
def u1(data):
    """Decode the first byte of 'data' as an unsigned 8-bit integer."""
    (value,) = struct.unpack(">B", data[:1])
    return value
paul@137 | 30 | |
def u2(data):
    """Decode the first two bytes of 'data' as a big-endian unsigned 16-bit integer."""
    (value,) = struct.unpack(">H", data[:2])
    return value
paul@137 | 33 | |
def s2(data):
    """Decode the first two bytes of 'data' as a big-endian signed 16-bit integer."""
    (value,) = struct.unpack(">h", data[:2])
    return value
paul@137 | 36 | |
def u4(data):
    """Decode the first four bytes of 'data' as a big-endian unsigned 32-bit integer."""
    (value,) = struct.unpack(">L", data[:4])
    return value
paul@137 | 39 | |
def s4(data):
    """Decode the first four bytes of 'data' as a big-endian signed 32-bit integer."""
    (value,) = struct.unpack(">l", data[:4])
    return value
paul@137 | 42 | |
def s8(data):
    """Decode the first eight bytes of 'data' as a big-endian signed 64-bit integer."""
    (value,) = struct.unpack(">q", data[:8])
    return value
paul@137 | 45 | |
def f4(data):
    """Decode the first four bytes of 'data' as a big-endian 32-bit float."""
    (value,) = struct.unpack(">f", data[:4])
    return value
paul@137 | 48 | |
def f8(data):
    """Decode the first eight bytes of 'data' as a big-endian 64-bit float."""
    (value,) = struct.unpack(">d", data[:8])
    return value
paul@137 | 51 | |
def su1(value):
    """Encode 'value' as a single unsigned byte."""
    encoded = struct.pack(">B", value)
    return encoded
paul@185 | 54 | |
def su2(value):
    """Encode 'value' as a big-endian unsigned 16-bit integer."""
    encoded = struct.pack(">H", value)
    return encoded
paul@185 | 57 | |
def ss2(value):
    """Encode 'value' as a big-endian signed 16-bit integer."""
    encoded = struct.pack(">h", value)
    return encoded
paul@185 | 60 | |
def su4(value):
    """Encode 'value' as a big-endian unsigned 32-bit integer."""
    encoded = struct.pack(">L", value)
    return encoded
paul@185 | 63 | |
def ss4(value):
    """Encode 'value' as a big-endian signed 32-bit integer."""
    encoded = struct.pack(">l", value)
    return encoded
paul@185 | 66 | |
def ss8(value):
    """Encode 'value' as a big-endian signed 64-bit integer."""
    encoded = struct.pack(">q", value)
    return encoded
paul@185 | 69 | |
def sf4(value):
    """Encode 'value' as a big-endian 32-bit float."""
    encoded = struct.pack(">f", value)
    return encoded
paul@185 | 72 | |
def sf8(value):
    """Encode 'value' as a big-endian 64-bit float."""
    encoded = struct.pack(">d", value)
    return encoded
paul@185 | 75 | |
paul@137 | 76 | # Useful tables and constants. |
paul@137 | 77 | |
# Maps the leading character of a type descriptor to the name of the Python
# type used to represent values of that kind.
descriptor_base_type_mapping = dict([
    ("B", "int"),
    ("C", "str"),
    ("D", "float"),
    ("F", "float"),
    ("I", "int"),
    ("J", "int"),
    ("L", "object"),
    ("S", "int"),
    ("Z", "bool"),
    ("[", "list"),
])
paul@137 | 90 | |
# Default values, keyed by the Python type names found in
# descriptor_base_type_mapping.
type_names_to_default_values = {
    "int" : 0,
    "str" : u"",
    "float" : 0.0,
    "object" : None,
    "bool" : 0, # NOTE: Should be False.
    "list" : []
    }

def get_default_for_type(type_name):

    """
    Return the default value registered for 'type_name', or None if the name
    is not known.

    List defaults are returned as fresh copies so that a caller mutating the
    result cannot alter the default seen by later callers.
    """

    default = type_names_to_default_values.get(type_name)
    if isinstance(default, list):
        return list(default)
    return default
paul@141 | 103 | |
# Access flag bit masks. Note that SUPER and SYNCHRONIZED share the same value.
PUBLIC = 0x0001
PRIVATE = 0x0002
PROTECTED = 0x0004
STATIC = 0x0008
FINAL = 0x0010
SUPER = 0x0020
SYNCHRONIZED = 0x0020
VOLATILE = 0x0040
TRANSIENT = 0x0080
NATIVE = 0x0100
INTERFACE = 0x0200
ABSTRACT = 0x0400
STRICT = 0x0800
paul@137 | 106 | |
def has_flags(flags, desired):

    """
    Return whether every flag in the 'desired' sequence is set in 'flags'.
    An empty 'desired' sequence is always satisfied.
    """

    # A plain loop is used rather than the reduce builtin, which is not
    # available as a builtin on Python 3.
    desired_flags = 0
    for flag in desired:
        desired_flags |= flag
    return (flags & desired_flags) == desired_flags
paul@137 | 110 | |
paul@137 | 111 | # Useful mix-ins. |
paul@137 | 112 | |
class PythonMethodUtils:

    """
    Mix-in producing Python-compatible names for methods. Classes using it
    must provide get_name() and get_descriptor().
    """

    symbol_sep = "___" # was "$"
    type_sep = "__" # replaces "/"
    array_sep = "_array_" # was "[]"
    base_seps = ("_", "_") # was "<" and ">"

    def get_unqualified_python_name(self):

        "Return the method name, translating '<init>' and '<clinit>'."

        special_names = {"<init>" : "__init__", "<clinit>" : "__clinit__"}
        name = str(self.get_name())
        return special_names.get(name, name)

    def get_python_name(self):

        """
        Return the unqualified name followed by an encoding of the parameter
        types. The '__clinit__' name is returned without any such suffix.
        """

        unqualified = self.get_unqualified_python_name()
        if unqualified != "__clinit__":
            return unqualified + self.symbol_sep + self._get_descriptor_as_name()
        return unqualified

    def _get_descriptor_as_name(self):

        "Return the parameter types of the descriptor joined by symbol_sep."

        parameter_types = self.get_descriptor()[0]
        encoded = [self._get_type_as_name(t) for t in parameter_types]
        return self.symbol_sep.join(encoded)

    def _get_type_as_name(self, descriptor_type, s=""):

        """
        Return a name for the (base_type, object_type, array_type) tuple
        'descriptor_type', followed by the suffix 's' and one array_sep for
        each level of array nesting.
        """

        suffix = s
        base_type, object_type, array_type = descriptor_type

        # Unwrap array levels, collecting one separator per level.
        while base_type == "[":
            suffix = suffix + self.array_sep
            base_type, object_type, array_type = array_type

        if base_type == "L":
            return object_type.replace("/", self.type_sep) + suffix
        return self.base_seps[0] + base_type + self.base_seps[1] + suffix
paul@137 | 148 | |
class PythonNameUtils:

    """
    Mix-in producing dotted Python names. Classes using it must provide
    get_name().
    """

    def get_python_name(self):

        """
        Return the name with "/" replaced by ".". Names starting with "[" are
        first parsed as field descriptors and reduced to their element type.
        """

        # NOTE: This may not be comprehensive.
        name = str(self.get_name())
        if name.startswith("["):
            name = self._get_type_name(get_field_descriptor(name))
        return name.replace("/", ".")

    def _get_type_name(self, descriptor_type):

        """
        Return the type name for the (base_type, object_type, array_type)
        tuple 'descriptor_type', looking through any levels of array nesting.
        """

        base_type, object_type, array_type = descriptor_type
        while base_type == "[":
            base_type, object_type, array_type = array_type
        if base_type == "L":
            return object_type
        return descriptor_base_type_mapping[base_type]
paul@137 | 169 | |
class NameUtils:

    """
    Mix-in for objects having 'name_index' and 'class_file' attributes.
    """

    def get_name(self):

        """
        Return the constant referenced by the one-based 'name_index', or None
        if the index is zero.
        """

        if self.name_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        return self.class_file.constants[self.name_index - 1]
paul@137 | 177 | |
class NameAndTypeUtils:

    """
    Mix-in for objects having 'name_and_type_index', 'class_index' and
    'class_file' attributes. Indexes into the constants are one-based.
    """

    def get_name(self):

        "Return the name held by the referenced constant, or None."

        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        name_and_type = self.class_file.constants[self.name_and_type_index - 1]
        return name_and_type.get_name()

    def get_field_descriptor(self):

        "Return the field descriptor of the referenced constant, or None."

        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        name_and_type = self.class_file.constants[self.name_and_type_index - 1]
        return name_and_type.get_field_descriptor()

    def get_method_descriptor(self):

        "Return the method descriptor of the referenced constant, or None."

        if self.name_and_type_index == 0:
            # Some name indexes are zero to indicate special conditions.
            return None
        name_and_type = self.class_file.constants[self.name_and_type_index - 1]
        return name_and_type.get_method_descriptor()

    def get_class(self):

        "Return the constant referenced by 'class_index'."

        constants = self.class_file.constants
        return constants[self.class_index - 1]
paul@137 | 202 | |
paul@137 | 203 | # Symbol parsing. |
paul@137 | 204 | |
def get_method_descriptor(s):

    """
    Parse the method descriptor string 's', which must start with "(".
    Return a tuple (params, return_type) where 'params' is a list of parsed
    parameter types and 'return_type' is a parsed type, or None where the
    return type is given as "V".
    """

    assert s[0] == "("
    remaining = s[1:]
    params = []
    while remaining[0] != ")":
        parameter_descriptor, remaining = _get_parameter_descriptor(remaining)
        params.append(parameter_descriptor)

    # 'remaining' now starts with ")"; the return type follows it.
    if remaining[1] == "V":
        return params, None
    return_type, remaining = _get_field_type(remaining[1:])
    return params, return_type
paul@137 | 217 | |
def get_field_descriptor(s):

    """
    Parse the field descriptor string 's', returning a tuple of the form
    (base_type, object_type, array_type). Any text after the first type is
    discarded.
    """

    field_type, _remaining = _get_field_type(s)
    return field_type
paul@137 | 220 | |
def _get_parameter_descriptor(s):

    "Parse one parameter type from 's', returning (type, remaining text)."

    parsed = _get_field_type(s)
    return parsed
paul@137 | 223 | |
def _get_component_type(s):

    "Parse one array component type from 's', returning (type, remaining text)."

    parsed = _get_field_type(s)
    return parsed
paul@137 | 226 | |
def _get_field_type(s):

    """
    Parse one type from the start of 's'. Return a tuple of the form
    ((base_type, object_type, array_type), remaining text) where
    'object_type' is only set for base type "L" and 'array_type' is only set
    for base type "[".
    """

    object_type = array_type = None
    base_type, remaining = _get_base_type(s)
    if base_type == "L":
        object_type, remaining = _get_object_type(remaining)
    elif base_type == "[":
        array_type, remaining = _get_array_type(remaining)
    return (base_type, object_type, array_type), remaining
paul@137 | 236 | |
paul@137 | 237 | def _get_base_type(s): |
paul@137 | 238 | if len(s) > 0: |
paul@137 | 239 | return s[0], s[1:] |
paul@137 | 240 | else: |
paul@137 | 241 | return None, s |
paul@137 | 242 | |
paul@137 | 243 | def _get_object_type(s): |
paul@137 | 244 | if len(s) > 0: |
paul@137 | 245 | s_end = s.find(";") |
paul@137 | 246 | assert s_end != -1 |
paul@137 | 247 | return s[:s_end], s[s_end+1:] |
paul@137 | 248 | else: |
paul@137 | 249 | return None, s |
paul@137 | 250 | |
paul@137 | 251 | def _get_array_type(s): |
paul@137 | 252 | if len(s) > 0: |
paul@137 | 253 | return _get_component_type(s) |
paul@137 | 254 | else: |
paul@137 | 255 | return None, s |
paul@137 | 256 | |
paul@137 | 257 | # Constant information. |
paul@137 | 258 | |
class ClassInfo(NameUtils, PythonNameUtils):

    "A constant holding the index of a name."

    def init(self, data, class_file):

        """
        Read the name index from the start of 'data', remembering
        'class_file', and return the remaining data.
        """

        self.class_file = class_file
        self.name_index = u2(data[:2])
        return data[2:]

    def serialize(self):

        "Return the encoded name index."

        encoded = su2(self.name_index)
        return encoded
paul@137 | 266 | |
class RefInfo(NameAndTypeUtils):

    "A constant holding a class index and a name-and-type index."

    def init(self, data, class_file):

        """
        Read both indexes from the start of 'data', remembering 'class_file',
        and return the remaining data.
        """

        self.class_file = class_file
        self.class_index = u2(data[:2])
        self.name_and_type_index = u2(data[2:4])
        return data[4:]

    def serialize(self):

        "Return the encoded class index followed by the name-and-type index."

        class_part = su2(self.class_index)
        return class_part + su2(self.name_and_type_index)
paul@137 | 275 | |
class FieldRefInfo(RefInfo, PythonNameUtils):

    "A reference constant whose descriptor is a field descriptor."

    def get_descriptor(self):

        "Return the parsed field descriptor of the referenced name and type."

        descriptor = RefInfo.get_field_descriptor(self)
        return descriptor
paul@137 | 279 | |
class MethodRefInfo(RefInfo, PythonMethodUtils):

    "A reference constant whose descriptor is a method descriptor."

    def get_descriptor(self):

        "Return the parsed method descriptor of the referenced name and type."

        descriptor = RefInfo.get_method_descriptor(self)
        return descriptor
paul@137 | 283 | |
class InterfaceMethodRefInfo(MethodRefInfo):

    "A method reference constant of a distinct kind, behaving as MethodRefInfo."
paul@137 | 286 | |
class NameAndTypeInfo(NameUtils, PythonNameUtils):

    "A constant holding a name index and a descriptor index."

    def init(self, data, class_file):

        """
        Read both indexes from the start of 'data', remembering 'class_file',
        and return the remaining data.
        """

        self.class_file = class_file
        self.name_index = u2(data[:2])
        self.descriptor_index = u2(data[2:4])
        return data[4:]

    def serialize(self):

        "Return the encoded name index followed by the descriptor index."

        name_part = su2(self.name_index)
        return name_part + su2(self.descriptor_index)

    def get_field_descriptor(self):

        "Return the referenced descriptor constant parsed as a field descriptor."

        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return get_field_descriptor(unicode(descriptor))

    def get_method_descriptor(self):

        "Return the referenced descriptor constant parsed as a method descriptor."

        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return get_method_descriptor(unicode(descriptor))
paul@137 | 302 | |
class Utf8Info:

    "A constant holding a length-prefixed sequence of bytes."

    def init(self, data, class_file):

        """
        Read the two-byte length and that many following bytes from the start
        of 'data', remembering 'class_file', and return the remaining data.
        """

        self.class_file = class_file
        self.length = u2(data[:2])
        end = 2 + self.length
        self.bytes = data[2:end]
        return data[end:]

    def serialize(self):

        "Return the encoded length followed by the stored bytes."

        prefix = su2(self.length)
        return prefix + self.bytes

    def __str__(self):

        "Return the stored bytes unchanged."

        return self.bytes

    def __unicode__(self):

        "Return the stored bytes decoded as UTF-8."

        return unicode(self.bytes, "utf-8")

    def get_value(self):

        "Return the plain string form of this constant."

        value = str(self)
        return value
paul@137 | 321 | |
class StringInfo:

    "A constant holding the index of another constant providing the text."

    def init(self, data, class_file):

        """
        Read the string index from the start of 'data', remembering
        'class_file', and return the remaining data.
        """

        self.class_file = class_file
        self.string_index = u2(data[:2])
        return data[2:]

    def serialize(self):

        "Return the encoded string index."

        encoded = su2(self.string_index)
        return encoded

    def __str__(self):

        "Return the plain string form of the referenced constant."

        referenced = self.class_file.constants[self.string_index - 1]
        return str(referenced)

    def __unicode__(self):

        "Return the Unicode form of the referenced constant."

        referenced = self.class_file.constants[self.string_index - 1]
        return unicode(referenced)

    def get_value(self):

        "Return the plain string form of this constant."

        value = str(self)
        return value
paul@137 | 339 | |
class SmallNumInfo:

    "A constant stored as four undecoded bytes."

    def init(self, data, class_file):

        """
        Keep the first four bytes of 'data', remembering 'class_file', and
        return the remaining data.
        """

        self.class_file = class_file
        self.bytes, remaining = data[:4], data[4:]
        return remaining

    def serialize(self):

        "Return the stored bytes unchanged."

        stored = self.bytes
        return stored
paul@137 | 347 | |
class IntegerInfo(SmallNumInfo):

    "A four-byte constant interpreted as a signed integer."

    def get_value(self):

        "Return the stored bytes decoded as a signed 32-bit integer."

        value = s4(self.bytes)
        return value
paul@137 | 351 | |
class FloatInfo(SmallNumInfo):

    "A four-byte constant interpreted as a float."

    def get_value(self):

        "Return the stored bytes decoded as a 32-bit float."

        value = f4(self.bytes)
        return value
paul@137 | 355 | |
class LargeNumInfo:

    "A constant stored as two groups of four undecoded bytes."

    def init(self, data, class_file):

        """
        Keep the first eight bytes of 'data' as high and low halves,
        remembering 'class_file', and return the remaining data.
        """

        self.class_file = class_file
        self.high_bytes, self.low_bytes = data[:4], data[4:8]
        return data[8:]

    def serialize(self):

        "Return the high bytes followed by the low bytes."

        return self.high_bytes + self.low_bytes
paul@185 | 364 | |
paul@137 | 365 | |
class LongInfo(LargeNumInfo):

    "An eight-byte constant interpreted as a signed integer."

    def get_value(self):

        "Return the stored bytes decoded as a signed 64-bit integer."

        combined = self.high_bytes + self.low_bytes
        return s8(combined)
paul@137 | 369 | |
class DoubleInfo(LargeNumInfo):

    "An eight-byte constant interpreted as a float."

    def get_value(self):

        "Return the stored bytes decoded as a 64-bit float."

        combined = self.high_bytes + self.low_bytes
        return f8(combined)
paul@137 | 373 | |
paul@137 | 374 | # Other information. |
paul@137 | 375 | # Objects of these classes are generally aware of the class they reside in. |
paul@137 | 376 | |
class ItemInfo(NameUtils):

    "An item having access flags, a name, a descriptor and attributes."

    def init(self, data, class_file):

        """
        Read the access flags, name index, descriptor index and attributes
        from the start of 'data', remembering 'class_file', and return the
        remaining data.
        """

        self.class_file = class_file
        self.access_flags = u2(data[:2])
        self.name_index = u2(data[2:4])
        self.descriptor_index = u2(data[4:6])
        self.attributes, remaining = self.class_file._get_attributes(data[6:])
        return remaining

    def serialize(self):

        "Return the encoded fixed fields followed by the encoded attributes."

        header = su2(self.access_flags) + su2(self.name_index) + su2(self.descriptor_index)
        return header + self.class_file._serialize_attributes(self.attributes)
paul@137 | 389 | |
class FieldInfo(ItemInfo, PythonNameUtils):

    "An item whose descriptor is a field descriptor."

    def get_descriptor(self):

        "Return the referenced descriptor constant parsed as a field descriptor."

        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return get_field_descriptor(unicode(descriptor))
paul@137 | 393 | |
class MethodInfo(ItemInfo, PythonMethodUtils):

    "An item whose descriptor is a method descriptor."

    def get_descriptor(self):

        "Return the referenced descriptor constant parsed as a method descriptor."

        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return get_method_descriptor(unicode(descriptor))
paul@137 | 397 | |
class AttributeInfo:

    "A generic attribute whose content is kept as undecoded bytes."

    def init(self, data, class_file):

        """
        Read the four-byte attribute length and that many following bytes
        from the start of 'data', and return the remaining data.

        The given 'class_file' is remembered, as it is by the specialised
        attribute classes, so that attributes using this generic
        initialisation also provide a 'class_file' attribute.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[0:4])
        end = 4 + self.attribute_length
        self.info = data[4:end]
        return data[end:]

    def serialize(self):

        "Return the encoded attribute length followed by the stored bytes."

        return su4(self.attribute_length) + self.info
paul@137 | 405 | |
paul@137 | 406 | # NOTE: Decode the different attribute formats. |
paul@137 | 407 | |
class SourceFileAttributeInfo(AttributeInfo, NameUtils, PythonNameUtils):

    "An attribute holding the index of a source file name."

    def init(self, data, class_file):

        """
        Read the attribute length and source file index from the start of
        'data', remembering 'class_file', and return the data following these
        six bytes.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[:4])
        sourcefile_index = u2(data[4:6])
        self.sourcefile_index = sourcefile_index
        # Permit the NameUtils mix-in.
        self.name_index = sourcefile_index
        return data[6:]

    def serialize(self):

        "Return the encoded attribute length followed by the name index."

        length_part = su4(self.attribute_length)
        return length_part + su2(self.name_index)
paul@137 | 417 | |
class ConstantValueAttributeInfo(AttributeInfo):

    "An attribute holding the index of a constant providing a value."

    def init(self, data, class_file):

        """
        Read the attribute length and constant value index from the start of
        'data', remembering 'class_file', and return the remaining data. The
        attribute is expected to occupy exactly six bytes.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[:4])
        self.constant_value_index = u2(data[4:6])
        end = 4 + self.attribute_length
        assert end == 6
        return data[end:]

    def get_value(self):

        "Return the value of the referenced constant."

        constant = self.class_file.constants[self.constant_value_index - 1]
        return constant.get_value()

    def serialize(self):

        "Return the encoded attribute length followed by the constant index."

        length_part = su4(self.attribute_length)
        return length_part + su2(self.constant_value_index)
paul@185 | 431 | |
class CodeAttributeInfo(AttributeInfo):

    "An attribute holding code, an exception table and nested attributes."

    def init(self, data, class_file):

        """
        Read the fixed fields, the code bytes, the exception table and the
        nested attributes from the start of 'data', remembering 'class_file',
        and return the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[:4])
        self.max_stack = u2(data[4:6])
        self.max_locals = u2(data[6:8])
        self.code_length = u4(data[8:12])

        # The code starts after the twelve bytes of fixed fields and is
        # followed by the two-byte exception table length.
        code_end = 12 + self.code_length
        table_start = code_end + 2
        self.code = data[12:code_end]
        self.exception_table_length = u2(data[code_end:table_start])

        self.exception_table = []
        remaining = data[table_start:]
        for _ in range(self.exception_table_length):
            entry = ExceptionInfo()
            remaining = entry.init(remaining)
            self.exception_table.append(entry)

        self.attributes, remaining = self.class_file._get_attributes(remaining)
        return remaining

    def serialize(self):

        """
        Return the encoded fixed fields and code followed by the encoded
        exception table and nested attributes.
        """

        output = su4(self.attribute_length) + su2(self.max_stack) + su2(self.max_locals)
        output += su4(self.code_length) + self.code
        output += su2(self.exception_table_length)
        for entry in self.exception_table:
            output += entry.serialize()
        output += self.class_file._serialize_attributes(self.attributes)
        return output
paul@137 | 457 | |
class ExceptionsAttributeInfo(AttributeInfo):

    "An attribute holding a table of exception indexes."

    def init(self, data, class_file):

        """
        Read the attribute length, the number of exceptions and that many
        two-byte indexes from the start of 'data', remembering 'class_file',
        and return the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[:4])
        self.number_of_exceptions = u2(data[4:6])
        self.exception_index_table = []

        # The indexes start at offset 6 and occupy two bytes each.
        end = 6 + 2 * self.number_of_exceptions
        for offset in range(6, end, 2):
            self.exception_index_table.append(u2(data[offset:offset + 2]))
        return data[end:]

    def get_exception(self, i):

        "Return the constant referenced by entry 'i' of the index table."

        constants = self.class_file.constants
        return constants[self.exception_index_table[i] - 1]

    def serialize(self):

        "Return the encoded length and count followed by the encoded indexes."

        output = su4(self.attribute_length) + su2(self.number_of_exceptions)
        for exception_index in self.exception_index_table:
            output += su2(exception_index)
        return output
paul@137 | 479 | |
class InnerClassesAttributeInfo(AttributeInfo):

    "An attribute holding a table of inner class entries."

    def init(self, data, class_file):

        """
        Read the attribute length, the number of classes and that many inner
        class entries from the start of 'data', remembering 'class_file', and
        return the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[:4])
        self.number_of_classes = u2(data[4:6])
        self.classes = []
        remaining = data[6:]
        for _ in range(self.number_of_classes):
            entry = InnerClassInfo()
            remaining = entry.init(remaining, self.class_file)
            self.classes.append(entry)
        return remaining

    def serialize(self):

        "Return the encoded length and count followed by the encoded entries."

        output = su4(self.attribute_length) + su2(self.number_of_classes)
        for entry in self.classes:
            output += entry.serialize()
        return output
paul@185 | 498 | |
class SyntheticAttributeInfo(AttributeInfo):

    "An attribute of a distinct kind using the generic attribute handling."
paul@137 | 501 | |
class LineNumberAttributeInfo(AttributeInfo):

    "An attribute holding a table of line number entries."

    def init(self, data, class_file):

        """
        Read the attribute length, the table length and that many line number
        entries from the start of 'data', remembering 'class_file', and return
        the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[:4])
        self.line_number_table_length = u2(data[4:6])
        self.line_number_table = []
        remaining = data[6:]
        for _ in range(self.line_number_table_length):
            entry = LineNumberInfo()
            remaining = entry.init(remaining)
            self.line_number_table.append(entry)
        return remaining

    def serialize(self):

        "Return the encoded length and count followed by the encoded entries."

        output = su4(self.attribute_length) + su2(self.line_number_table_length)
        for entry in self.line_number_table:
            output += entry.serialize()
        return output
paul@137 | 520 | |
class LocalVariableAttributeInfo(AttributeInfo):

    "An attribute holding a table of local variable entries."

    def init(self, data, class_file):

        """
        Read the attribute length, the table length and that many local
        variable entries from the start of 'data', remembering 'class_file',
        and return the remaining data.
        """

        self.class_file = class_file
        self.attribute_length = u4(data[:4])
        self.local_variable_table_length = u2(data[4:6])
        self.local_variable_table = []
        remaining = data[6:]
        for _ in range(self.local_variable_table_length):
            entry = LocalVariableInfo()
            remaining = entry.init(remaining, self.class_file)
            self.local_variable_table.append(entry)
        return remaining

    def serialize(self):

        "Return the encoded length and count followed by the encoded entries."

        output = su4(self.attribute_length) + su2(self.local_variable_table_length)
        for entry in self.local_variable_table:
            output += entry.serialize()
        return output
paul@185 | 539 | |
class DeprecatedAttributeInfo(AttributeInfo):

    "An attribute of a distinct kind using the generic attribute handling."
paul@137 | 542 | |
paul@137 | 543 | # Child classes of the attribute information classes. |
paul@137 | 544 | |
class ExceptionInfo:

    "An exception table entry consisting of four two-byte fields."

    def init(self, data):

        """
        Read the start, end and handler positions and the catch type from the
        start of 'data', and return the remaining data.
        """

        self.start_pc = u2(data[:2])
        self.end_pc = u2(data[2:4])
        self.handler_pc = u2(data[4:6])
        self.catch_type = u2(data[6:8])
        return data[8:]

    def serialize(self):

        "Return the four fields encoded in the order in which they are read."

        positions = su2(self.start_pc) + su2(self.end_pc) + su2(self.handler_pc)
        return positions + su2(self.catch_type)
paul@137 | 554 | |
class InnerClassInfo(NameUtils):

    "An inner class table entry consisting of four two-byte fields."

    def init(self, data, class_file):

        """
        Read the inner class index, outer class index, inner name index and
        access flags from the start of 'data', remembering 'class_file', and
        return the remaining data.
        """

        self.class_file = class_file
        self.inner_class_info_index = u2(data[:2])
        self.outer_class_info_index = u2(data[2:4])
        inner_name_index = u2(data[4:6])
        self.inner_name_index = inner_name_index
        # Permit the NameUtils mix-in.
        self.name_index = inner_name_index
        self.inner_class_access_flags = u2(data[6:8])
        return data[8:]

    def serialize(self):

        "Return the four fields encoded in the order in which they are read."

        indexes = su2(self.inner_class_info_index) + su2(self.outer_class_info_index)
        return indexes + su2(self.name_index) + su2(self.inner_class_access_flags)
paul@137 | 566 | |
class LineNumberInfo:

    "A line number table entry consisting of two two-byte fields."

    def init(self, data):

        """
        Read the start position and line number from the start of 'data', and
        return the remaining data.
        """

        self.start_pc = u2(data[:2])
        self.line_number = u2(data[2:4])
        return data[4:]

    def serialize(self):

        "Return the encoded start position followed by the line number."

        start_part = su2(self.start_pc)
        return start_part + su2(self.line_number)
paul@137 | 575 | |
class LocalVariableInfo(NameUtils, PythonNameUtils):

    "A local variable table entry consisting of five two-byte fields."

    def init(self, data, class_file):

        """
        Read the start position, length, name index, descriptor index and
        variable index from the start of 'data', remembering 'class_file',
        and return the remaining data.
        """

        self.class_file = class_file
        self.start_pc = u2(data[:2])
        self.length = u2(data[2:4])
        self.name_index = u2(data[4:6])
        self.descriptor_index = u2(data[6:8])
        self.index = u2(data[8:10])
        return data[10:]

    def get_descriptor(self):

        "Return the referenced descriptor constant parsed as a field descriptor."

        descriptor = self.class_file.constants[self.descriptor_index - 1]
        return get_field_descriptor(unicode(descriptor))

    def serialize(self):

        "Return the five fields encoded in the order in which they are read."

        extent = su2(self.start_pc) + su2(self.length)
        indexes = su2(self.name_index) + su2(self.descriptor_index) + su2(self.index)
        return extent + indexes
paul@137 | 591 | |
paul@137 | 592 | # Exceptions. |
paul@137 | 593 | |
class UnknownTag(Exception):

    "Raised with the tag value when a constant has an unrecognised tag."
paul@137 | 596 | |
class UnknownAttribute(Exception):

    "Raised with the attribute name when an attribute is not recognised."
paul@137 | 599 | |
paul@137 | 600 | # Abstractions for the main structures. |
paul@137 | 601 | |
paul@137 | 602 | class ClassFile: |
paul@137 | 603 | |
paul@137 | 604 | "A class representing a Java class file." |
paul@137 | 605 | |
def __init__(self, s):

    """
    Process the given string 's', populating the object with the class
    file's details. The two version numbers are read from offsets 4 and 6,
    and the remaining sections are decoded in sequence from offset 8.
    """

    self.attribute_class_to_index = None
    minor, major = u2(s[4:]), u2(s[6:])
    self.minorv = minor
    self.majorv = major

    # Each step consumes its section and yields the data that follows it.
    remaining = s[8:]
    self.constants, remaining = self._get_constants(remaining)
    self.access_flags, remaining = self._get_access_flags(remaining)
    self.this_class, remaining = self._get_this_class(remaining)
    self.super_class, remaining = self._get_super_class(remaining)
    self.interfaces, remaining = self._get_interfaces(remaining)
    self.fields, remaining = self._get_fields(remaining)
    self.methods, remaining = self._get_methods(remaining)
    self.attributes, remaining = self._get_attributes(remaining)
paul@137 | 623 | |
def serialize(self):

    """
    Return the encoded class file: the 0xCAFEBABE marker and the two version
    numbers, followed by each section in the order in which it is decoded.
    """

    header = su4(0xCAFEBABE) + su2(self.minorv) + su2(self.majorv)
    return (
        header
        + self._serialize_constants()
        + self._serialize_access_flags()
        + self._serialize_this_class()
        + self._serialize_super_class()
        + self._serialize_interfaces()
        + self._serialize_fields()
        + self._serialize_methods()
        + self._serialize_attributes(self.attributes)
        )
paul@185 | 635 | |
def _encode_const(self, c):

    """
    Return the encoded form of the constant 'c': a one-byte tag followed by
    the constant's own serialized form. An empty string is returned for
    objects which are not instances of a known constant class, such as the
    None entries which follow "large" constants in the constants list.
    """

    # The tags mirror those recognised by _decode_const. The test for
    # InterfaceMethodRefInfo must precede that for MethodRefInfo: it is a
    # subclass of MethodRefInfo and would otherwise be written with tag 10.
    tagged_classes = (
        (Utf8Info, 1),
        (IntegerInfo, 3),
        (FloatInfo, 4),
        (LongInfo, 5),
        (DoubleInfo, 6),
        (ClassInfo, 7),
        (StringInfo, 8),
        (FieldRefInfo, 9),
        (InterfaceMethodRefInfo, 11),
        (MethodRefInfo, 10),
        (NameAndTypeInfo, 12),
        )

    for const_class, tag in tagged_classes:
        if isinstance(c, const_class):
            return su1(tag) + c.serialize()
    return ''
paul@185 | 664 | |
def _decode_const(self, s):

    """
    Decode one constant from the start of 's': a one-byte tag selecting the
    constant class, followed by the data read by that class. Return a tuple
    (constant, remaining data). UnknownTag is raised with the tag value if
    the tag is not recognised.
    """

    constant_classes = {
        1 : Utf8Info,
        3 : IntegerInfo,
        4 : FloatInfo,
        5 : LongInfo,
        6 : DoubleInfo,
        7 : ClassInfo,
        8 : StringInfo,
        9 : FieldRefInfo,
        10 : MethodRefInfo,
        11 : InterfaceMethodRefInfo,
        12 : NameAndTypeInfo,
        }

    tag = u1(s[0:1])
    constant_class = constant_classes.get(tag)
    if constant_class is None:
        # The call form of raise is valid on both Python 2 and Python 3.
        raise UnknownTag(tag)

    # Initialise the constant object.

    const = constant_class()
    s = const.init(s[1:], self)
    return const, s
paul@137 | 696 | |
def _get_constants_from_table(self, count, s):

    """
    Decode constants from 's' until 'count' - 1 entries have been accounted
    for, where each "large" constant accounts for two entries. Return a tuple
    (list of constants, remaining data).
    """

    entries = []
    # Have to skip certain entries specially.
    position = 1
    while position < count:
        const, s = self._decode_const(s)
        entries.append(const)
        position += 1
        # Add a blank entry after "large" entries.
        if isinstance(const, LargeNumInfo):
            entries.append(None)
            position += 1
    return entries, s
paul@137 | 710 | |
paul@137 | 711 | def _get_items_from_table(self, cls, number, s): |
paul@137 | 712 | l = [] |
paul@137 | 713 | for i in range(0, number): |
paul@137 | 714 | f = cls() |
paul@137 | 715 | s = f.init(s, self) |
paul@137 | 716 | l.append(f) |
paul@137 | 717 | return l, s |
paul@137 | 718 | |
def _get_methods_from_table(self, number, s):
    """
    Decode 'number' MethodInfo items from 's', returning a tuple of the list
    of methods and the undecoded remainder.
    """

    methods, remaining = self._get_items_from_table(MethodInfo, number, s)
    return methods, remaining
paul@137 | 721 | |
def _get_fields_from_table(self, number, s):
    """
    Decode 'number' FieldInfo items from 's', returning a tuple of the list
    of fields and the undecoded remainder.
    """

    fields, remaining = self._get_items_from_table(FieldInfo, number, s)
    return fields, remaining
paul@137 | 724 | |
def _get_attribute_from_table(self, s):
    """
    Decode a single attribute from the start of 's'.

    The leading two bytes are a 1-based index into the constants, and the
    'bytes' value of the referenced constant names the attribute. The data
    after the index is passed to the new attribute object's 'init' method
    together with this object.

    Return a tuple of the attribute object and whatever 'init' returned
    (used by callers as the data still to be decoded). Raise
    UnknownAttribute for a name with no associated attribute class.
    """

    name_index = u2(s[0:2])
    constant_name = self.constants[name_index - 1].bytes

    # Attribute names mapped to the classes representing them.

    attribute_classes = {
        "SourceFile": SourceFileAttributeInfo,
        "ConstantValue": ConstantValueAttributeInfo,
        "Code": CodeAttributeInfo,
        "Exceptions": ExceptionsAttributeInfo,
        "InnerClasses": InnerClassesAttributeInfo,
        "Synthetic": SyntheticAttributeInfo,
        "LineNumberTable": LineNumberAttributeInfo,
        "LocalVariableTable": LocalVariableAttributeInfo,
        "Deprecated": DeprecatedAttributeInfo,
    }

    attribute_class = attribute_classes.get(constant_name)
    if attribute_class is None:
        raise UnknownAttribute(constant_name)

    # Let the attribute consume its own payload, skipping the name index.

    attribute = attribute_class()
    remaining = attribute.init(s[2:], self)
    return attribute, remaining
paul@137 | 750 | |
paul@137 | 751 | def _get_attributes_from_table(self, number, s): |
paul@137 | 752 | attributes = [] |
paul@137 | 753 | for i in range(0, number): |
paul@137 | 754 | attribute, s = self._get_attribute_from_table(s) |
paul@137 | 755 | attributes.append(attribute) |
paul@137 | 756 | return attributes, s |
paul@137 | 757 | |
def _get_constants(self, s):
    """
    Read the two-byte big-endian constant count at the start of 's' and
    decode the constant table following it.

    Return the result of '_get_constants_from_table': a tuple of the list
    of constants and the undecoded remainder.
    """

    constant_count = u2(s[0:2])
    table = s[2:]
    return self._get_constants_from_table(constant_count, table)
paul@137 | 761 | |
def _serialize_constants(self):
    """
    Return the serialised constant table: the count (one more than the
    number of entries held in 'self.constants') followed by each entry as
    encoded by '_encode_const'.
    """

    # The stored count is one greater than the number of slots, mirroring
    # the 1-based numbering used when decoding.

    count = su2(len(self.constants) + 1)
    encoded = [self._encode_const(constant) for constant in self.constants]
    return count + "".join(encoded)
paul@185 | 764 | |
def _get_access_flags(self, s):
    """
    Return a tuple of the access flags, read as a two-byte big-endian
    unsigned value from the start of 's', and the data following them.
    """

    flags = u2(s[0:2])
    return flags, s[2:]
paul@185 | 767 | |
def _serialize_access_flags(self):
    """
    Return 'self.access_flags' as encoded by 'su2'.
    """

    flags = self.access_flags
    return su2(flags)
paul@137 | 770 | |
def _get_this_class(self, s):
    """
    Return a tuple of the constant describing this class and the data
    following its reference.

    The reference is a two-byte big-endian, 1-based index into
    'self.constants' at the start of 's'.
    """

    constant_index = u2(s[0:2])
    this_class = self.constants[constant_index - 1]
    return this_class, s[2:]
paul@137 | 774 | |
def _serialize_this_class(self):
    """
    Return the 1-based position of 'self.this_class' within
    'self.constants', encoded by 'su2'.
    """

    pool_index = self.constants.index(self.this_class) + 1
    return su2(pool_index)
paul@185 | 777 | |
def _serialize_super_class(self):
    """
    Return the encoded reference to the superclass.

    The reference is the 1-based position of 'self.super_class' within
    'self.constants', or zero where there is no superclass, encoded by
    'su2'.
    """

    # '_get_super_class' decodes an index of zero as None, so None must be
    # written back as zero. Searching the constants for None would either
    # fail or find the blank entry following a "large" constant.

    if self.super_class is None:
        return su2(0)

    return su2(self.constants.index(self.super_class) + 1)
paul@185 | 780 | |
def _get_super_class(self, s):
    """
    Return a tuple of the constant describing the superclass and the data
    following its reference.

    The reference is a two-byte big-endian, 1-based index into
    'self.constants' at the start of 's'. An index of zero yields None in
    place of a constant.
    """

    constant_index = u2(s[0:2])

    # Zero means that no superclass is referenced.

    if constant_index == 0:
        return None, s[2:]

    return self.constants[constant_index - 1], s[2:]
paul@137 | 787 | |
def _get_interfaces(self, s):
    """
    Decode the interface table at the start of 's'.

    The table is a two-byte big-endian count followed by that many two-byte,
    1-based indexes into 'self.constants'.

    Return a tuple of the list of referenced constants and the data
    following the table.
    """

    count = u2(s[0:2])
    found = []

    # Walk the table by offset; the first entry follows the count.

    offset = 2
    for _ in range(count):
        constant_index = u2(s[offset:offset + 2])
        found.append(self.constants[constant_index - 1])
        offset += 2

    return found, s[offset:]
paul@137 | 797 | |
def _serialize_interfaces(self):
    """
    Return the serialised interface table: the number of interfaces
    followed by a reference to each one, all encoded by 'su2'.

    Each reference is the 1-based position of the interface's constant
    within 'self.constants', matching what '_get_interfaces' reads back.
    """

    count = su2(len(self.interfaces))

    # The references must index the constants, not the interface list
    # itself, otherwise the table would merely contain 1, 2, 3...

    references = [su2(self.constants.index(interf) + 1)
                  for interf in self.interfaces]
    return count + "".join(references)
paul@185 | 800 | |
def _get_fields(self, s):
    """
    Read the two-byte big-endian field count at the start of 's' and decode
    the field table following it.

    Return the result of '_get_fields_from_table': a tuple of the list of
    fields and the undecoded remainder.
    """

    field_count = u2(s[0:2])
    table = s[2:]
    return self._get_fields_from_table(field_count, table)
paul@137 | 804 | |
def _serialize_fields(self):
    """
    Return the serialised field table: the number of fields encoded by
    'su2', followed by the result of each field's 'serialize' method.
    """

    header = su2(len(self.fields))
    body = "".join([field.serialize() for field in self.fields])
    return header + body
paul@185 | 809 | |
def _get_attributes(self, s):
    """
    Read the two-byte big-endian attribute count at the start of 's' and
    decode the attribute table following it.

    Return the result of '_get_attributes_from_table': a tuple of the list
    of attributes and the undecoded remainder.
    """

    attribute_count = u2(s[0:2])
    table = s[2:]
    return self._get_attributes_from_table(attribute_count, table)
paul@137 | 813 | |
def _serialize_attributes(self, attrs):
    """
    Return the serialised form of the attribute list 'attrs'.

    The result is the number of attributes encoded by 'su2', followed for
    each attribute by the encoded index of the constant naming it and the
    result of the attribute's 'serialize' method.

    The mapping from attribute classes to name constant indexes is built
    from 'self.constants' on first use and kept in
    'self.attribute_class_to_index'. (This assumes the attribute is set to
    None before the first call; that initialisation is not visible here.)

    Raise UnknownAttribute where an attribute has no name constant, since
    writing its body without a name index would corrupt the output.
    """

    od = su2(len(attrs))
    if len(attrs) == 0:
        return od

    if self.attribute_class_to_index is None:
        attr_names_to_class = {
            "SourceFile": SourceFileAttributeInfo,
            "ConstantValue": ConstantValueAttributeInfo,
            "Code": CodeAttributeInfo,
            "Exceptions": ExceptionsAttributeInfo,
            "InnerClasses": InnerClassesAttributeInfo,
            "Synthetic": SyntheticAttributeInfo,
            "LineNumberTable": LineNumberAttributeInfo,
            "LocalVariableTable": LocalVariableAttributeInfo,
            "Deprecated": DeprecatedAttributeInfo,
        }

        # Build the mapping completely before publishing it, so that a
        # failure part-way through does not leave a partial cache behind.

        class_to_index = {}
        for position, c in enumerate(self.constants):
            if isinstance(c, Utf8Info):
                name = str(c)
                if name in attr_names_to_class:
                    # Constant indexes are 1-based.
                    class_to_index[attr_names_to_class[name]] = position + 1
        self.attribute_class_to_index = class_to_index

    parts = [od]
    for attribute in attrs:
        for classtype, name_index in self.attribute_class_to_index.items():
            if isinstance(attribute, classtype):
                parts.append(su2(name_index))
                break
        else:
            # No constant names this kind of attribute.
            raise UnknownAttribute(attribute.__class__.__name__)
        parts.append(attribute.serialize())

    return "".join(parts)
paul@185 | 836 | |
def _get_methods(self, s):
    """
    Read the two-byte big-endian method count at the start of 's' and decode
    the method table following it.

    Return the result of '_get_methods_from_table': a tuple of the list of
    methods and the undecoded remainder.
    """

    method_count = u2(s[0:2])
    table = s[2:]
    return self._get_methods_from_table(method_count, table)
paul@137 | 840 | |
def _serialize_methods(self):
    """
    Return the serialised method table: the number of methods encoded by
    'su2', followed by the result of each method's 'serialize' method.
    """

    serialized = [method.serialize() for method in self.methods]
    return su2(len(self.methods)) + "".join(serialized)
paul@185 | 845 | |
paul@185 | 846 | |
if __name__ == "__main__":

    # Decode the class file named by the first command line argument.

    import sys
    f = open(sys.argv[1], "rb")

    # Close the file even if reading or decoding fails.

    try:
        c = ClassFile(f.read())
    finally:
        f.close()
paul@137 | 852 | |
paul@137 | 853 | # vim: tabstop=4 expandtab shiftwidth=4 |