PR other/51165
[official-gcc.git] / libjava / contrib / classfile.py
bloba8bb14c0b8f6a0f0c1f051e0113605e6de22de31
1 ## Copyright (C) 2004, 2005 Free Software Foundation
2 ## Written by Gary Benson <gbenson@redhat.com>
3 ##
4 ## This program is free software; you can redistribute it and/or modify
5 ## it under the terms of the GNU General Public License as published by
6 ## the Free Software Foundation; either version 2 of the License, or
7 ## (at your option) any later version.
8 ##
9 ## This program is distributed in the hope that it will be useful,
10 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 ## GNU General Public License for more details.
14 """Read Java(TM) class files."""
16 import cStringIO as StringIO
17 import struct
class Class:
    """Parsed contents of one Java class file.

    The constructor reads the whole file eagerly.  Afterwards the
    instance carries these attributes:

      version       (major, minor) tuple
      constants     dict mapping constant pool index -> (tag name, value)
      access_flags  the class access flags, as an integer
      name, super   constant pool indexes of Class entries
      interfaces    list of constant pool indexes of Class entries
      fields        list of Member instances
      methods       list of Member instances
      attributes    dict mapping the pool index of the attribute's
                    name -> raw attribute data

    References into the constant pool are kept as indexes; they are
    not resolved to the entries they point at.
    """

    # Constant pool tag byte -> tag name.  The name selects the
    # _read_constant_<name> method that decodes the entry.  A tag byte
    # that is not listed here raises KeyError while the pool is read.
    _CONSTANT_TAGS = {
        1: "Utf8", 3: "Integer", 4: "Float", 5: "Long",
        6: "Double", 7: "Class", 8: "String", 9: "Fieldref",
        10: "Methodref", 11: "InterfaceMethodref",
        12: "NameAndType"}

    def __init__(self, arg):
        """Read a class file.

        arg may be an object with a read() method, a string holding
        the class file contents (recognised by the 0xCAFEBABE magic at
        its start), or any other string, which is taken to be the path
        of the file to open.  Anything else raises TypeError.

        Malformed input surfaces as AssertionError (when assertions
        are enabled), KeyError (unknown constant tag) or struct.error
        (truncated data).
        """
        opened_here = False
        if hasattr(arg, "read"):
            self.fp = arg
        elif isinstance(arg, str):
            if arg.startswith("\xca\xfe\xba\xbe"):
                self.fp = StringIO.StringIO(arg)
            else:
                # Class files are binary data, so never let the
                # platform translate line endings.
                self.fp = open(arg, "rb")
                opened_here = True
        else:
            raise TypeError(type(arg))

        try:
            self._parse()
        finally:
            # Only close what this constructor opened; a caller's
            # file object stays under the caller's control.
            if opened_here:
                self.fp.close()

        del self.fp, self.pool_integrity_checks

    def _parse(self):
        """Read every section of the class file from self.fp, in order."""
        magic = self._read_int()
        assert magic == 0xcafebabe
        minor, major = self._read(">HH")
        self.version = (major, minor)

        # When assertions are enabled every constant pool reference is
        # recorded as (index, expected tag) and verified at the end,
        # once the whole pool has been populated.  With assertions
        # disabled the list stays None and nothing is recorded.
        self.pool_integrity_checks = None
        if __debug__:
            self.pool_integrity_checks = []

        self._read_constants_pool()

        self.access_flags = self._read_short()
        self.name = self._read_reference_Class()
        self.super = self._read_reference_Class()

        self.interfaces = self._read_interfaces()
        self.fields = self._read_fieldsormethods()
        self.methods = self._read_fieldsormethods()
        self.attributes = self._read_attributes()

        if self.pool_integrity_checks is not None:
            for index, tag in self.pool_integrity_checks:
                assert self.constants[index][0] == tag

    def __repr__(self):
        """Return one "name: value" line per public attribute.

        The constant pool is replaced by "<ELIDED>".
        """
        names = [name for name in dir(self)
                 if not name.startswith("_") and name != "Member"]
        names.sort()
        lines = []
        for name in names:
            if name == "constants":
                shown = "<ELIDED>"
            else:
                shown = repr(getattr(self, name))
            lines.append("%-13s %s" % (name + ":", shown))
        return "\n".join(lines)

    def _read_constants_pool(self):
        """Fill self.constants with index -> (tag name, value)."""
        self.constants = {}
        count = self._read_short()
        index = 1
        while index < count:
            tag = self._CONSTANT_TAGS[self._read_byte()]
            reader = getattr(self, "_read_constant_" + tag)
            self.constants[index] = (tag, reader())
            # A Long or Double entry takes up two pool indexes; the
            # second one is left without an entry.
            if tag in ("Long", "Double"):
                index += 2
            else:
                index += 1

    def _read_interfaces(self):
        """Return the list of Class pool indexes of the interfaces."""
        result = []
        for i in range(self._read_short()):
            result.append(self._read_reference_Class())
        return result

    def _read_fieldsormethods(self):
        """Return a list of Member objects (fields and methods share
        one layout)."""
        result = []
        for i in range(self._read_short()):
            result.append(self.Member(self))
        return result

    class Member:
        """One field or method: flags, name, descriptor, attributes."""

        def __init__(self, source):
            """Read the member from source, the Class being parsed."""
            self.access_flags = source._read_short()
            self.name = source._read_reference_Utf8()
            self.descriptor = source._read_reference_Utf8()
            self.attributes = source._read_attributes()

        def __repr__(self):
            """Return the public attributes, one per line, with the
            continuation lines indented by 15 spaces."""
            names = [name for name in dir(self) if not name.startswith("_")]
            names.sort()
            lines = []
            for name in names:
                value = getattr(self, name)
                # NOTE(review): _read_attributes keys its result by
                # constant pool index, so a "Code" key never seems to
                # be present and this elision looks dead -- confirm.
                if name == "attributes" and "Code" in value:
                    value = value.copy()
                    value.update({"Code": "<ELIDED>"})
                shown = repr(value).replace(
                    "'Code': '<ELIDED>'", "'Code': <ELIDED>")
                lines.append("%-13s %s" % (name + ":", shown))
            return ("\n%s" % (15 * " ")).join(lines)

    def _read_attributes(self):
        """Return a dict of raw attribute data.

        Keys are the constant pool indexes of the attribute names;
        values are the attribute bytes exactly as read.
        """
        result = {}
        for i in range(self._read_short()):
            name = self._read_reference_Utf8()
            data = self.fp.read(self._read_int())
            assert name not in result
            result[name] = data
        return result

    # Constants pool reference reader convenience functions

    def _read_reference_Utf8(self):
        """Read one pool index that must refer to a Utf8 entry."""
        return self._read_references("Utf8")[0]

    def _read_reference_Class(self):
        """Read one pool index that must refer to a Class entry."""
        return self._read_references("Class")[0]

    def _read_reference_Class_NameAndType(self):
        """Read a Class index followed by a NameAndType index."""
        return self._read_references("Class", "NameAndType")

    def _read_references(self, *args):
        """Read one pool index per expected tag name in args.

        Returns the list of indexes.  When integrity checking is on,
        each (index, expected tag) pair is queued for verification.
        """
        result = []
        for arg in args:
            index = self._read_short()
            if self.pool_integrity_checks is not None:
                self.pool_integrity_checks.append((index, arg))
            result.append(index)
        return result

    # Constants pool constant reader functions

    def _read_constant_Utf8(self):
        """Read a length-prefixed string constant.

        Data that the utf-8 codec rejects is handed to
        _bork_utf8_decode.  Text that fits in ASCII is returned as a
        plain string, anything else as a unicode string.
        """
        raw = self.fp.read(self._read_short())
        try:
            text = raw.decode("utf-8")
        except UnicodeError:
            text = _bork_utf8_decode(raw)
        try:
            plain = text.encode("us-ascii")
        except UnicodeError:
            return text
        if isinstance(plain, str):
            return plain
        # encode() produced a separate bytes type, so the decoded
        # text already is the native string.
        return text

    def _read_constant_Integer(self):
        """Read a signed 32-bit integer constant."""
        # Java ints are signed, so this must not go through the
        # unsigned _read_int.
        return self._read(">i")[0]

    def _read_constant_Float(self):
        """Read a 32-bit float constant."""
        return self._read(">f")[0]

    def _read_constant_Long(self):
        """Read a signed 64-bit integer constant."""
        return self._read(">q")[0]

    def _read_constant_Double(self):
        """Read a 64-bit float constant."""
        return self._read(">d")[0]

    # These entries consist of nothing but pool references.
    _read_constant_Class = _read_reference_Utf8
    _read_constant_String = _read_reference_Utf8
    _read_constant_Fieldref = _read_reference_Class_NameAndType
    _read_constant_Methodref = _read_reference_Class_NameAndType
    _read_constant_InterfaceMethodref = _read_reference_Class_NameAndType

    def _read_constant_NameAndType(self):
        """Read a (name index, descriptor index) pair of Utf8 references."""
        return self._read_reference_Utf8(), self._read_reference_Utf8()

    # Generic reader functions

    def _read_int(self):
        """Read an unsigned big-endian 32-bit integer."""
        # With an explicit byte order struct uses standard sizes, so
        # "I" is four bytes whatever the platform's word size.
        return self._read(">I")[0]

    def _read_short(self):
        """Read an unsigned big-endian 16-bit integer."""
        return self._read(">H")[0]

    def _read_byte(self):
        """Read one unsigned byte."""
        return self._read("B")[0]

    def _read(self, fmt):
        """Read and unpack exactly the bytes described by the struct
        format fmt; returns the tuple from struct.unpack."""
        return struct.unpack(fmt, self.fp.read(struct.calcsize(fmt)))
198 def _bork_utf8_decode(data):
199 # more crack!
200 bytes, unicode = map(ord, data), ""
201 while bytes:
202 b1 = bytes.pop(0)
203 if b1 & 0x80:
204 assert b1 & 0x40
205 b2 = bytes.pop(0)
206 assert b2 & 0xC0 == 0x80
207 if b1 & 0x20:
208 assert not b1 & 0x10
209 b3 = bytes.pop(0)
210 assert b3 & 0xC0 == 0x80
211 unicode += unichr(
212 ((b1 & 0x0f) << 12) + ((b2 & 0x3f) << 6) + (b3 & 0x3f))
213 else:
214 unicode += unichr(((b1 & 0x1f) << 6) + (b2 & 0x3f))
215 else:
216 unicode += unichr(b1)
217 return unicode
if __name__ == "__main__":
    # Dump every class file named on the command line.  Without
    # arguments fall back to the path this script always used.
    import sys
    paths = sys.argv[1:] or [
        "/usr/share/katana/build/ListDependentClasses.class"]
    for path in paths:
        print(Class(path))