# Merge mozilla-central and tracemonkey. (a=blockers)
# [mozilla-central.git] / config / optimizejars.py
# blob 234d82bf0392cc7a334b9ac9749a921fea76ab06
1 # ***** BEGIN LICENSE BLOCK *****
2 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 # The contents of this file are subject to the Mozilla Public License Version
5 # 1.1 (the "License"); you may not use this file except in compliance with
6 # the License. You may obtain a copy of the License at
7 # http://www.mozilla.org/MPL/
9 # Software distributed under the License is distributed on an "AS IS" basis,
10 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 # for the specific language governing rights and limitations under the
12 # License.
14 # The Original Code is mozilla.org code
16 # The Initial Developer of the Original Code is
17 # Mozilla Foundation.
18 # Portions created by the Initial Developer are Copyright (C) 2010
19 # the Initial Developer. All Rights Reserved.
21 # Contributor(s):
22 # Taras Glek <tglek@mozilla.com>
24 # Alternatively, the contents of this file may be used under the terms of
25 # either the GNU General Public License Version 2 or later (the "GPL"), or
26 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 # in which case the provisions of the GPL or the LGPL are applicable instead
28 # of those above. If you wish to allow use of your version of this file only
29 # under the terms of either the GPL or the LGPL, and not to allow others to
30 # use your version of this file under the terms of the MPL, indicate your
31 # decision by deleting the provisions above and replace them with the notice
32 # and other provisions required by the GPL or the LGPL. If you do not delete
33 # the provisions above, a recipient may use your version of this file under
34 # the terms of any one of the MPL, the GPL or the LGPL.
36 # ***** END LICENSE BLOCK *****
38 import sys, os, subprocess, struct
# Layout of a ZIP local file header (see PKWARE APPNOTE.TXT, sec. 4.3.7).
# Each entry is (field_name, type). A type that names another field marks a
# variable-length field whose byte length is that other field's value.
# NOTE: the closing bracket was lost in extraction; restored here.
local_file_header = [
    ("signature", "uint32"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extra_field_size", "uint16"),
    ("filename", "filename_size"),
    ("extra_field", "extra_field_size"),
    ("data", "compressed_size")
]
# Layout of a ZIP central directory file header (APPNOTE.TXT, sec. 4.3.12).
# Same (field_name, type) convention as local_file_header above.
# NOTE: the closing bracket was lost in extraction; restored here.
cdir_entry = [
    ("signature", "uint32"),
    ("creator_version", "uint16"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extrafield_size", "uint16"),
    ("filecomment_size", "uint16"),
    ("disknum", "uint16"),
    ("internal_attr", "uint16"),
    ("external_attr", "uint32"),
    ("offset", "uint32"),
    ("filename", "filename_size"),
    ("extrafield", "extrafield_size"),
    ("filecomment", "filecomment_size"),
]
# Layout of the ZIP end-of-central-directory record (APPNOTE.TXT, sec. 4.3.16).
# NOTE: the closing bracket was lost in extraction; restored here.
cdir_end = [
    ("signature", "uint32"),
    ("disk_num", "uint16"),
    ("cdir_disk", "uint16"),
    ("disk_entries", "uint16"),
    ("cdir_entries", "uint16"),
    ("cdir_size", "uint32"),
    ("cdir_offset", "uint32"),
    ("comment_size", "uint16"),
]
91 type_mapping = { "uint32":"I", "uint16":"H"}
def format_struct(format):
    """Split *format* into a struct format string and its string fields.

    Returns (fmt, string_fields): *fmt* is a little-endian struct format
    covering every fixed-width field, and *string_fields* maps each
    variable-length field name to the name of the field holding its size.
    """
    string_fields = {}
    fmt = "<"
    for name, ftype in format:
        if ftype in type_mapping:
            fmt += type_mapping[ftype][0]
        else:
            # Not a primitive type: it is a variable-length string field
            # whose byte length lives in another field.
            string_fields[name] = ftype
    return (fmt, string_fields)
def size_of(format):
    """Return the byte size of the fixed-width portion of *format*."""
    fmt, _ = format_struct(format)
    return struct.calcsize(fmt)
class MyStruct:
    """A mutable record described by a (name, type) field list.

    Fixed-width members are serialized with the struct module; members
    listed in *string_fields* are variable-length payloads appended
    verbatim after the packed fixed-width fields.
    """
    def __init__(self, format, string_fields):
        # Write through __dict__ directly: __setattr__ below only accepts
        # names that already exist in struct_members.
        self.__dict__["struct_members"] = {}
        self.__dict__["format"] = format
        self.__dict__["string_fields"] = string_fields

    def addMember(self, name, value):
        """Register a new member (the only way to introduce a name)."""
        self.__dict__["struct_members"][name] = value

    def __getattr__(self, item):
        members = self.__dict__["struct_members"]
        try:
            return members[item]
        except KeyError:  # was a bare `except:`; only KeyError can occur here
            pass
        # Debug aid before raising: show what members actually exist.
        print("no %s" % item)
        print(members)
        raise AttributeError

    def __setattr__(self, item, value):
        # Only existing members may be assigned; anything else is a typo.
        if item in self.__dict__["struct_members"]:
            self.__dict__["struct_members"][item] = value
        else:
            raise AttributeError

    def pack(self):
        """Serialize all members back to their on-disk representation."""
        extra_data = ""
        values = []
        string_fields = self.__dict__["string_fields"]
        struct_members = self.__dict__["struct_members"]
        format = self.__dict__["format"]
        for (name, _) in format:
            if name in string_fields:
                # Variable-length payloads trail the packed fixed fields.
                extra_data = extra_data + struct_members[name]
            else:
                values.append(struct_members[name])
        return struct.pack(format_struct(format)[0], *values) + extra_data
143 ENDSIG = 0x06054b50
def assert_true(cond, msg):
    """Raise Exception(msg) unless *cond* is truthy.

    The original body had an `exit(1)` after the raise, which was
    unreachable dead code; it has been removed.
    """
    if not cond:
        raise Exception(msg)
class BinaryBlob:
    """Random-access reader over a whole file's contents, with a cursor."""

    def __init__(self, f):
        # Close the handle promptly instead of leaking it until GC
        # (the original used a bare open() with no close).
        with open(f, "rb") as fd:
            self.data = fd.read()
        self.offset = 0
        self.length = len(self.data)

    def readAt(self, pos, length):
        """Return *length* bytes starting at *pos*; advance the cursor."""
        self.offset = pos + length
        return self.data[pos:self.offset]

    def read_struct(self, format, offset=None):
        """Deserialize one structure described by *format* at *offset*.

        Variable-length fields (e.g. a filename sized by a preceding field)
        trigger additional reads. The result is re-packed and compared with
        the raw bytes as a serialization sanity check before returning.
        """
        if offset is None:  # was `== None`; identity test is the idiom
            offset = self.offset
        (fstr, string_fields) = format_struct(format)
        size = struct.calcsize(fstr)
        data = self.readAt(offset, size)
        ret = struct.unpack(fstr, data)
        retstruct = MyStruct(format, string_fields)
        i = 0
        for (name, _) in format:
            if name not in string_fields:
                member_data = ret[i]
                i = i + 1
            else:
                # zip has data fields which are described by other struct
                # fields; this does additional reads to fill them in.
                member_desc = string_fields[name]
                member_data = self.readAt(self.offset,
                                          retstruct.__getattr__(member_desc))
            retstruct.addMember(name, member_data)
        # sanity check serialization code
        data = self.readAt(offset, self.offset - offset)
        out_data = retstruct.pack()
        assert_true(out_data == data,
                    "Serialization fail %d !=%d" % (len(out_data), len(data)))
        return retstruct
def optimizejar(jar, outjar, inlog=None):
    """Rewrite *jar* into *outjar*, optionally reordering its entries.

    If *inlog* is the path of a log listing entry names (one per line),
    entries are reordered to match the log, the central directory is moved
    to the front of the file (offset 4), and the first 4 bytes record how
    many bytes should be preread at startup. If *inlog* is None the jar is
    deoptimized back to a standard layout.

    Returns the list of entry names that fell inside the recorded
    readahead region (empty when optimizing).
    """
    if inlog is not None:
        # Close the log promptly (the original leaked the handle).
        with open(inlog) as logfd:
            inlog = logfd.read().rstrip()
        # in the case of an empty log still move the index forward
        if len(inlog) == 0:
            inlog = []
        else:
            inlog = inlog.split("\n")
    outlog = []
    jarblob = BinaryBlob(jar)
    dirend = jarblob.read_struct(cdir_end, jarblob.length - size_of(cdir_end))
    assert_true(dirend.signature == ENDSIG, "no signature in the end")
    cdir_offset = dirend.cdir_offset
    readahead = 0
    if inlog is None and cdir_offset == 4:
        # An optimized jar stores its readahead byte count in the first
        # 4 bytes of the file.
        readahead = struct.unpack("<I", jarblob.readAt(0, 4))[0]
        print("%s: startup data ends at byte %d" % (outjar, readahead))

    jarblob.offset = cdir_offset
    central_directory = []
    for i in range(0, dirend.cdir_entries):
        entry = jarblob.read_struct(cdir_entry)
        central_directory.append(entry)

    reordered_count = 0
    if inlog is not None:
        dup_guard = set()
        for ordered_name in inlog:
            if ordered_name in dup_guard:
                continue
            else:
                dup_guard.add(ordered_name)
            found = False
            # Selection-style pass: move the named entry to the front
            # region [0, reordered_count).
            for i in range(reordered_count, len(central_directory)):
                if central_directory[i].filename == ordered_name:
                    # swap the cdir entries
                    tmp = central_directory[i]
                    central_directory[i] = central_directory[reordered_count]
                    central_directory[reordered_count] = tmp
                    reordered_count = reordered_count + 1
                    found = True
                    break
            if not found:
                print("Can't find '%s' in %s" % (ordered_name, jar))

    outfd = open(outjar, "wb")
    out_offset = 0
    if inlog is not None:
        # have to put central directory at offset 4 cos 0 confuses some tools.
        # This also lets us specify how many entries should be preread
        dirend.cdir_offset = 4
        # make room for central dir + end of dir + 4 extra bytes at front
        out_offset = dirend.cdir_offset + dirend.cdir_size + size_of(cdir_end)
        outfd.seek(out_offset)

    cdir_data = b""
    written_count = 0
    # store number of bytes suggested for readahead
    for entry in central_directory:
        # read in the header twice..first for comparison, second time for
        # convenience when writing out
        jarfile = jarblob.read_struct(local_file_header, entry.offset)
        assert_true(jarfile.filename == entry.filename,
                    "Directory/Localheader mismatch")
        data = jarfile.pack()
        outfd.write(data)
        old_entry_offset = entry.offset
        entry.offset = out_offset
        out_offset = out_offset + len(data)
        entry_data = entry.pack()
        cdir_data += entry_data
        # A serialized entry is the fixed header plus its three
        # variable-length tails. The original compared `!=` against the
        # variable part only, which made the assertion vacuously true;
        # fixed to a real equality check (and the expected/got message
        # arguments were swapped relative to their labels).
        expected_len = (size_of(cdir_entry) + entry.filename_size +
                        entry.extrafield_size + entry.filecomment_size)
        assert_true(len(entry_data) == expected_len,
                    "%s entry size - expected:%d got:%d"
                    % (entry.filename, expected_len, len(entry_data)))
        written_count += 1
        if inlog is not None:
            if written_count == reordered_count:
                readahead = out_offset
                print("%s: startup data ends at byte %d" % (outjar, readahead))
            elif written_count < reordered_count:
                pass
                # print("%s @ %d" % (entry.filename, out_offset))
        elif readahead >= old_entry_offset + len(data):
            # Entry lay inside the recorded readahead region; report it in
            # the returned log.
            outlog.append(entry.filename)
            reordered_count += 1

    if inlog is None:
        dirend.cdir_offset = out_offset

    dirend_data = dirend.pack()
    assert_true(size_of(cdir_end) == len(dirend_data),
                "Failed to serialize directory end correctly. Serialized size;%d, expected:%d" % (len(dirend_data), size_of(cdir_end)))

    outfd.seek(dirend.cdir_offset)
    assert_true(len(cdir_data) == dirend.cdir_size,
                "Failed to serialize central directory correctly. Serialized size;%d, expected:%d expected-size:%d" % (len(cdir_data), dirend.cdir_size, dirend.cdir_size - len(cdir_data)))
    outfd.write(cdir_data)
    outfd.write(dirend_data)

    # for ordered jars the central directory is written in the beginning of
    # the file, so a second central-directory entry has to be written at
    # the end of the file
    if inlog is not None:
        outfd.seek(0)
        outfd.write(struct.pack("<I", readahead))
        outfd.seek(out_offset)
        outfd.write(dirend_data)

    print("%s %d/%d in %s" % (("Ordered" if inlog is not None else "Deoptimized"),
                              reordered_count, len(central_directory), outjar))
    outfd.close()
    return outlog
# Import-time usage check (original behavior preserved: exits immediately
# when invoked with the wrong argument count). `print` statement converted
# to the function form the rest of the file already uses.
if len(sys.argv) != 5:
    print("Usage: --optimize|--deoptimize %s JAR_LOG_DIR IN_JAR_DIR OUT_JAR_DIR" % sys.argv[0])
    exit(1)
def optimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    """Reorder every jar that has a matching .jar.log in JAR_LOG_DIR.

    For each FOO.jar.log, reads IN_JAR_DIR/FOO.jar and writes the
    reordered jar to OUT_JAR_DIR/FOO.jar. Missing input jars are skipped
    with a warning. (`print` statement converted to the function form used
    elsewhere in the file.)
    """
    if not os.path.exists(JAR_LOG_DIR):
        print("No jar logs found in %s. No jars to optimize." % JAR_LOG_DIR)
        exit(0)

    ls = os.listdir(JAR_LOG_DIR)
    for logfile in ls:
        if not logfile.endswith(".jar.log"):
            continue
        # Strip the trailing ".log" to recover the jar file name.
        injarfile = os.path.join(IN_JAR_DIR, logfile[:-4])
        outjarfile = os.path.join(OUT_JAR_DIR, logfile[:-4])
        if not os.path.exists(injarfile):
            print("Warning: Skipping %s, %s doesn't exist" % (logfile, injarfile))
            continue
        logfile = os.path.join(JAR_LOG_DIR, logfile)
        optimizejar(injarfile, outjarfile, logfile)
def deoptimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    """Rewrite every .jar in IN_JAR_DIR back to a standard layout.

    Each deoptimized jar goes to OUT_JAR_DIR, and the recovered readahead
    entry list is written to JAR_LOG_DIR/<name>.jar.log.
    """
    if not os.path.exists(JAR_LOG_DIR):
        os.makedirs(JAR_LOG_DIR)

    ls = os.listdir(IN_JAR_DIR)
    for jarfile in ls:
        if not jarfile.endswith(".jar"):
            continue
        injarfile = os.path.join(IN_JAR_DIR, jarfile)
        outjarfile = os.path.join(OUT_JAR_DIR, jarfile)
        logfile = os.path.join(JAR_LOG_DIR, jarfile + ".log")
        log = optimizejar(injarfile, outjarfile, None)
        # Close and flush the log promptly instead of leaking the handle
        # (the original used a bare open().write()).
        with open(logfile, "wb") as logfd:
            logfd.write("\n".join(log))
def main():
    """Entry point: dispatch --optimize / --deoptimize to its handler."""
    mode = sys.argv[1]
    jar_log_dir = sys.argv[2]
    in_jar_dir = sys.argv[3]
    out_jar_dir = sys.argv[4]
    if mode == "--optimize":
        optimize(jar_log_dir, in_jar_dir, out_jar_dir)
    elif mode == "--deoptimize":
        deoptimize(jar_log_dir, in_jar_dir, out_jar_dir)
    else:
        # Unrecognized flag: report it and fail.
        print("Unknown mode %s" % mode)
        exit(1)
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()