# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is mozilla.org code
#
# The Initial Developer of the Original Code is
# the Mozilla Foundation.
# Portions created by the Initial Developer are Copyright (C) 2010
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Taras Glek <tglek@mozilla.com>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****

import sys, os, subprocess, struct
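
# This script rewrites a JAR (ZIP) archive so that the files named in a startup
# log are stored first and in log order, places the central directory near the
# front of the file, and records in the first 4 bytes how many bytes of
# "startup" data are worth reading ahead. Running it with --deoptimize restores
# a conventional layout and emits a .jar.log file describing the previous order.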
41 ("signature", "uint32"),
42 ("min_version", "uint16"),
43 ("general_flag", "uint16"),
44 ("compression", "uint16"),
45 ("lastmod_time", "uint16"),
46 ("lastmod_date", "uint16"),
48 ("compressed_size", "uint32"),
49 ("uncompressed_size", "uint32"),
50 ("filename_size", "uint16"),
51 ("extra_field_size", "uint16"),
52 ("filename", "filename_size"),
53 ("extra_field", "extra_field_size"),
54 ("data", "compressed_size")
58 ("signature", "uint32"),
59 ("creator_version", "uint16"),
60 ("min_version", "uint16"),
61 ("general_flag", "uint16"),
62 ("compression", "uint16"),
63 ("lastmod_time", "uint16"),
64 ("lastmod_date", "uint16"),
66 ("compressed_size", "uint32"),
67 ("uncompressed_size", "uint32"),
68 ("filename_size", "uint16"),
69 ("extrafield_size", "uint16"),
70 ("filecomment_size", "uint16"),
71 ("disknum", "uint16"),
72 ("internal_attr", "uint16"),
73 ("external_attr", "uint32"),
75 ("filename", "filename_size"),
76 ("extrafield", "extrafield_size"),
77 ("filecomment", "filecomment_size"),
81 ("signature", "uint32"),
82 ("disk_num", "uint16"),
83 ("cdir_disk", "uint16"),
84 ("disk_entries", "uint16"),
85 ("cdir_entries", "uint16"),
86 ("cdir_size", "uint32"),
87 ("cdir_offset", "uint32"),
88 ("comment_size", "uint16"),

type_mapping = {"uint32": "I", "uint16": "H"}

def format_struct(format):
    string_fields = {}
    fmt = "<"  # ZIP structures are little-endian
    for (name, value) in iter(format):
        if value in type_mapping:
            fmt += type_mapping[value]
        else:
            string_fields[name] = value
    return (fmt, string_fields)

def size_of(format):
    return struct.calcsize(format_struct(format)[0])
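
# For example, format_struct(cdir_end) evaluates to ("<IHHHHIIH", {}) and
# size_of(cdir_end) == 22, the fixed size of a ZIP end-of-central-directory
# record with no variable-length fields.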

class MyStruct:
    def __init__(self, format, string_fields):
        self.__dict__["struct_members"] = {}
        self.__dict__["format"] = format
        self.__dict__["string_fields"] = string_fields

    def addMember(self, name, value):
        self.__dict__["struct_members"][name] = value

    def __getattr__(self, item):
        try:
            return self.__dict__["struct_members"][item]
        except KeyError:
            print(self.__dict__["struct_members"])
            raise AttributeError(item)

    def __setattr__(self, item, value):
        if item in self.__dict__["struct_members"]:
            self.__dict__["struct_members"][item] = value
        else:
            raise AttributeError(item)

    def pack(self):
        extra_data = ""
        values = []
        string_fields = self.__dict__["string_fields"]
        struct_members = self.__dict__["struct_members"]
        format = self.__dict__["format"]
        for (name, _) in format:
            if name in string_fields:
                # variable-length fields are appended after the fixed-size part
                extra_data = extra_data + struct_members[name]
            else:
                values.append(struct_members[name])
        return struct.pack(format_struct(format)[0], *values) + extra_data
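
# MyStruct instances are produced by BinaryBlob.read_struct() below; fields are
# read by name (e.g. entry.filename, entry.offset) and pack() re-serializes the
# record: fixed-size fields first, variable-length tails appended afterwards.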

ENDSIG = 0x06054b50  # signature of the ZIP end-of-central-directory record

def assert_true(cond, msg):
    if not cond:
        raise Exception(msg)

class BinaryBlob:
    def __init__(self, f):
        self.data = open(f, "rb").read()
        self.offset = 0
        self.length = len(self.data)

    def readAt(self, pos, length):
        self.offset = pos + length
        return self.data[pos:self.offset]

    def read_struct(self, format, offset = None):
        if offset is None:
            offset = self.offset
        (fstr, string_fields) = format_struct(format)
        size = struct.calcsize(fstr)
        data = self.readAt(offset, size)
        ret = struct.unpack(fstr, data)
        retstruct = MyStruct(format, string_fields)
        i = 0
        for (name, _) in iter(format):
            if not name in string_fields:
                member_data = ret[i]
                i = i + 1
            else:
                # zip has data fields which are described by other struct fields, this does
                # additional reads to fill em in
                member_desc = string_fields[name]
                member_data = self.readAt(self.offset, retstruct.__getattr__(member_desc))
            retstruct.addMember(name, member_data)
        # sanity check serialization code
        data = self.readAt(offset, self.offset - offset)
        out_data = retstruct.pack()
        assert_true(out_data == data, "Serialization failed: %d != %d" % (len(out_data), len(data)))
        return retstruct

def optimizejar(jar, outjar, inlog = None):
    if inlog is not None:
        inlog = open(inlog).read().rstrip()
        # in the case of an empty log still move the index forward
        if len(inlog) == 0:
            inlog = []
        else:
            inlog = inlog.split("\n")
    outlog = []

    jarblob = BinaryBlob(jar)
    dirend = jarblob.read_struct(cdir_end, jarblob.length - size_of(cdir_end))
    assert_true(dirend.signature == ENDSIG, "no end-of-central-directory signature found")
    cdir_offset = dirend.cdir_offset
    readahead = 0
    if inlog is None and cdir_offset == 4:
        # an already-optimized jar stores the suggested readahead byte count in its first 4 bytes
        readahead = struct.unpack("<I", jarblob.readAt(0, 4))[0]
        print("%s: startup data ends at byte %d" % (outjar, readahead))

    jarblob.offset = cdir_offset
    central_directory = []
    for i in range(0, dirend.cdir_entries):
        entry = jarblob.read_struct(cdir_entry)
        central_directory.append(entry)

    reordered_count = 0
    if inlog is not None:
        dup_guard = set()
        for ordered_name in inlog:
            if ordered_name in dup_guard:
                continue
            dup_guard.add(ordered_name)
            found = False
            for i in range(reordered_count, len(central_directory)):
                if central_directory[i].filename == ordered_name:
                    # swap the cdir entries so logged files come first, in log order
                    tmp = central_directory[i]
                    central_directory[i] = central_directory[reordered_count]
                    central_directory[reordered_count] = tmp
                    reordered_count = reordered_count + 1
                    found = True
                    break
            if not found:
                print("Can't find '%s' in %s" % (ordered_name, jar))

    outfd = open(outjar, "wb")
    out_offset = 0
    if inlog is not None:
        # have to put central directory at offset 4 cos 0 confuses some tools.
        # This also lets us specify how many entries should be preread
        dirend.cdir_offset = 4
        # make room for central dir + end of dir + 4 extra bytes at front
        out_offset = dirend.cdir_offset + dirend.cdir_size + size_of(cdir_end)
        outfd.seek(out_offset)

    cdir_data = ""
    written_count = 0
    # store number of bytes suggested for readahead
    for entry in central_directory:
        # read in the header twice: first for comparison, second time for convenience when writing out
        jarfile = jarblob.read_struct(local_file_header, entry.offset)
        assert_true(jarfile.filename == entry.filename, "Directory/Localheader mismatch")
        data = jarfile.pack()
        outfd.write(data)
        old_entry_offset = entry.offset
        entry.offset = out_offset
        out_offset = out_offset + len(data)
        entry_data = entry.pack()
        cdir_data += entry_data
        expected_len = size_of(cdir_entry) + entry.filename_size + entry.extrafield_size + entry.filecomment_size
        assert_true(len(entry_data) == expected_len,
                    "%s entry size - expected:%d got:%d" % (entry.filename, expected_len, len(entry_data)))
        written_count += 1
        if inlog is not None:
            if written_count == reordered_count:
                readahead = out_offset
                print("%s: startup data ends at byte %d" % (outjar, readahead))
            elif written_count < reordered_count:
                pass
                #print("%s @ %d" % (entry.filename, out_offset))
        elif readahead >= old_entry_offset + len(data):
            # when deoptimizing, record the files that sat inside the old readahead region
            outlog.append(entry.filename)
            reordered_count += 1

    if inlog is None:
        dirend.cdir_offset = out_offset

    dirend_data = dirend.pack()
    assert_true(size_of(cdir_end) == len(dirend_data),
                "Failed to serialize directory end correctly. Serialized size:%d, expected:%d" % (len(dirend_data), size_of(cdir_end)))

    outfd.seek(dirend.cdir_offset)
    assert_true(len(cdir_data) == dirend.cdir_size,
                "Failed to serialize central directory correctly. Serialized size:%d, expected:%d, difference:%d" % (len(cdir_data), dirend.cdir_size, dirend.cdir_size - len(cdir_data)))
    outfd.write(cdir_data)
    outfd.write(dirend_data)

    # for ordered jars the central directory is written at the beginning of the file, so a second
    # central-directory record has to be written at the end of the file
    if inlog is not None:
        outfd.seek(0)
        outfd.write(struct.pack("<I", readahead))
        outfd.seek(out_offset)
        outfd.write(dirend_data)

    print("%s %d/%d in %s" % (("Ordered" if inlog is not None else "Deoptimized"),
                              reordered_count, len(central_directory), outjar))
    outfd.close()
    return outlog

if len(sys.argv) != 5:
    print("Usage: %s --optimize|--deoptimize JAR_LOG_DIR IN_JAR_DIR OUT_JAR_DIR" % sys.argv[0])
    exit(1)

def optimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    if not os.path.exists(JAR_LOG_DIR):
        print("No jar logs found in %s. No jars to optimize." % JAR_LOG_DIR)
        return

    ls = os.listdir(JAR_LOG_DIR)
    for logfile in ls:
        if not logfile.endswith(".jar.log"):
            continue
        injarfile = os.path.join(IN_JAR_DIR, logfile[:-4])
        outjarfile = os.path.join(OUT_JAR_DIR, logfile[:-4])
        if not os.path.exists(injarfile):
            print("Warning: Skipping %s, %s doesn't exist" % (logfile, injarfile))
            continue
        logfile = os.path.join(JAR_LOG_DIR, logfile)
        optimizejar(injarfile, outjarfile, logfile)

def deoptimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    if not os.path.exists(JAR_LOG_DIR):
        os.makedirs(JAR_LOG_DIR)

    ls = os.listdir(IN_JAR_DIR)
    for jarfile in ls:
        if not jarfile.endswith(".jar"):
            continue
        injarfile = os.path.join(IN_JAR_DIR, jarfile)
        outjarfile = os.path.join(OUT_JAR_DIR, jarfile)
        logfile = os.path.join(JAR_LOG_DIR, jarfile + ".log")
        log = optimizejar(injarfile, outjarfile, None)
        open(logfile, "wb").write("\n".join(log))

def main():
    MODE = sys.argv[1]
    JAR_LOG_DIR = sys.argv[2]
    IN_JAR_DIR = sys.argv[3]
    OUT_JAR_DIR = sys.argv[4]
    if MODE == "--optimize":
        optimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR)
    elif MODE == "--deoptimize":
        deoptimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR)
    else:
        print("Unknown mode %s" % MODE)

if __name__ == '__main__':
    main()
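
# Example invocations (script and directory names are illustrative):
#   python optimizejar.py --optimize   jarlog/ in-jars/ out-jars/
#   python optimizejar.py --deoptimize jarlog/ in-jars/ out-jars/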