1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
import re
import struct
import subprocess
import sys
# ZIP local file header layout, in on-disk order (see PKWARE APPNOTE 4.3.7).
# Each entry is (field_name, type): "uint16"/"uint32" are fixed-width
# little-endian integers; any other value names the field that holds this
# variable-length field's byte length.
local_file_header = [
    ("signature", "uint32"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extra_field_size", "uint16"),
    ("filename", "filename_size"),
    ("extra_field", "extra_field_size"),
    ("data", "compressed_size")
]
# ZIP central directory entry layout (PKWARE APPNOTE 4.3.12); same
# (field_name, type) convention as local_file_header.  "offset" is the
# position of the entry's local file header within the archive.
cdir_entry = [
    ("signature", "uint32"),
    ("creator_version", "uint16"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extrafield_size", "uint16"),
    ("filecomment_size", "uint16"),
    ("disknum", "uint16"),
    ("internal_attr", "uint16"),
    ("external_attr", "uint32"),
    ("offset", "uint32"),
    ("filename", "filename_size"),
    ("extrafield", "extrafield_size"),
    ("filecomment", "filecomment_size"),
]
# ZIP end-of-central-directory record layout (PKWARE APPNOTE 4.3.16).
cdir_end = [
    ("signature", "uint32"),
    ("disk_num", "uint16"),
    ("cdir_disk", "uint16"),
    ("disk_entries", "uint16"),
    ("cdir_entries", "uint16"),
    ("cdir_size", "uint32"),
    ("cdir_offset", "uint32"),
    ("comment_size", "uint16"),
]
# struct-module format codes for the fixed-width types used in the layouts.
type_mapping = { "uint32":"I", "uint16":"H"}

def format_struct (format):
    """Build a struct format string for the fixed-width part of *format*.

    *format* is a list of (name, type) pairs as in the layouts above.
    Returns (fmt, string_fields) where fmt is a little-endian struct format
    covering every uint16/uint32 field in order, and string_fields maps
    each variable-length field's name to the name of the field that holds
    its byte length.
    """
    string_fields = {}
    fmt = "<"  # ZIP integers are little-endian
    for (name, value) in iter(format):
        if value in type_mapping:
            fmt += type_mapping[value][0]
        else:
            # not a primitive: value names the field holding this
            # field's length, so it is read separately
            string_fields[name] = value
    return (fmt, string_fields)
def size_of(format):
    """Return the byte size of the fixed-width portion of *format*
    (variable-length string fields are excluded by format_struct)."""
    return struct.calcsize(format_struct(format)[0])
class MyStruct:
    """Mutable record parsed from one of the layout descriptions above.

    Field values live in self.__dict__["struct_members"]; __getattr__ and
    __setattr__ proxy attribute access to that dict, and __setattr__
    refuses to create members that were never parsed.  string_fields maps
    variable-length member names to the members holding their sizes.

    NOTE(review): binary payloads are handled as Python 2 str throughout
    (pack() concatenates struct.pack output with ""), so pack() is not
    Python 3 safe — confirm before porting.
    """

    def __init__(self, format, string_fields):
        # assign via __dict__ directly to bypass our own __setattr__
        self.__dict__["struct_members"] = {}
        self.__dict__["format"] = format
        self.__dict__["string_fields"] = string_fields

    def addMember(self, name, value):
        """Register a parsed field value (the only way to create members)."""
        self.__dict__["struct_members"][name] = value

    def __getattr__(self, item):
        try:
            return self.__dict__["struct_members"][item]
        except KeyError:
            pass
        # dump state to aid debugging before failing the lookup
        print("no %s" % item)
        print(self.__dict__["struct_members"])
        raise AttributeError(item)

    def __setattr__(self, item, value):
        if item in self.__dict__["struct_members"]:
            self.__dict__["struct_members"][item] = value
        else:
            raise AttributeError(item)

    def pack(self):
        """Serialize members back to their on-disk form: fixed-width fields
        via struct.pack, then the variable-length fields appended in
        layout order."""
        extra_data = ""
        values = []
        string_fields = self.__dict__["string_fields"]
        struct_members = self.__dict__["struct_members"]
        format = self.__dict__["format"]
        for (name, _) in format:
            if name in string_fields:
                extra_data = extra_data + struct_members[name]
            else:
                values.append(struct_members[name])
        return struct.pack(format_struct(format)[0], *values) + extra_data
# end-of-central-directory signature ("PK\x05\x06" read little-endian)
ENDSIG = 0x06054b50

def assert_true(cond, msg):
    """Raise an Exception carrying *msg* when *cond* is false."""
    if not cond:
        raise Exception(msg)
class BinaryBlob:
    """Random-access reader over the raw bytes of a file.

    self.offset tracks the position just past the most recent read so that
    consecutive read_struct/readAt calls walk the file sequentially.
    """

    def __init__(self, f):
        # read the whole file up front; archives handled here are small
        with open(f, "rb") as fd:
            self.data = fd.read()
        self.offset = 0
        self.length = len(self.data)

    def readAt(self, pos, length):
        """Return *length* bytes starting at *pos* and advance self.offset
        to just past them."""
        self.offset = pos + length
        return self.data[pos:self.offset]

    def read_struct (self, format, offset = None):
        """Parse one *format* record at *offset* (default: current offset).

        Fixed-width fields are unpacked in one struct.unpack call; each
        variable-length field is then read using the size stored in the
        already-parsed field named by its descriptor.  The result is
        round-tripped through MyStruct.pack() as a serialization sanity
        check before returning.
        """
        if offset is None:
            offset = self.offset
        (fstr, string_fields) = format_struct(format)
        size = struct.calcsize(fstr)
        data = self.readAt(offset, size)
        ret = struct.unpack(fstr, data)
        retstruct = MyStruct(format, string_fields)
        i = 0
        for (name, _) in iter(format):
            if not name in string_fields:
                member_data = ret[i]
                i = i + 1
            else:
                # zip has data fields which are described by other struct
                # fields, this does additional reads to fill em in
                member_desc = string_fields[name]
                member_data = self.readAt(self.offset,
                                          retstruct.__getattr__(member_desc))
            retstruct.addMember(name, member_data)
        # sanity check serialization code
        data = self.readAt(offset, self.offset - offset)
        out_data = retstruct.pack()
        assert_true(out_data == data,
                    "Serialization fail %d !=%d" % (len(out_data), len(data)))
        return retstruct
def optimizejar(jar, outjar, inlog = None):
    """Rewrite *jar* into *outjar*, stripping directory entries and extra
    fields and normalizing timestamps.

    With *inlog* (a file listing one member filename per line) the named
    entries are moved to the front of the archive, the central directory
    is placed at offset 4, and the first 4 bytes record how many bytes of
    startup readahead to suggest.  With inlog=None the jar is
    "deoptimized" back to a standard layout and the list of filenames
    covered by the stored readahead is returned so the caller can save it
    as a new log.
    """
    if inlog is not None:
        with open(inlog) as logfd:
            inlog = logfd.read().rstrip()
        # in the case of an empty log still move the index forward
        if len(inlog) == 0:
            inlog = []
        else:
            inlog = inlog.split("\n")
    outlog = []
    jarblob = BinaryBlob(jar)
    dirend = jarblob.read_struct(cdir_end, jarblob.length - size_of(cdir_end))
    assert_true(dirend.signature == ENDSIG, "no signature in the end")
    cdir_offset = dirend.cdir_offset
    readahead = 0
    if inlog is None and cdir_offset == 4:
        # optimized jars stash the readahead byte count in the first 4 bytes
        readahead = struct.unpack("<I", jarblob.readAt(0, 4))[0]
        print("%s: startup data ends at byte %d" % (outjar, readahead))

    total_stripped = 0
    jarblob.offset = cdir_offset
    central_directory = []
    for i in range(0, dirend.cdir_entries):
        entry = jarblob.read_struct(cdir_entry)
        if entry.filename[-1:] == "/":
            # directory entries get dropped entirely later
            total_stripped += len(entry.pack())
        else:
            total_stripped += entry.extrafield_size
        central_directory.append(entry)

    reordered_count = 0
    if inlog is not None:
        dup_guard = set()
        for ordered_name in inlog:
            if ordered_name in dup_guard:
                continue
            dup_guard.add(ordered_name)
            found = False
            # entries before reordered_count are already placed; search the rest
            for i in range(reordered_count, len(central_directory)):
                if central_directory[i].filename == ordered_name:
                    # swap the cdir entries
                    tmp = central_directory[i]
                    central_directory[i] = central_directory[reordered_count]
                    central_directory[reordered_count] = tmp
                    reordered_count = reordered_count + 1
                    found = True
                    break
            if not found:
                print("Can't find '%s' in %s" % (ordered_name, jar))

    outfd = open(outjar, "wb")
    out_offset = 0
    if inlog is not None:
        # have to put central directory at offset 4 cos 0 confuses some tools.
        # This also lets us specify how many entries should be preread
        dirend.cdir_offset = 4
        # make room for central dir + end of dir + 4 extra bytes at front
        out_offset = (dirend.cdir_offset + dirend.cdir_size
                      + size_of(cdir_end) - total_stripped)
        outfd.seek(out_offset)

    cdir_data = ""
    written_count = 0
    crc_mapping = {}
    dups_found = 0
    dupe_bytes = 0
    # store number of bytes suggested for readahead
    for entry in central_directory:
        # read in the header twice..first for comparison, second time for
        # convenience when writing out
        jarfile = jarblob.read_struct(local_file_header, entry.offset)
        assert_true(jarfile.filename == entry.filename,
                    "Directory/Localheader mismatch")
        # drop directory entries
        if entry.filename[-1:] == "/":
            total_stripped += len(jarfile.pack())
            dirend.cdir_entries -= 1
            continue
        # drop extra field data
        total_stripped += jarfile.extra_field_size
        entry.extrafield = jarfile.extra_field = ""
        entry.extrafield_size = jarfile.extra_field_size = 0
        # normalize timestamps (DOS date for 2010-01-01, time 00:00)
        entry.lastmod_date = jarfile.lastmod_date = ((2010 - 1980) << 9) | (1 << 5) | 1
        entry.lastmod_time = jarfile.lastmod_time = 0
        data = jarfile.pack()
        outfd.write(data)
        old_entry_offset = entry.offset
        entry.offset = out_offset
        out_offset = out_offset + len(data)
        entry_data = entry.pack()
        cdir_data += entry_data
        expected_len = (entry.filename_size + entry.extrafield_size
                        + entry.filecomment_size)
        # NOTE(review): entry_data also contains the fixed-width header, so
        # this inequality is a minimal sanity check only; kept as-is.
        assert_true(len(entry_data) != expected_len,
                    "%s entry size - expected:%d got:%d" % (entry.filename, len(entry_data), expected_len))
        written_count += 1

        if entry.crc32 in crc_mapping:
            dups_found += 1
            dupe_bytes += entry.compressed_size + len(data) + len(entry_data)
            print("%s\n\tis a duplicate of\n%s\n---" % (entry.filename, crc_mapping[entry.crc32]))
        else:
            crc_mapping[entry.crc32] = entry.filename

        if inlog is not None:
            if written_count == reordered_count:
                readahead = out_offset
                print("%s: startup data ends at byte %d" % (outjar, readahead))
            elif written_count < reordered_count:
                pass
                #print("%s @ %d" % (entry.filename, out_offset))
        elif readahead >= old_entry_offset + len(data):
            # entry sat inside the old readahead region: record it for the log
            outlog.append(entry.filename)
            reordered_count += 1

    if inlog is None:
        dirend.cdir_offset = out_offset

    if dups_found > 0:
        print("WARNING: Found %d duplicate files taking %d bytes" % (dups_found, dupe_bytes))

    dirend.cdir_size = len(cdir_data)
    dirend.disk_entries = dirend.cdir_entries
    dirend_data = dirend.pack()
    assert_true(size_of(cdir_end) == len(dirend_data),
                "Failed to serialize directory end correctly. Serialized size;%d, expected:%d" % (len(dirend_data), size_of(cdir_end)))

    outfd.seek(dirend.cdir_offset)
    outfd.write(cdir_data)
    outfd.write(dirend_data)

    # for ordered jars the central directory is written in the begining of
    # the file, so a second central-directory entry has to be written in
    # the end of the file
    if inlog is not None:
        outfd.seek(0)
        outfd.write(struct.pack("<I", readahead))
        outfd.seek(out_offset)
        outfd.write(dirend_data)

    outfd.close()
    print("Stripped %d bytes" % total_stripped)
    print("%s %d/%d in %s" % (("Ordered" if inlog is not None else "Deoptimized"),
                              reordered_count, len(central_directory), outjar))
    return outlog
# Fail fast on a bad command line.
# NOTE(review): this runs at import time, not only under __main__ — kept
# at module level to preserve the existing behavior.
if len(sys.argv) != 5:
    print("Usage: --optimize|--deoptimize %s JAR_LOG_DIR IN_JAR_DIR OUT_JAR_DIR" % sys.argv[0])
    sys.exit(1)
# filenames ending in ".jar" (the trailing "r" is optional, so ".ja" too)
jar_regex = re.compile(r"\.jar?$")
def optimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    """Optimize every .jar in IN_JAR_DIR into OUT_JAR_DIR, ordering each
    by its JAR_LOG_DIR/<name>.log file when one exists."""
    ls = os.listdir(IN_JAR_DIR)
    for jarfile in ls:
        if not re.search(jar_regex, jarfile):
            continue
        injarfile = os.path.join(IN_JAR_DIR, jarfile)
        outjarfile = os.path.join(OUT_JAR_DIR, jarfile)
        logfile = os.path.join(JAR_LOG_DIR, jarfile + ".log")
        # no log for this jar: still rewrite it, just without reordering
        if not os.path.isfile(logfile):
            logfile = None
        optimizejar(injarfile, outjarfile, logfile)
def deoptimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    """Deoptimize every .jar in IN_JAR_DIR into OUT_JAR_DIR, writing the
    ordering recovered from each into JAR_LOG_DIR/<name>.log."""
    if not os.path.exists(JAR_LOG_DIR):
        os.makedirs(JAR_LOG_DIR)
    ls = os.listdir(IN_JAR_DIR)
    for jarfile in ls:
        if not re.search(jar_regex, jarfile):
            continue
        injarfile = os.path.join(IN_JAR_DIR, jarfile)
        outjarfile = os.path.join(OUT_JAR_DIR, jarfile)
        logfile = os.path.join(JAR_LOG_DIR, jarfile + ".log")
        log = optimizejar(injarfile, outjarfile, None)
        # close the log explicitly instead of leaking the handle;
        # "wb" kept as-is (this script handles the data as py2 str)
        with open(logfile, "wb") as logfd:
            logfd.write("\n".join(log))
def main():
    """Dispatch --optimize/--deoptimize over the three directory args
    (argv layout validated by the module-level usage check)."""
    MODE = sys.argv[1]
    JAR_LOG_DIR = sys.argv[2]
    IN_JAR_DIR = sys.argv[3]
    OUT_JAR_DIR = sys.argv[4]
    if MODE == "--optimize":
        optimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR)
    elif MODE == "--deoptimize":
        deoptimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR)
    else:
        print("Unknown mode %s" % MODE)
        sys.exit(1)
if __name__ == '__main__':
    main()