# Merge mozilla-central and tracemonkey. (a=blockers)
# [mozilla-central.git] / config / optimizejars.py
# blob 234d82bf0392cc7a334b9ac9749a921fea76ab06
1 # ***** BEGIN LICENSE BLOCK *****
2 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 # The contents of this file are subject to the Mozilla Public License Version
5 # 1.1 (the "License"); you may not use this file except in compliance with
6 # the License. You may obtain a copy of the License at
7 # http://www.mozilla.org/MPL/
9 # Software distributed under the License is distributed on an "AS IS" basis,
10 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 # for the specific language governing rights and limitations under the
12 # License.
14 # The Original Code is mozilla.org code
16 # The Initial Developer of the Original Code is
17 # Mozilla Foundation.
18 # Portions created by the Initial Developer are Copyright (C) 2010
19 # the Initial Developer. All Rights Reserved.
21 # Contributor(s):
22 # Taras Glek <tglek@mozilla.com>
24 # Alternatively, the contents of this file may be used under the terms of
25 # either the GNU General Public License Version 2 or later (the "GPL"), or
26 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 # in which case the provisions of the GPL or the LGPL are applicable instead
28 # of those above. If you wish to allow use of your version of this file only
29 # under the terms of either the GPL or the LGPL, and not to allow others to
30 # use your version of this file under the terms of the MPL, indicate your
31 # decision by deleting the provisions above and replace them with the notice
32 # and other provisions required by the GPL or the LGPL. If you do not delete
33 # the provisions above, a recipient may use your version of this file under
34 # the terms of any one of the MPL, the GPL or the LGPL.
36 # ***** END LICENSE BLOCK *****
38 import sys, os, subprocess, struct
# Layout of a ZIP local file header (see PKWARE APPNOTE.TXT, sec. 4.3.7).
# Each entry is (field_name, type). A type that names another field marks a
# variable-length field whose byte length is that other field's value.
# NOTE: the closing bracket was lost in extraction; restored here.
local_file_header = [
    ("signature", "uint32"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extra_field_size", "uint16"),
    ("filename", "filename_size"),
    ("extra_field", "extra_field_size"),
    ("data", "compressed_size")
]
# Layout of a ZIP central directory file header (APPNOTE.TXT, sec. 4.3.12).
# Same (field_name, type) convention as local_file_header above.
# NOTE: the closing bracket was lost in extraction; restored here.
cdir_entry = [
    ("signature", "uint32"),
    ("creator_version", "uint16"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extrafield_size", "uint16"),
    ("filecomment_size", "uint16"),
    ("disknum", "uint16"),
    ("internal_attr", "uint16"),
    ("external_attr", "uint32"),
    ("offset", "uint32"),
    ("filename", "filename_size"),
    ("extrafield", "extrafield_size"),
    ("filecomment", "filecomment_size"),
]
# Layout of the ZIP end-of-central-directory record (APPNOTE.TXT, sec. 4.3.16).
# NOTE: the closing bracket was lost in extraction; restored here.
cdir_end = [
    ("signature", "uint32"),
    ("disk_num", "uint16"),
    ("cdir_disk", "uint16"),
    ("disk_entries", "uint16"),
    ("cdir_entries", "uint16"),
    ("cdir_size", "uint32"),
    ("cdir_offset", "uint32"),
    ("comment_size", "uint16"),
]
91 type_mapping = { "uint32":"I", "uint16":"H"}
def format_struct(format):
    """Split *format* into a struct format string and its string fields.

    Returns (fmt, string_fields): *fmt* is a little-endian struct format
    covering every fixed-width field, and *string_fields* maps each
    variable-length field name to the name of the field holding its size.
    """
    string_fields = {}
    fmt = "<"
    for name, ftype in format:
        if ftype in type_mapping:
            fmt += type_mapping[ftype][0]
        else:
            # Not a primitive type: it is a variable-length string field
            # whose byte length lives in another field.
            string_fields[name] = ftype
    return (fmt, string_fields)
def size_of(format):
    """Return the byte size of the fixed-width portion of *format*."""
    fmt, _ = format_struct(format)
    return struct.calcsize(fmt)
class MyStruct:
    """A mutable record described by a (name, type) field list.

    Fixed-width members are serialized with the struct module; members
    listed in *string_fields* are variable-length payloads appended
    verbatim after the packed fixed-width fields.
    """
    def __init__(self, format, string_fields):
        # Write through __dict__ directly: __setattr__ below only accepts
        # names that already exist in struct_members.
        self.__dict__["struct_members"] = {}
        self.__dict__["format"] = format
        self.__dict__["string_fields"] = string_fields

    def addMember(self, name, value):
        """Register a new member (the only way to introduce a name)."""
        self.__dict__["struct_members"][name] = value

    def __getattr__(self, item):
        members = self.__dict__["struct_members"]
        try:
            return members[item]
        except KeyError:  # was a bare `except:`; only KeyError can occur here
            pass
        # Debug aid before raising: show what members actually exist.
        print("no %s" % item)
        print(members)
        raise AttributeError

    def __setattr__(self, item, value):
        # Only existing members may be assigned; anything else is a typo.
        if item in self.__dict__["struct_members"]:
            self.__dict__["struct_members"][item] = value
        else:
            raise AttributeError

    def pack(self):
        """Serialize all members back to their on-disk representation."""
        extra_data = ""
        values = []
        string_fields = self.__dict__["string_fields"]
        struct_members = self.__dict__["struct_members"]
        format = self.__dict__["format"]
        for (name, _) in format:
            if name in string_fields:
                # Variable-length payloads trail the packed fixed fields.
                extra_data = extra_data + struct_members[name]
            else:
                values.append(struct_members[name])
        return struct.pack(format_struct(format)[0], *values) + extra_data
143 ENDSIG = 0x06054b50
def assert_true(cond, msg):
    """Raise Exception(msg) unless *cond* is truthy.

    The original body had an `exit(1)` after the raise, which was
    unreachable dead code; it has been removed.
    """
    if not cond:
        raise Exception(msg)
class BinaryBlob:
    """Random-access reader over a whole file's contents, with a cursor."""

    def __init__(self, f):
        # Close the handle promptly instead of leaking it until GC
        # (the original used a bare open() with no close).
        with open(f, "rb") as fd:
            self.data = fd.read()
        self.offset = 0
        self.length = len(self.data)

    def readAt(self, pos, length):
        """Return *length* bytes starting at *pos*; advance the cursor."""
        self.offset = pos + length
        return self.data[pos:self.offset]

    def read_struct(self, format, offset=None):
        """Deserialize one structure described by *format* at *offset*.

        Variable-length fields (e.g. a filename sized by a preceding field)
        trigger additional reads. The result is re-packed and compared with
        the raw bytes as a serialization sanity check before returning.
        """
        if offset is None:  # was `== None`; identity test is the idiom
            offset = self.offset
        (fstr, string_fields) = format_struct(format)
        size = struct.calcsize(fstr)
        data = self.readAt(offset, size)
        ret = struct.unpack(fstr, data)
        retstruct = MyStruct(format, string_fields)
        i = 0
        for (name, _) in format:
            if name not in string_fields:
                member_data = ret[i]
                i = i + 1
            else:
                # zip has data fields which are described by other struct
                # fields; this does additional reads to fill them in.
                member_desc = string_fields[name]
                member_data = self.readAt(self.offset,
                                          retstruct.__getattr__(member_desc))
            retstruct.addMember(name, member_data)
        # sanity check serialization code
        data = self.readAt(offset, self.offset - offset)
        out_data = retstruct.pack()
        assert_true(out_data == data,
                    "Serialization fail %d !=%d" % (len(out_data), len(data)))
        return retstruct
def optimizejar(jar, outjar, inlog=None):
    """Rewrite *jar* into *outjar*, optionally reordering its entries.

    If *inlog* is the path of a log listing entry names (one per line),
    entries are reordered to match the log, the central directory is moved
    to the front of the file (offset 4), and the first 4 bytes record how
    many bytes should be preread at startup. If *inlog* is None the jar is
    deoptimized back to a standard layout.

    Returns the list of entry names that fell inside the recorded
    readahead region (empty when optimizing).
    """
    if inlog is not None:
        # Close the log promptly (the original leaked the handle).
        with open(inlog) as logfd:
            inlog = logfd.read().rstrip()
        # in the case of an empty log still move the index forward
        if len(inlog) == 0:
            inlog = []
        else:
            inlog = inlog.split("\n")
    outlog = []
    jarblob = BinaryBlob(jar)
    dirend = jarblob.read_struct(cdir_end, jarblob.length - size_of(cdir_end))
    assert_true(dirend.signature == ENDSIG, "no signature in the end")
    cdir_offset = dirend.cdir_offset
    readahead = 0
    if inlog is None and cdir_offset == 4:
        # An optimized jar stores its readahead byte count in the first
        # 4 bytes of the file.
        readahead = struct.unpack("<I", jarblob.readAt(0, 4))[0]
        print("%s: startup data ends at byte %d" % (outjar, readahead))

    jarblob.offset = cdir_offset
    central_directory = []
    for i in range(0, dirend.cdir_entries):
        entry = jarblob.read_struct(cdir_entry)
        central_directory.append(entry)

    reordered_count = 0
    if inlog is not None:
        dup_guard = set()
        for ordered_name in inlog:
            if ordered_name in dup_guard:
                continue
            else:
                dup_guard.add(ordered_name)
            found = False
            # Selection-style pass: move the named entry to the front
            # region [0, reordered_count).
            for i in range(reordered_count, len(central_directory)):
                if central_directory[i].filename == ordered_name:
                    # swap the cdir entries
                    tmp = central_directory[i]
                    central_directory[i] = central_directory[reordered_count]
                    central_directory[reordered_count] = tmp
                    reordered_count = reordered_count + 1
                    found = True
                    break
            if not found:
                print("Can't find '%s' in %s" % (ordered_name, jar))

    outfd = open(outjar, "wb")
    out_offset = 0
    if inlog is not None:
        # have to put central directory at offset 4 cos 0 confuses some tools.
        # This also lets us specify how many entries should be preread
        dirend.cdir_offset = 4
        # make room for central dir + end of dir + 4 extra bytes at front
        out_offset = dirend.cdir_offset + dirend.cdir_size + size_of(cdir_end)
        outfd.seek(out_offset)

    cdir_data = b""
    written_count = 0
    # store number of bytes suggested for readahead
    for entry in central_directory:
        # read in the header twice..first for comparison, second time for
        # convenience when writing out
        jarfile = jarblob.read_struct(local_file_header, entry.offset)
        assert_true(jarfile.filename == entry.filename,
                    "Directory/Localheader mismatch")
        data = jarfile.pack()
        outfd.write(data)
        old_entry_offset = entry.offset
        entry.offset = out_offset
        out_offset = out_offset + len(data)
        entry_data = entry.pack()
        cdir_data += entry_data
        # A serialized entry is the fixed header plus its three
        # variable-length tails. The original compared `!=` against the
        # variable part only, which made the assertion vacuously true;
        # fixed to a real equality check (and the expected/got message
        # arguments were swapped relative to their labels).
        expected_len = (size_of(cdir_entry) + entry.filename_size +
                        entry.extrafield_size + entry.filecomment_size)
        assert_true(len(entry_data) == expected_len,
                    "%s entry size - expected:%d got:%d"
                    % (entry.filename, expected_len, len(entry_data)))
        written_count += 1
        if inlog is not None:
            if written_count == reordered_count:
                readahead = out_offset
                print("%s: startup data ends at byte %d" % (outjar, readahead))
            elif written_count < reordered_count:
                pass
                # print("%s @ %d" % (entry.filename, out_offset))
        elif readahead >= old_entry_offset + len(data):
            # Entry lay inside the recorded readahead region; report it in
            # the returned log.
            outlog.append(entry.filename)
            reordered_count += 1

    if inlog is None:
        dirend.cdir_offset = out_offset

    dirend_data = dirend.pack()
    assert_true(size_of(cdir_end) == len(dirend_data),
                "Failed to serialize directory end correctly. Serialized size;%d, expected:%d" % (len(dirend_data), size_of(cdir_end)))

    outfd.seek(dirend.cdir_offset)
    assert_true(len(cdir_data) == dirend.cdir_size,
                "Failed to serialize central directory correctly. Serialized size;%d, expected:%d expected-size:%d" % (len(cdir_data), dirend.cdir_size, dirend.cdir_size - len(cdir_data)))
    outfd.write(cdir_data)
    outfd.write(dirend_data)

    # for ordered jars the central directory is written in the beginning of
    # the file, so a second central-directory entry has to be written at
    # the end of the file
    if inlog is not None:
        outfd.seek(0)
        outfd.write(struct.pack("<I", readahead))
        outfd.seek(out_offset)
        outfd.write(dirend_data)

    print("%s %d/%d in %s" % (("Ordered" if inlog is not None else "Deoptimized"),
                              reordered_count, len(central_directory), outjar))
    outfd.close()
    return outlog
# Import-time usage check (original behavior preserved: exits immediately
# when invoked with the wrong argument count). `print` statement converted
# to the function form the rest of the file already uses.
if len(sys.argv) != 5:
    print("Usage: --optimize|--deoptimize %s JAR_LOG_DIR IN_JAR_DIR OUT_JAR_DIR" % sys.argv[0])
    exit(1)
def optimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    """Reorder every jar that has a matching .jar.log in JAR_LOG_DIR.

    For each FOO.jar.log, reads IN_JAR_DIR/FOO.jar and writes the
    reordered jar to OUT_JAR_DIR/FOO.jar. Missing input jars are skipped
    with a warning. (`print` statement converted to the function form used
    elsewhere in the file.)
    """
    if not os.path.exists(JAR_LOG_DIR):
        print("No jar logs found in %s. No jars to optimize." % JAR_LOG_DIR)
        exit(0)

    ls = os.listdir(JAR_LOG_DIR)
    for logfile in ls:
        if not logfile.endswith(".jar.log"):
            continue
        # Strip the trailing ".log" to recover the jar file name.
        injarfile = os.path.join(IN_JAR_DIR, logfile[:-4])
        outjarfile = os.path.join(OUT_JAR_DIR, logfile[:-4])
        if not os.path.exists(injarfile):
            print("Warning: Skipping %s, %s doesn't exist" % (logfile, injarfile))
            continue
        logfile = os.path.join(JAR_LOG_DIR, logfile)
        optimizejar(injarfile, outjarfile, logfile)
def deoptimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    """Rewrite every .jar in IN_JAR_DIR back to a standard layout.

    Each deoptimized jar goes to OUT_JAR_DIR, and the recovered readahead
    entry list is written to JAR_LOG_DIR/<name>.jar.log.
    """
    if not os.path.exists(JAR_LOG_DIR):
        os.makedirs(JAR_LOG_DIR)

    ls = os.listdir(IN_JAR_DIR)
    for jarfile in ls:
        if not jarfile.endswith(".jar"):
            continue
        injarfile = os.path.join(IN_JAR_DIR, jarfile)
        outjarfile = os.path.join(OUT_JAR_DIR, jarfile)
        logfile = os.path.join(JAR_LOG_DIR, jarfile + ".log")
        log = optimizejar(injarfile, outjarfile, None)
        # Close and flush the log promptly instead of leaking the handle
        # (the original used a bare open().write()).
        with open(logfile, "wb") as logfd:
            logfd.write("\n".join(log))
def main():
    """Entry point: dispatch --optimize / --deoptimize to its handler."""
    mode = sys.argv[1]
    jar_log_dir = sys.argv[2]
    in_jar_dir = sys.argv[3]
    out_jar_dir = sys.argv[4]
    if mode == "--optimize":
        optimize(jar_log_dir, in_jar_dir, out_jar_dir)
    elif mode == "--deoptimize":
        deoptimize(jar_log_dir, in_jar_dir, out_jar_dir)
    else:
        # Unrecognized flag: report it and fail.
        print("Unknown mode %s" % mode)
        exit(1)
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()