# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Code for handling Java jar files.

Jar files are just zip files with a particular interpretation for certain files
in the zip under the META-INF/ directory. So we can read and write them using
the standard zipfile module.

The specification for jar files is at
http://docs.oracle.com/javase/7/docs/technotes/guides/jar/jar.html
"""
26 from __future__
import with_statement
# Name of the manifest entry inside a jar; this is the entry that
# ReadManifest reads out of the archive.
_MANIFEST_NAME = 'META-INF/MANIFEST.MF'
class Error(Exception):
  """Base class for the exceptions defined in this module."""
class InvalidJarError(Error):
  """Raised when a jar's manifest is not well-formed."""
class JarWriteError(Error):
  """Raised when output jars cannot be written as requested.

  For example, when a single entry is bigger than the maximum jar size, or
  when the output location exists but is not a directory.
  """
class Manifest(object):
  """Holds the attribute sections parsed from a jar file's manifest.

  Attributes:
    main_section: a dict for the main (first) section of the manifest. Keys
      are attribute names such as 'Manifest-Version'; values are the
      corresponding attribute values, both as strings.
    sections: a dict for the remaining sections of the manifest. Each key is
      the value of a section's 'Name' attribute, and each value is a dict of
      that section's attributes, shaped like main_section.
  """

  def __init__(self, main_section, sections):
    self.main_section, self.sections = main_section, sections
def ReadManifest(jar_file_name):
  """Read and parse the manifest out of the given jar.

  Args:
    jar_file_name: the name of the jar from which the manifest is to be read.

  Returns:
    A parsed Manifest object, or None if the jar has no manifest.

  Raises:
    IOError: if the jar does not exist or cannot be read.
    InvalidJarError: if the jar has a manifest but it is not well-formed.
  """
  with zipfile.ZipFile(jar_file_name) as jar:
    try:
      manifest_string = jar.read(_MANIFEST_NAME)
    except KeyError:
      # ZipFile.read raises KeyError when the archive has no such entry;
      # that is the documented "no manifest" case, not an error.
      return None
  return _ParseManifest(manifest_string)
def _ParseManifest(manifest_string):
  """Parse a Manifest object out of the given string.

  Args:
    manifest_string: a str or unicode that is the manifest contents. Raw
      bytes (what ZipFile.read returns on Python 3) are also accepted and are
      decoded as UTF-8.

  Returns:
    A Manifest object parsed out of the string.

  Raises:
    InvalidJarError: if the manifest is not well-formed.
  """
  # On Python 2, bytes is str, so this branch is only taken for Python 3 bytes.
  if isinstance(manifest_string, bytes) and not isinstance(manifest_string,
                                                           str):
    try:
      manifest_string = manifest_string.decode('utf-8')
    except UnicodeDecodeError:
      raise InvalidJarError('Manifest is not valid UTF-8')

  # Normalise every kind of line ending to '\n' and drop trailing blank lines,
  # so that sections are separated by exactly one empty line.
  manifest_string = '\n'.join(manifest_string.splitlines()).rstrip('\n')
  section_strings = manifest_string.split('\n\n')
  parsed_sections = [_ParseManifestSection(s) for s in section_strings]
  main_section = parsed_sections[0]

  # Every section after the first must be keyed by its 'Name' attribute. An
  # explicit loop keeps the offending entry in scope for the error message.
  sections = {}
  for entry in parsed_sections[1:]:
    try:
      entry_name = entry['Name']
    except KeyError:
      raise InvalidJarError(
          'Manifest entry has no Name attribute: %s' % entry)
    sections[entry_name] = entry
  return Manifest(main_section, sections)
def _ParseManifestSection(section):
  """Parse a dict out of the given manifest section string.

  Args:
    section: a str or unicode that is the manifest section. It looks something
      like this (without the >):
      > Name: section-name
      > Some-Attribute: some value
      > Another-Attribute: another value

  Returns:
    A dict where the keys are the attributes (here, 'Name', 'Some-Attribute',
    'Another-Attribute'), and the values are the corresponding attribute values.

  Raises:
    InvalidJarError: if the manifest section is not well-formed.
  """
  # A newline followed by a single space marks a continuation of the previous
  # line, so joining them back up yields one attribute per line.
  section = section.replace('\n ', '')
  try:
    return dict(line.split(': ', 1) for line in section.split('\n'))
  except ValueError:
    # dict() raises ValueError when a line has no ': ' separator and so does
    # not split into a (key, value) pair.
    raise InvalidJarError('Invalid manifest %r' % section)
def Make(input_directory, output_directory, base_name, maximum_size=sys.maxsize,
         include_predicate=lambda name: True):
  """Makes one or more jars from a directory hierarchy.

  Args:
    input_directory: a string that is the root of the directory hierarchy from
      which files will be put in the jar.
    output_directory: a string that is the directory to put the output jars.
    base_name: the name to be used for each output jar. If the name is 'foo'
      then each jar will be called 'foo-nnnn.jar', where nnnn is a zero-padded
      four-digit sequence number.
    maximum_size: the maximum allowed total uncompressed size of the files in
      any one jar.
    include_predicate: a function that is called once for each file in the
      directory hierarchy. It is given the name that the file will have in the
      output jar(s), and it must return a true value if the file is to be
      included.

  Raises:
    IOError: if input files cannot be read or output jars cannot be written.
    JarWriteError: if an input file is bigger than maximum_size.
  """
  abs_dir = os.path.abspath(input_directory)

  # Collect the jar-relative ('/'-separated) name of every file in the tree.
  zip_names = []
  for dirpath, _, files in os.walk(abs_dir):
    rel_dir = os.path.relpath(dirpath, abs_dir)
    if rel_dir == os.curdir:
      prefix = ''  # Files directly in the root get no directory prefix.
    else:
      prefix = rel_dir.replace(os.sep, '/') + '/'
    zip_names.extend(prefix + f for f in files)

  with _Maker(output_directory, base_name, maximum_size) as maker:
    # Sorted so that the assignment of files to jars is deterministic.
    for name in sorted(zip_names):
      if not include_predicate(name):
        continue
      abs_fs_name = os.path.join(abs_dir, os.path.normpath(name))
      size = os.path.getsize(abs_fs_name)
      if size > maximum_size:
        raise JarWriteError(
            'File %s has size %d which is bigger than the maximum '
            'jar size %d' % (abs_fs_name, size, maximum_size))
      maker.Write(name, abs_fs_name)
def SplitJar(input_jar, output_directory, maximum_size=sys.maxsize,
             include_predicate=lambda name: True):
  """Copies an input jar into a directory, splitting if necessary.

  The entries of the input jar, say foo.jar, are written to new jars in
  output_directory called foo-0000.jar, foo-0001.jar, etc., starting a new jar
  whenever the next entry would take the current one over maximum_size. The jar
  manifest (META-INF/MANIFEST.MF) is not included in the split jars, and
  neither is the index (INDEX.LIST) if any. Manifests are not heavily used at
  runtime, and it's not clear what the correct manifest would be in each
  individual jar.

  Args:
    input_jar: a string that is the path to the jar to be copied.
    output_directory: a string that is the directory to put the copy or copies.
    maximum_size: the maximum allowed total uncompressed size of the files in
      any one jar.
    include_predicate: a function that is called once for each entry in the
      input jar. It is given the name of the entry, and must return a true value
      if the entry is to be included in the output jar(s).

  Raises:
    IOError: if the input jar cannot be read or the output jars cannot be
      written.
    ValueError: if input_jar does not end with '.jar'.
    JarWriteError: if an entry in the input jar is bigger than maximum_size.
  """
  if not input_jar.lower().endswith('.jar'):
    raise ValueError('Does not end with .jar: %s' % input_jar)

  # The jar specification puts the index at META-INF/INDEX.LIST; the bare
  # 'INDEX.LIST' name is still skipped for backward compatibility.
  excluded_names = (
      'META-INF/MANIFEST.MF', 'META-INF/INDEX.LIST', 'INDEX.LIST')

  base_name = os.path.splitext(os.path.basename(input_jar))[0]
  with _Maker(output_directory, base_name, maximum_size) as maker:
    for name, contents in JarContents(input_jar):
      # The predicate is only consulted for entries that are not excluded.
      if name in excluded_names or not include_predicate(name):
        continue
      size = len(contents)
      if size > maximum_size:
        raise JarWriteError(
            'Entry %s in %s has size %d which is bigger than the maximum jar '
            'size %d' % (name, input_jar, size, maximum_size))
      maker.WriteStr(name, contents)
def JarContents(jar_path):
  """Generates (name, contents) pairs for the given jar.

  Each generated tuple consists of the relative name within the jar of an entry,
  for example 'java/lang/Object.class', and a str that is the corresponding
  contents.

  Args:
    jar_path: a str that is the path to the jar.

  Yields:
    A (name, contents) pair.
  """
  archive = zipfile.ZipFile(jar_path)
  try:
    for entry_name in archive.namelist():
      entry_bytes = archive.read(entry_name)
      yield entry_name, entry_bytes
  finally:
    # Runs when the generator is exhausted, closed, or garbage-collected.
    archive.close()
class _Maker(object):
  """Writes jars to contain the entries supplied to its Write methods.

  This class is designed to be used in a with statement. Output jars are
  created lazily, when the first entry destined for them is written, and are
  named '<base_name>-nnnn.jar' where nnnn is a four-digit sequence number
  starting at 0000. The jar that is open when the with block ends is closed.
  """

  def __init__(self, output_directory, base_name, maximum_size=sys.maxsize):
    """Prepares the output directory; no jar is created yet.

    Args:
      output_directory: the directory in which to write the jars. It is
        created if it does not exist.
      base_name: the prefix of the name of every output jar.
      maximum_size: the maximum allowed total uncompressed size of the entries
        in any one jar.

    Raises:
      JarWriteError: if output_directory exists but is not a directory.
    """
    self.base_name = base_name
    self.output_directory = os.path.normpath(output_directory)
    self.maximum_size = maximum_size

    if not os.path.exists(self.output_directory):
      os.makedirs(self.output_directory)
    elif not os.path.isdir(self.output_directory):
      raise JarWriteError('Not a directory: %s' % self.output_directory)

    # The jar currently being written (None until the first entry arrives),
    # the total uncompressed size written to it, and the sequence number that
    # the next new jar will get.
    self.current_jar = None
    self.current_jar_size = 0
    self.jar_suffix = 0

  def __enter__(self):
    return self

  def __exit__(self, t, value, traceback):
    # No jar is open if nothing was ever written.
    if self.current_jar is not None:
      self.current_jar.close()
      self.current_jar = None

  def WriteStr(self, name, content):
    """Write a str as an entry to a jar, creating a new one if necessary.

    If the total uncompressed size of all the entries written to the current jar
    exceeds the maximum, the current jar will be closed and a new one created.

    Args:
      name: the relative name of the jar entry, for example
        'java/lang/String.class'.
      content: a str that is the bytes to be written to the jar entry.
    """
    # The lambda reads self.current_jar only when called, which is after
    # _WriteEntry has chosen (or created) the jar to write to.
    self._WriteEntry(len(content),
                     lambda: self.current_jar.writestr(name, content))

  def Write(self, name, path):
    """Write file as an entry to a jar, creating a new one if necessary.

    If the total uncompressed size of all the entries written to the current jar
    exceeds the maximum, the current jar will be closed and a new one created.

    Args:
      name: the relative name of the jar entry, for example
        'java/lang/String.class'.
      path: a str that is the path of the file to be written.
    """
    self._WriteEntry(os.path.getsize(path),
                     lambda: self.current_jar.write(path, name))

  def _WriteEntry(self, size, write_func):
    """Write an entry to a jar, creating a new one if necessary.

    If the total uncompressed size of all the entries written to the current jar
    exceeds the maximum, the current jar will be closed and a new one created.

    Args:
      size: the size in bytes of the new entry, uncompressed.
      write_func: a function that writes that entry to self.current_jar.
    """
    # Roll over to a new jar if this entry would overflow the open one. The
    # None check covers an oversized first entry, when there is nothing to
    # close yet.
    if (self.current_jar is not None and
        self.current_jar_size + size > self.maximum_size):
      self.current_jar.close()
      self.current_jar = None
    if self.current_jar is None:
      jar_name = '%s-%04d.jar' % (self.base_name, self.jar_suffix)
      self.jar_suffix += 1
      full_jar_name = os.path.join(self.output_directory, jar_name)
      self.current_jar = zipfile.ZipFile(
          full_jar_name, 'w', zipfile.ZIP_DEFLATED)
      self.current_jar_size = 0
    self.current_jar_size += size
    write_func()