1.9.30 sync.
[gae.git] / python / google / appengine / tools / jarfile.py
blob26c387b24875e2f3601350bfda47942b39882539
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Code for handling Java jar files.
19 Jar files are just zip files with a particular interpretation for certain files
20 in the zip under the META-INF/ directory. So we can read and write them using
21 the standard zipfile module.
23 The specification for jar files is at
24 http://docs.oracle.com/javase/7/docs/technotes/guides/jar/jar.html
25 """
26 from __future__ import with_statement
30 import os
31 import sys
32 import zipfile
35 _MANIFEST_NAME = 'META-INF/MANIFEST.MF'
class Error(Exception):
  """Base class for the exceptions defined in this module."""
class InvalidJarError(Error):
  """Raised when the manifest of a jar is not well-formed."""
class JarWriteError(Error):
  """Raised when output jars cannot be produced as requested.

  Within this module that means an entry larger than the maximum jar size, or
  an output location that exists but is not a directory.
  """
class Manifest(object):
  """Holds the parsed contents of a jar manifest.

  Attributes:
    main_section: a dict for the main (first) section of the manifest. Keys
      are attribute names such as 'Manifest-Version' and values are the
      corresponding attribute values such as '1.0', both as strings.
    sections: a dict for the remaining sections of the manifest. Each key is
      the value of a section's 'Name' attribute, and each value is a dict of
      that section's attributes, in the same shape as main_section.
  """

  def __init__(self, main_section, sections):
    # Both dicts are stored as given; no copy is made.
    self.main_section, self.sections = main_section, sections
def ReadManifest(jar_file_name):
  """Extracts the manifest from a jar and returns it in parsed form.

  Args:
    jar_file_name: the name of the jar whose manifest is wanted.

  Returns:
    A Manifest object for the jar's META-INF/MANIFEST.MF entry, or None if
    the jar has no such entry.

  Raises:
    IOError: if the jar does not exist or cannot be read.
  """
  with zipfile.ZipFile(jar_file_name) as jar:
    try:
      raw_manifest = jar.read(_MANIFEST_NAME)
    except KeyError:
      # ZipFile.read raises KeyError when the archive has no such entry.
      parsed = None
    else:
      parsed = _ParseManifest(raw_manifest)
  return parsed
def _ParseManifest(manifest_string):
  """Parse a Manifest object out of the given string.

  Args:
    manifest_string: a str or unicode that is the manifest contents. Where
      bytes is a distinct type from str, a bytes value is also accepted and is
      decoded as UTF-8.

  Returns:
    A Manifest object parsed out of the string.

  Raises:
    InvalidJarError: if the manifest is not well-formed.
  """
  # Where bytes and str differ, zipfile returns entry contents as bytes, which
  # cannot be mixed with the str separators used below.
  if bytes is not str and isinstance(manifest_string, bytes):
    try:
      manifest_string = manifest_string.decode('utf-8')
    except UnicodeDecodeError:
      raise InvalidJarError('Manifest is not valid UTF-8')

  # Normalize every kind of line ending to '\n' and drop trailing blank lines
  # so that sections are separated by exactly '\n\n'.
  manifest_string = '\n'.join(manifest_string.splitlines()).rstrip('\n')
  section_strings = manifest_string.split('\n\n')
  parsed_sections = [_ParseManifestSection(s) for s in section_strings]
  main_section = parsed_sections[0]

  # An explicit loop keeps the offending section in scope for the error
  # message; a generator-expression variable is not visible out here.
  sections = {}
  for entry in parsed_sections[1:]:
    try:
      name = entry['Name']
    except KeyError:
      raise InvalidJarError(
          'Manifest entry has no Name attribute: %s' % (entry,))
    sections[name] = entry
  return Manifest(main_section, sections)
def _ParseManifestSection(section):
  """Turns the text of one manifest section into a dict of its attributes.

  Args:
    section: a str or unicode holding a single manifest section, with lines
      separated by '\\n'. For example (without the >):
      > Name: section-name
      > Some-Attribute: some value
      > Another-Attribute: another value

  Returns:
    A dict mapping each attribute name (here 'Name', 'Some-Attribute' and
    'Another-Attribute') to its value.

  Raises:
    InvalidJarError: if the manifest section is not well-formed.
  """
  # A newline followed by a space marks a continuation of the previous line;
  # removing both characters glues the pieces back together.
  unfolded = section.replace('\n ', '')
  pairs = []
  for line in unfolded.split('\n'):
    # Split on the first ': ' only, so values may themselves contain ': '.
    pairs.append(line.split(': ', 1))
  try:
    # dict() raises ValueError for any line that did not split into two parts.
    return dict(pairs)
  except ValueError:
    raise InvalidJarError('Invalid manifest %r' % unfolded)
def Make(input_directory, output_directory, base_name,
         maximum_size=getattr(sys, 'maxint', sys.maxsize),
         include_predicate=lambda name: True):
  """Makes one or more jars from a directory hierarchy.

  Args:
    input_directory: a string that is the root of the directory hierarchy from
      which files will be put in the jar.
    output_directory: a string that is the directory to put the output jars.
    base_name: the name to be used for each output jar. If the name is 'foo'
      then each jar will be called 'foo-nnnn.jar', where nnnn is a sequence of
      digits.
    maximum_size: the maximum allowed total uncompressed size of the files in
      any given jar. Defaults to sys.maxint, or to sys.maxsize on interpreters
      that have no sys.maxint.
    include_predicate: a function that is called once for each file in the
      directory hierarchy. It is given the name that the file will have in the
      output jar(s), and it must return a true value if the file is to be
      included.

  Raises:
    IOError: if input files cannot be read or output jars cannot be written.
    JarWriteError: if an input file is bigger than maximum_size.
  """
  zip_names = []
  abs_dir = os.path.abspath(input_directory)
  for dirpath, _, files in os.walk(abs_dir):
    # relpath copes with roots that already end in a separator (such as '/'),
    # where slicing off len(abs_dir) + 1 characters would eat a real one.
    rel_dir = os.path.relpath(dirpath, abs_dir)
    if rel_dir == os.curdir:
      prefix = ''
    else:
      # Names inside a jar always use '/' regardless of the host platform.
      prefix = rel_dir.replace(os.sep, '/') + '/'
    zip_names.extend(prefix + f for f in files)

  with _Maker(output_directory, base_name, maximum_size) as maker:
    # Sorting makes the assignment of files to jars deterministic.
    for name in sorted(zip_names):
      if not include_predicate(name):
        continue
      abs_fs_name = os.path.join(abs_dir, os.path.normpath(name))
      size = os.path.getsize(abs_fs_name)
      if size > maximum_size:
        raise JarWriteError(
            'File %s has size %d which is bigger than the maximum '
            'jar size %d' % (abs_fs_name, size, maximum_size))
      maker.Write(name, abs_fs_name)
def SplitJar(input_jar, output_directory,
             maximum_size=getattr(sys, 'maxint', sys.maxsize),
             include_predicate=lambda name: True):
  """Copies an input jar into a directory, splitting if necessary.

  The entries of foo.jar are written to new jars in output_directory, called
  foo-0000.jar, foo-0001.jar, etc; a new jar is started whenever the next
  entry would push the current one over maximum_size. The jar manifest
  (META-INF/MANIFEST.MF) is not included in the split jars, and neither is the
  index (META-INF/INDEX.LIST) if any. Manifests are not heavily used at
  runtime, and it's not clear what the correct manifest would be in each
  individual jar.

  Args:
    input_jar: a string that is the path to the jar to be copied.
    output_directory: a string that is the directory to put the copy or copies.
    maximum_size: the maximum allowed total uncompressed size of the files in
      any given jar. Defaults to sys.maxint, or to sys.maxsize on interpreters
      that have no sys.maxint.
    include_predicate: a function that is called once for each entry in the
      input jar other than the manifest and index. It is given the name of the
      entry, and must return a true value if the entry is to be included in
      the output jar(s).

  Raises:
    IOError: if the input jar cannot be read or the output jars cannot be
      written.
    ValueError: if input_jar does not end with '.jar'.
    JarWriteError: if an entry in the input jar is bigger than maximum_size.
  """
  if not input_jar.lower().endswith('.jar'):
    raise ValueError('Does not end with .jar: %s' % input_jar)

  # The jar specification places the index at META-INF/INDEX.LIST. The bare
  # 'INDEX.LIST' name is still skipped so previously excluded entries stay
  # excluded.
  excluded_names = frozenset(
      [_MANIFEST_NAME, 'META-INF/INDEX.LIST', 'INDEX.LIST'])

  base_name = os.path.splitext(os.path.basename(input_jar))[0]
  with _Maker(output_directory, base_name, maximum_size) as maker:
    for name, contents in JarContents(input_jar):
      if name in excluded_names or not include_predicate(name):
        continue
      size = len(contents)
      if size > maximum_size:
        raise JarWriteError(
            'Entry %s in %s has size %d which is bigger than the maximum jar '
            'size %d' % (name, input_jar, size, maximum_size))
      maker.WriteStr(name, contents)
def JarContents(jar_path):
  """Iterates over the entries of a jar, producing each name with its bytes.

  Every generated tuple pairs the relative name of an entry within the jar,
  such as 'java/lang/Object.class', with a str holding that entry's
  uncompressed contents. The jar stays open until the generator is exhausted
  or closed.

  Args:
    jar_path: a str that is the path to the jar.

  Yields:
    A (name, contents) pair.
  """
  with zipfile.ZipFile(jar_path) as jar:
    for info in jar.infolist():
      entry_name = info.filename
      entry_contents = jar.read(entry_name)
      yield entry_name, entry_contents
class _Maker(object):
  """Writes jars to contain the entries supplied to its Write methods.

  Output jars are named '<base_name>-nnnn.jar', numbered from 0000, and are
  created lazily: no jar exists until the first entry is written.

  This class is designed to be used in a with statement.
  """

  def __init__(self, output_directory, base_name,
               maximum_size=getattr(sys, 'maxint', sys.maxsize)):
    """Prepares the output directory; no jar is opened yet.

    Args:
      output_directory: a string that is the directory to put the jars in. It
        is created if it does not exist.
      base_name: the name that each output jar's file name starts with.
      maximum_size: the maximum allowed total uncompressed size of the entries
        in any given jar. Defaults to sys.maxint, or to sys.maxsize on
        interpreters that have no sys.maxint.

    Raises:
      JarWriteError: if output_directory exists but is not a directory.
    """
    self.base_name = base_name
    self.output_directory = os.path.normpath(output_directory)
    self.maximum_size = maximum_size

    if not os.path.exists(self.output_directory):
      os.makedirs(self.output_directory)
    elif not os.path.isdir(self.output_directory):
      raise JarWriteError('Not a directory: %s' % self.output_directory)

    # current_jar is the open zipfile.ZipFile being filled, or None when no
    # jar is open; current_jar_size is the total uncompressed size of what has
    # been written to it; jar_suffix numbers the next jar to be created.
    self.current_jar = None
    self.current_jar_size = 0
    self.jar_suffix = 0

  def __enter__(self):
    return self

  def __exit__(self, t, value, traceback):
    # Close the jar in progress, if any, whether or not an exception occurred.
    if self.current_jar is not None:
      self.current_jar.close()

  def WriteStr(self, name, content):
    """Write a str as an entry to a jar, creating a new one if necessary.

    If the total uncompressed size of all the entries written to the current jar
    exceeds the maximum, the current jar will be closed and a new one created.

    Args:
      name: the relative name of the jar entry, for example
        'java/lang/String.class'.
      content: a str that is the bytes to be written to the jar entry.
    """
    # The lambda reads self.current_jar only when called, after _WriteEntry
    # has made sure the right jar is open.
    self._WriteEntry(len(content),
                     lambda: self.current_jar.writestr(name, content))

  def Write(self, name, path):
    """Write file as an entry to a jar, creating a new one if necessary.

    If the total uncompressed size of all the entries written to the current jar
    exceeds the maximum, the current jar will be closed and a new one created.

    Args:
      name: the relative name of the jar entry, for example
        'java/lang/String.class'.
      path: a str that is the path of the file to be written
    """
    self._WriteEntry(os.path.getsize(path),
                     lambda: self.current_jar.write(path, name))

  def _WriteEntry(self, size, write_func):
    """Write an entry to a jar, creating a new one if necessary.

    If adding the entry would make the total uncompressed size of the current
    jar exceed the maximum, the current jar is closed and a new one created.
    An entry that is by itself bigger than the maximum is still written, alone
    in its own jar; callers that want to reject such entries must check first.

    Args:
      size: the size in bytes of the new entry, uncompressed.
      write_func: a function that writes that entry to self.current_jar.
    """
    # Only roll over when a jar is actually open. Without this guard the very
    # first entry, if larger than maximum_size, would call close() on None.
    if (self.current_jar is not None and
        self.current_jar_size + size > self.maximum_size):
      self.current_jar.close()
      self.current_jar = None
    if self.current_jar is None:
      jar_name = '%s-%04d.jar' % (self.base_name, self.jar_suffix)
      self.jar_suffix += 1
      full_jar_name = os.path.join(self.output_directory, jar_name)
      self.current_jar = zipfile.ZipFile(
          full_jar_name, 'w', zipfile.ZIP_DEFLATED)
      self.current_jar_size = 0
    self.current_jar_size += size
    write_func()