Add option for excluding paths from conversion
[cvs2svn.git] / cvs2svn_lib / apple_single_filter.py
blob0a9a0abd161cfc2fa2a45df8012e3f21270d67eb
# (Be in -*- python -*- mode.)
#
# ====================================================================
# Copyright (c) 2007-2008 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://cvs2svn.tigris.org/.
# ====================================================================
"""A stream filter for extracting the data fork from AppleSingle data.

Some Macintosh CVS clients store resource fork data along with the
contents of the file (called the data fork) by encoding both in an
'AppleSingle' data stream before storing them to CVS. This file
contains a stream filter for extracting the data fork from such data
streams. (Any other forks are discarded.)

See the following for some random information about this format and
how it is used by Macintosh CVS clients:

    http://users.phg-online.de/tk/netatalk/doc/Apple/v1/
    http://rfc.net/rfc1740.html
    http://ximbiot.com/cvs/cvshome/cyclic/cvs/dev-mac.html
    http://www.maccvs.org/faq.html#resfiles
    http://www.heilancoo.net/MacCVSClient/MacCVSClientDoc/storage-formats.html

"""
import struct

try:
    from cStringIO import StringIO
except ImportError:
    # Python 3 has no cStringIO module; io.BytesIO is the in-memory
    # byte-string stream that plays the same role there.
    from io import BytesIO as StringIO
class AppleSingleFormatError(IOError):
    """Raised when a stream does not conform to the AppleSingle format.

    This is the base class of the more specific AppleSingle errors
    defined in this module."""
class AppleSingleIncorrectMagicError(AppleSingleFormatError):
    """Raised when a stream does not begin with the AppleSingle magic number.

    The exception records what was consumed from the stream so that the
    caller can still recover the complete original contents:

      data_read -- the data that had been read from the stream when the
          problem was detected.

      eof -- True iff the stream ended before a complete magic number
          could be read."""

    def __init__(self, data_read, eof):
        super(AppleSingleIncorrectMagicError, self).__init__()
        self.eof = eof
        self.data_read = data_read
class AppleSingleEOFError(AppleSingleFormatError):
    """Raised when the stream ends at a point where AppleSingle requires
    more data (for example in the middle of the header, the entry list,
    or the data fork)."""
class AppleSingleFilter(object):
    """A stream that reads the data fork from an AppleSingle stream.

    If the constructor discovers that the file is not a legitimate
    AppleSingle stream, then it raises an AppleSingleFormatError. In
    the special case that the magic number is incorrect, it raises
    AppleSingleIncorrectMagicError with data_read set to the data that
    have been read so far from the input stream. (This allows the
    caller the option to fallback to treating the input stream as a
    normal binary data stream.)

    The wrapped stream must yield byte strings (str under Python 2,
    bytes under Python 3); read() returns byte strings as well."""

    # The header is:
    #
    #   Magic number             4 bytes
    #   Version number           4 bytes
    #   File system or filler   16 bytes
    #   Number of entries        2 bytes
    magic_struct = '>i'
    magic_len = struct.calcsize(magic_struct)

    # The part of the header after the magic number:
    rest_of_header_struct = '>i16sH'
    rest_of_header_len = struct.calcsize(rest_of_header_struct)

    # Each entry is:
    #
    #   Entry ID                 4 bytes
    #   Offset                   4 bytes
    #   Length                   4 bytes
    #
    # All three fields are unsigned.  Decoding them as signed integers
    # would turn a length of 2 GiB or more into a negative number, which
    # read() would then hand to stream.read() as "read everything".
    entry_struct = '>III'
    entry_len = struct.calcsize(entry_struct)

    apple_single_magic = 0x00051600
    apple_single_version_1 = 0x00010000
    apple_single_version_2 = 0x00020000
    apple_single_filler = b'\0' * 16

    apple_single_data_fork_entry_id = 1

    def __init__(self, stream):
        """Parse the AppleSingle header of STREAM and seek to its data fork.

        STREAM only needs read() and close() methods.  On return, STREAM
        is positioned at the first byte of the data fork and
        self.length_remaining holds the length of the data fork.

        Raise AppleSingleIncorrectMagicError if STREAM does not start with
        the AppleSingle magic number, AppleSingleEOFError if STREAM ends
        prematurely, and AppleSingleFormatError for any other violation
        of the format."""

        self.stream = stream

        # Check for the AppleSingle magic number:
        s = self._read_exactly(self.magic_len)
        if len(s) < self.magic_len:
            raise AppleSingleIncorrectMagicError(s, True)

        (magic,) = struct.unpack(self.magic_struct, s)
        if magic != self.apple_single_magic:
            raise AppleSingleIncorrectMagicError(s, False)

        # Read the rest of the header:
        s = self._read_exactly(self.rest_of_header_len)
        if len(s) < self.rest_of_header_len:
            raise AppleSingleEOFError('AppleSingle header incomplete')

        (version, filler, num_entries) = \
            struct.unpack(self.rest_of_header_struct, s)

        if version == self.apple_single_version_1:
            # In version 1 the 16 bytes are not required to be filler,
            # so they are not checked.
            self._prepare_apple_single_v1_file(num_entries)
        elif version == self.apple_single_version_2:
            if filler != self.apple_single_filler:
                raise AppleSingleFormatError('Incorrect filler')
            self._prepare_apple_single_v2_file(num_entries)
        else:
            raise AppleSingleFormatError('Unknown AppleSingle version')

    def _read_exactly(self, size):
        """Read and return exactly SIZE characters from the stream.

        This method is to deal with the fact that stream.read(size) is
        allowed to return less than size characters. If EOF is reached
        before SIZE characters have been read, return the characters that
        have been read so far."""

        retval = []
        length_remaining = size
        while length_remaining > 0:
            s = self.stream.read(length_remaining)
            if not s:
                break
            retval.append(s)
            length_remaining -= len(s)

        return b''.join(retval)

    def _prepare_apple_single_file(self, num_entries):
        """Read the NUM_ENTRIES entry descriptors and skip to the data fork.

        Set self.length_remaining to the length of the data fork.  Raise
        AppleSingleEOFError if the stream ends too early and
        AppleSingleFormatError if there is no data fork entry or its
        offset points into the part of the stream already consumed."""

        entries = self._read_exactly(num_entries * self.entry_len)
        if len(entries) < num_entries * self.entry_len:
            raise AppleSingleEOFError('Incomplete entries list')

        # Use the first entry that describes a data fork:
        for start in range(0, len(entries), self.entry_len):
            (entry_id, offset, length) = struct.unpack_from(
                self.entry_struct, entries, start
            )
            if entry_id == self.apple_single_data_fork_entry_id:
                break
        else:
            raise AppleSingleFormatError('No data fork found')

        # The data fork is located at [offset : offset + length]. Read up
        # to the start of the data:
        n = offset - self.magic_len - self.rest_of_header_len - len(entries)
        if n < 0:
            raise AppleSingleFormatError(
                'Invalid offset to AppleSingle data fork'
            )

        # The stream is not assumed to be seekable, so the intervening
        # bytes are read and discarded in bounded chunks:
        max_chunk_size = 65536
        while n > 0:
            s = self.stream.read(min(n, max_chunk_size))
            if not s:
                raise AppleSingleEOFError(
                    'Offset to AppleSingle data fork past end of file'
                )
            n -= len(s)

        self.length_remaining = length

    def _prepare_apple_single_v1_file(self, num_entries):
        """Prepare to read the data fork of a version 1 stream."""

        self._prepare_apple_single_file(num_entries)

    def _prepare_apple_single_v2_file(self, num_entries):
        """Prepare to read the data fork of a version 2 stream."""

        self._prepare_apple_single_file(num_entries)

    def read(self, size=-1):
        """Read and return up to SIZE bytes of the data fork.

        If SIZE is negative or None, return all of the remaining data
        fork.  Return an empty byte string if SIZE is 0 or the data fork
        has been read completely.  Raise AppleSingleEOFError if the
        underlying stream ends before the end of the data fork."""

        if size is None:
            size = -1

        if size == 0 or self.length_remaining == 0:
            return b''

        if size < 0:
            s = self._read_exactly(self.length_remaining)
            if len(s) < self.length_remaining:
                raise AppleSingleEOFError('AppleSingle data fork truncated')
            self.length_remaining = 0
            return s

        # The length of this read is allowed to be shorter than the
        # requested size:
        s = self.stream.read(min(size, self.length_remaining))
        if not s:
            raise AppleSingleEOFError('AppleSingle data fork truncated')
        self.length_remaining -= len(s)
        return s

    def close(self):
        """Close the underlying stream.  Calling this again is a no-op."""

        if self.stream is not None:
            self.stream.close()
            self.stream = None
class CompoundStream(object):
    """A stream that reads from a series of streams, one after the other.

    STREAMS is an iterable of objects with read() and close() methods;
    reading starts with the stream at position STREAM_INDEX.  Streams
    before that position are never read, but they are still closed by
    close()."""

    def __init__(self, streams, stream_index=0):
        self.streams = list(streams)
        self.stream_index = stream_index

    def read(self, size=-1):
        """Read and return up to SIZE bytes.

        If SIZE is negative or None, return everything that is left in
        all of the remaining streams.  If SIZE is 0, return an empty
        byte string without consuming anything.  Otherwise return the
        result of a single read from the first remaining stream that is
        not yet exhausted, which may be shorter than SIZE.  Return an
        empty byte string once every stream is exhausted."""

        if size is None or size < 0:
            retval = []
            while self.stream_index < len(self.streams):
                retval.append(self.streams[self.stream_index].read())
                self.stream_index += 1
            return b''.join(retval)

        if size == 0:
            # A zero-length read returns an empty string from every
            # stream; without this guard the loop below would mistake
            # that for EOF and skip over all of the unread streams.
            return b''

        while self.stream_index < len(self.streams):
            s = self.streams[self.stream_index].read(size)
            if s:
                # This may not be the full size requested, but that is OK:
                return s
            # That stream was empty; proceed to the next stream:
            self.stream_index += 1

        # No streams are left:
        return b''

    def close(self):
        """Close every underlying stream.  Calling this again is a no-op."""

        if self.streams is None:
            return
        for stream in self.streams:
            stream.close()
        self.streams = None
def get_maybe_apple_single_stream(stream):
    """Treat STREAM as AppleSingle if possible; otherwise treat it literally.

    If STREAM is in AppleSingle format, then return a stream that will
    output the data fork of the original stream. Otherwise, return a
    stream that will output the original file contents literally.

    Only a wrong or missing magic number counts as "not AppleSingle";
    any other AppleSingleFormatError raised while parsing the header is
    propagated to the caller.

    Be careful not to read from STREAM after it has already hit EOF."""

    try:
        return AppleSingleFilter(stream)
    except AppleSingleIncorrectMagicError as e:
        # This is OK; the file is not AppleSingle, so we read it normally:
        string_io = StringIO(e.data_read)
        if e.eof:
            # The original stream already reached EOF, so the part already
            # read contains the complete file contents. Nevertheless return
            # a CompoundStream to make sure that the stream's close() method
            # is called:
            return CompoundStream([stream, string_io], stream_index=1)
        else:
            # The stream needs to output the part already read followed by
            # whatever hasn't been read of the original stream:
            return CompoundStream([string_io, stream])
def get_maybe_apple_single(data):
    """Return the data fork of DATA if DATA is AppleSingle, else DATA itself.

    DATA is a string holding the complete file contents. It is wrapped
    in an in-memory stream, passed through
    get_maybe_apple_single_stream(), and read back in one go."""

    filtered_stream = get_maybe_apple_single_stream(StringIO(data))
    return filtered_stream.read()
if __name__ == '__main__':
    # For fun and testing, allow use of this file as a pipe if it is
    # invoked as a script. Specifically, if stdin is in AppleSingle
    # format, then output only its data fork; otherwise, output it
    # unchanged.
    #
    # This might not work on systems where sys.stdin is opened in text
    # mode.
    #
    # Remember to set PYTHONPATH to point to the main cvs2svn directory.

    import sys

    # Where the standard streams are text wrappers with an underlying
    # binary "buffer" attribute (as in Python 3), use that so the data
    # pass through as raw bytes; otherwise use the streams themselves.
    stdin = getattr(sys.stdin, 'buffer', sys.stdin)
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)

    # A negative CHUNK_SIZE exercises the whole-string interface; a
    # positive one exercises the streaming interface:
    #CHUNK_SIZE = -1
    CHUNK_SIZE = 100

    if CHUNK_SIZE < 0:
        stdout.write(get_maybe_apple_single(stdin.read()))
    else:
        f = get_maybe_apple_single_stream(stdin)
        while True:
            s = f.read(CHUNK_SIZE)
            if not s:
                break
            stdout.write(s)