1 """Macintosh binhex compression/decompression.
4 binhex(inputfilename, outputfilename)
5 hexbin(inputfilename, outputfilename)
9 # Jack Jansen, CWI, August 1995.
11 # The module is supposed to be as compatible as possible. Especially the
12 # easy interface should work "as expected" on any platform.
13 # XXXX Note: currently, textfiles appear in mac-form on all platforms.
14 # We seem to lack a simple character-translate in python.
15 # (we should probably use ISO-Latin-1 on all but the mac platform).
16 # XXXX The simple routines are too simple: they expect to hold the complete
17 # files in-core. Should be fixed.
18 # XXXX It would be nice to handle AppleDouble format on unix
19 # (for servers serving macs).
20 # XXXX I don't understand what happens when you get 0x90 times the same byte on
21 # input. The resulting code (xx 90 90) would appear to be interpreted as an
22 # escaped *value* of 0x90. All coders I've seen appear to ignore this nicety...
# Names made available by a star-import of this module.
__all__ = ["binhex", "hexbin", "Error"]
32 class Error(Exception):
# States (which part of the output has been written so far)
_DID_HEADER = 0
_DID_DATA = 1
_DID_RSRC = 2

# Smallest amount of data handed to the rle-coder at once
REASONABLY_LARGE = 32 * 1024
44 # This code is no longer byte-order dependent
53 def getfileinfo(name
):
55 fp
= io
.open(name
, 'rb')
56 # Quick check for textfile
63 dir, file = os
.path
.split(name
)
64 file = file.replace(':', '-', 1)
65 return file, finfo
, dsize
, 0
68 def __init__(self
, *args
):
71 def read(self
, *args
):
74 def write(self
, *args
):
80 class _Hqxcoderengine
:
81 """Write data to the coder in 3-byte chunks"""
83 def __init__(self
, ofp
):
87 self
.linelen
= LINELEN
- 1
89 def write(self
, data
):
90 self
.data
= self
.data
+ data
91 datalen
= len(self
.data
)
92 todo
= (datalen
// 3) * 3
93 data
= self
.data
[:todo
]
94 self
.data
= self
.data
[todo
:]
97 self
.hqxdata
= self
.hqxdata
+ binascii
.b2a_hqx(data
)
100 def _flush(self
, force
):
102 while first
<= len(self
.hqxdata
) - self
.linelen
:
103 last
= first
+ self
.linelen
104 self
.ofp
.write(self
.hqxdata
[first
:last
] + b
'\n')
105 self
.linelen
= LINELEN
107 self
.hqxdata
= self
.hqxdata
[first
:]
109 self
.ofp
.write(self
.hqxdata
+ b
':\n')
113 self
.hqxdata
= self
.hqxdata
+ binascii
.b2a_hqx(self
.data
)
118 class _Rlecoderengine
:
119 """Write data to the RLE-coder in suitably large chunks"""
121 def __init__(self
, ofp
):
125 def write(self
, data
):
126 self
.data
= self
.data
+ data
127 if len(self
.data
) < REASONABLY_LARGE
:
129 rledata
= binascii
.rlecode_hqx(self
.data
)
130 self
.ofp
.write(rledata
)
135 rledata
= binascii
.rlecode_hqx(self
.data
)
136 self
.ofp
.write(rledata
)
141 def __init__(self
, name_finfo_dlen_rlen
, ofp
):
142 name
, finfo
, dlen
, rlen
= name_finfo_dlen_rlen
143 if isinstance(ofp
, str):
145 ofp
= io
.open(ofname
, 'wb')
146 ofp
.write(b
'(This file must be converted with BinHex 4.0)\r\r:')
147 hqxer
= _Hqxcoderengine(ofp
)
148 self
.ofp
= _Rlecoderengine(hqxer
)
154 self
._writeinfo
(name
, finfo
)
155 self
.state
= _DID_HEADER
157 def _writeinfo(self
, name
, finfo
):
160 raise Error('Filename too long')
161 d
= bytes([nl
]) + name
.encode("latin-1") + b
'\0'
162 tp
, cr
= finfo
.Type
, finfo
.Creator
163 if isinstance(tp
, str):
164 tp
= tp
.encode("latin-1")
165 if isinstance(cr
, str):
166 cr
= cr
.encode("latin-1")
169 # Force all structs to be packed with big-endian
170 d3
= struct
.pack('>h', finfo
.Flags
)
171 d4
= struct
.pack('>ii', self
.dlen
, self
.rlen
)
172 info
= d
+ d2
+ d3
+ d4
176 def _write(self
, data
):
177 self
.crc
= binascii
.crc_hqx(data
, self
.crc
)
181 # XXXX Should this be here??
182 # self.crc = binascii.crc_hqx('\0\0', self.crc)
187 self
.ofp
.write(struct
.pack(fmt
, self
.crc
))
190 def write(self
, data
):
191 if self
.state
!= _DID_HEADER
:
192 raise Error('Writing data at the wrong time')
193 self
.dlen
= self
.dlen
- len(data
)
196 def close_data(self
):
198 raise Error('Incorrect data size, diff=%r' % (self
.rlen
,))
200 self
.state
= _DID_DATA
202 def write_rsrc(self
, data
):
203 if self
.state
< _DID_DATA
:
205 if self
.state
!= _DID_DATA
:
206 raise Error('Writing resource data at the wrong time')
207 self
.rlen
= self
.rlen
- len(data
)
211 if self
.state
< _DID_DATA
:
213 if self
.state
!= _DID_DATA
:
214 raise Error('Close at the wrong time')
216 raise Error("Incorrect resource-datasize, diff=%r" % (self
.rlen
,))
222 def binhex(inp
, out
):
223 """binhex(infilename, outfilename): create binhex-encoded copy of a file"""
224 finfo
= getfileinfo(inp
)
225 ofp
= BinHex(finfo
, out
)
227 ifp
= io
.open(inp
, 'rb')
228 # XXXX Do textfile translation on non-mac systems
236 ifp
= openrsrc(inp
, 'rb')
244 class _Hqxdecoderengine
:
245 """Read data via the decoder in 4-byte chunks"""
247 def __init__(self
, ifp
):
251 def read(self
, totalwtd
):
252 """Read at least totalwtd bytes (or until EOF)"""
256 # The loop here is convoluted, since we don't really know how
257 # much to decode: there may be newlines in the incoming data.
259 if self
.eof
: return decdata
260 wtd
= ((wtd
+ 2) // 3) * 4
261 data
= self
.ifp
.read(wtd
)
263 # Next problem: there may not be a complete number of
264 # bytes in what we pass to a2b. Solve by yet another
269 decdatacur
, self
.eof
= binascii
.a2b_hqx(data
)
271 except binascii
.Incomplete
:
273 newdata
= self
.ifp
.read(1)
275 raise Error('Premature EOF on binhex file')
276 data
= data
+ newdata
277 decdata
= decdata
+ decdatacur
278 wtd
= totalwtd
- len(decdata
)
279 if not decdata
and not self
.eof
:
280 raise Error('Premature EOF on binhex file')
286 class _Rledecoderengine
:
287 """Read data via the RLE-coder"""
289 def __init__(self
, ifp
):
291 self
.pre_buffer
= b
''
292 self
.post_buffer
= b
''
296 if wtd
> len(self
.post_buffer
):
297 self
._fill
(wtd
- len(self
.post_buffer
))
298 rv
= self
.post_buffer
[:wtd
]
299 self
.post_buffer
= self
.post_buffer
[wtd
:]
302 def _fill(self
, wtd
):
303 self
.pre_buffer
= self
.pre_buffer
+ self
.ifp
.read(wtd
+ 4)
305 self
.post_buffer
= self
.post_buffer
+ \
306 binascii
.rledecode_hqx(self
.pre_buffer
)
307 self
.pre_buffer
= b
''
311 # Obfuscated code ahead. We have to take care that we don't
312 # end up with an orphaned RUNCHAR later on. So, we keep a couple
313 # of bytes in the buffer, depending on what the end of
314 # the buffer looks like:
315 # '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0)
316 # '?\220' - Keep 2 bytes: repeated something-else
317 # '\220\0' - Escaped \220: Keep 2 bytes.
318 # '?\220?' - Complete repeat sequence: decode all
319 # otherwise: keep 1 byte.
321 mark
= len(self
.pre_buffer
)
322 if self
.pre_buffer
[-3:] == RUNCHAR
+ b
'\0' + RUNCHAR
:
324 elif self
.pre_buffer
[-1:] == RUNCHAR
:
326 elif self
.pre_buffer
[-2:] == RUNCHAR
+ b
'\0':
328 elif self
.pre_buffer
[-2:-1] == RUNCHAR
:
333 self
.post_buffer
= self
.post_buffer
+ \
334 binascii
.rledecode_hqx(self
.pre_buffer
[:mark
])
335 self
.pre_buffer
= self
.pre_buffer
[mark
:]
341 def __init__(self
, ifp
):
342 if isinstance(ifp
, str):
343 ifp
= io
.open(ifp
, 'rb')
345 # Find initial colon.
350 raise Error("No binhex data found")
351 # Cater for \r\n terminated lines (which show up as \n\r, hence
352 # all lines start with \r)
358 hqxifp
= _Hqxdecoderengine(ifp
)
359 self
.ifp
= _Rledecoderengine(hqxifp
)
363 def _read(self
, len):
364 data
= self
.ifp
.read(len)
365 self
.crc
= binascii
.crc_hqx(data
, self
.crc
)
369 filecrc
= struct
.unpack('>h', self
.ifp
.read(2))[0] & 0xffff
370 #self.crc = binascii.crc_hqx('\0\0', self.crc)
371 # XXXX Is this needed??
372 self
.crc
= self
.crc
& 0xffff
373 if filecrc
!= self
.crc
:
374 raise Error('CRC error, computed %x, read %x'
375 % (self
.crc
, filecrc
))
378 def _readheader(self
):
380 fname
= self
._read
(ord(len))
381 rest
= self
._read
(1 + 4 + 4 + 2 + 4 + 4)
386 flags
= struct
.unpack('>h', rest
[9:11])[0]
387 self
.dlen
= struct
.unpack('>l', rest
[11:15])[0]
388 self
.rlen
= struct
.unpack('>l', rest
[15:19])[0]
392 self
.FInfo
.Creator
= creator
393 self
.FInfo
.Type
= type
394 self
.FInfo
.Flags
= flags
396 self
.state
= _DID_HEADER
399 if self
.state
!= _DID_HEADER
:
400 raise Error('Read data at wrong time')
403 n
= min(n
, self
.dlen
)
408 rv
= rv
+ self
._read
(n
-len(rv
))
409 self
.dlen
= self
.dlen
- n
412 def close_data(self
):
413 if self
.state
!= _DID_HEADER
:
414 raise Error('close_data at wrong time')
416 dummy
= self
._read
(self
.dlen
)
418 self
.state
= _DID_DATA
420 def read_rsrc(self
, *n
):
421 if self
.state
== _DID_HEADER
:
423 if self
.state
!= _DID_DATA
:
424 raise Error('Read resource data at wrong time')
427 n
= min(n
, self
.rlen
)
430 self
.rlen
= self
.rlen
- n
435 dummy
= self
.read_rsrc(self
.rlen
)
437 self
.state
= _DID_RSRC
440 def hexbin(inp
, out
):
441 """hexbin(infilename, outfilename) - Decode binhexed file"""
447 ofp
= io
.open(out
, 'wb')
448 # XXXX Do translation on non-mac systems
456 d
= ifp
.read_rsrc(128000)
458 ofp
= openrsrc(out
, 'wb')
461 d
= ifp
.read_rsrc(128000)