1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// ErrFormat is the error used when the input is not a valid zip file.
18 ErrFormat
= errors
.New("zip: not a valid zip file")
// ErrAlgorithm is the error used for an unsupported compression algorithm.
19 ErrAlgorithm
= errors
.New("zip: unsupported compression algorithm")
// ErrChecksum is the error used for a checksum mismatch.
20 ErrChecksum
= errors
.New("zip: checksum error")
// ReadCloser is the type returned by OpenReader; its Close method closes
// the Zip file.
29 type ReadCloser
struct {
// hasDataDescriptor reports whether the 0x8 bit is set in f.Flags.
// Open uses the result to decide whether to set up a reader for the
// data descriptor that follows the file's data.
41 func (f
*File
) hasDataDescriptor() bool {
42 return f
.Flags
&0x8 != 0
45 // OpenReader will open the Zip file specified by name and return a ReadCloser.
46 func OpenReader(name
string) (*ReadCloser
, error
) {
// Open the named file with os.Open.
47 f
, err
:= os
.Open(name
)
// Initialize r from the opened file, passing fi.Size() as the archive size.
57 if err
:= r
.init(f
, fi
.Size()); err
!= nil {
65 // NewReader returns a new Reader reading from r, which is assumed to
66 // have the given size in bytes.
67 func NewReader(r io
.ReaderAt
, size
int64) (*Reader
, error
) {
// Initialize zr from r and size; a non-nil error from init is handled here.
69 if err
:= zr
.init(r
, size
); err
!= nil {
// init populates z from the zip data behind r, which is size bytes long.
// It locates the directory end with readDirectoryEnd, copies the archive
// comment into z.Comment, and then reads directory headers starting at
// the directory offset, appending one *File per header to z.File.
75 func (z
*Reader
) init(r io
.ReaderAt
, size
int64) error
{
76 end
, err
:= readDirectoryEnd(r
, size
)
// Pre-size z.File using the record count reported by the directory end.
81 z
.File
= make([]*File
, 0, end
.directoryRecords
)
82 z
.Comment
= end
.comment
// Position a section reader spanning the whole input at the start of
// the central directory and buffer the reads from it.
// NOTE(review): os.SEEK_SET is deprecated in favor of io.SeekStart.
83 rs
:= io
.NewSectionReader(r
, 0, size
)
84 if _
, err
= rs
.Seek(int64(end
.directoryOffset
), os
.SEEK_SET
); err
!= nil {
87 buf
:= bufio
.NewReader(rs
)
89 // The count of files inside a zip is truncated to fit in a uint16.
90 // Gloss over this by reading headers until we encounter
91 // a bad one, and then only report an ErrFormat or UnexpectedEOF if
92 // the file count modulo 65536 is incorrect.
// Each File records the source reader and its size.
94 f
:= &File
{zipr
: r
, zipsize
: size
}
95 err
= readDirectoryHeader(f
, buf
)
// ErrFormat and io.ErrUnexpectedEOF are the two errors singled out
// here, per the comment above about reading until a bad header.
96 if err
== ErrFormat || err
== io
.ErrUnexpectedEOF
{
102 z
.File
= append(z
.File
, f
)
104 if uint16(len(z
.File
)) != uint16(end
.directoryRecords
) { // only compare 16 bits here
105 // Return the readDirectoryHeader error if we read
106 // the wrong number of directory entries.
112 // Close closes the Zip file, rendering it unusable for I/O.
// The method has the Close() error signature of io.Closer.
113 func (rc
*ReadCloser
) Close() error
{
117 // DataOffset returns the offset of the file's possibly-compressed
118 // data, relative to the beginning of the zip file.
120 // Most callers should instead use Open, which transparently
121 // decompresses data and verifies checksums.
122 func (f
*File
) DataOffset() (offset
int64, err error
) {
123 bodyOffset
, err
:= f
.findBodyOffset()
// findBodyOffset returns the header's total length, so add the header's
// own offset to get a position within the zip file.
127 return f
.headerOffset
+ bodyOffset
, nil
130 // Open returns a ReadCloser that provides access to the File's contents.
131 // Multiple files may be read concurrently.
132 func (f
*File
) Open() (rc io
.ReadCloser
, err error
) {
133 bodyOffset
, err
:= f
.findBodyOffset()
// The file's data is f.CompressedSize64 bytes long and starts
// bodyOffset bytes past the header offset.
137 size
:= int64(f
.CompressedSize64
)
138 r
:= io
.NewSectionReader(f
.zipr
, f
.headerOffset
+bodyOffset
, size
)
// Look up the decompressor for the file's compression method.
139 dcomp
:= decompressor(f
.Method
)
// When the file has a data descriptor, set up a reader over the
// dataDescriptorLen bytes that immediately follow the file's data.
146 if f
.hasDataDescriptor() {
147 desr
= io
.NewSectionReader(f
.zipr
, f
.headerOffset
+bodyOffset
+size
, dataDescriptorLen
)
// Wrap rc in a checksumReader carrying a fresh CRC-32 (IEEE) hash, the
// File, and the data descriptor reader; the trailing nil is the
// initial value of the sticky error field.
149 rc
= &checksumReader
{rc
, crc32
.NewIEEE(), f
, desr
, nil}
// checksumReader is the reader that File.Open returns. Its Read method
// compares a CRC-32 hash against the File's CRC32 field.
153 type checksumReader
struct {
157 desr io
.Reader
// if non-nil, where to read the data descriptor
158 err error
// sticky error
// Read reads from the wrapped reader r.rc into b. When a data descriptor
// reader is present, the descriptor is parsed with readDataDescriptor and
// the hash is compared against r.f.CRC32. Otherwise the hash is compared
// only when r.f.CRC32 is non-zero.
161 func (r
*checksumReader
) Read(b
[]byte) (n
int, err error
) {
165 n
, err
= r
.rc
.Read(b
)
// Parse the data descriptor; a failure here is kept in err1 so it does
// not overwrite err from the read above.
172 if err1
:= readDataDescriptor(r
.desr
, r
.f
); err1
!= nil {
174 } else if r
.hash
.Sum32() != r
.f
.CRC32
{
178 // If there's not a data descriptor, we still compare
179 // the CRC32 of what we've read against the file header
180 // or TOC's CRC32, if it seems like it was set.
181 if r
.f
.CRC32
!= 0 && r
.hash
.Sum32() != r
.f
.CRC32
{
// Close closes the wrapped reader r.rc and returns its error.
190 func (r
*checksumReader
) Close() error
{ return r
.rc
.Close() }
192 // findBodyOffset does the minimum work to verify the file has a header
193 // and returns the file body offset.
// The returned offset is relative to the start of the header: it is the
// sum of fileHeaderLen, the filename length, and the extra field length.
194 func (f
*File
) findBodyOffset() (int64, error
) {
// Read the fixed-size part of the header located at f.headerOffset.
195 var buf
[fileHeaderLen
]byte
196 if _
, err
:= f
.zipr
.ReadAt(buf
[:], f
.headerOffset
); err
!= nil {
// The header must begin with fileHeaderSignature.
200 if sig
:= b
.uint32(); sig
!= fileHeaderSignature
{
203 b
= b
[22:] // skip over most of the header
// The two uint16 values read next are the filename and extra lengths.
204 filenameLen
:= int(b
.uint16())
205 extraLen
:= int(b
.uint16())
206 return int64(fileHeaderLen
+ filenameLen
+ extraLen
), nil
209 // readDirectoryHeader attempts to read a directory header from r.
210 // It returns io.ErrUnexpectedEOF if it cannot read a complete header,
211 // and ErrFormat if it doesn't find a valid header signature.
// The decoded fields are stored in f.
212 func readDirectoryHeader(f
*File
, r io
.Reader
) error
{
// Read the fixed-size part of the header (directoryHeaderLen bytes).
213 var buf
[directoryHeaderLen
]byte
214 if _
, err
:= io
.ReadFull(r
, buf
[:]); err
!= nil {
218 if sig
:= b
.uint32(); sig
!= directoryHeaderSignature
{
221 f
.CreatorVersion
= b
.uint16()
222 f
.ReaderVersion
= b
.uint16()
224 f
.Method
= b
.uint16()
225 f
.ModifiedTime
= b
.uint16()
226 f
.ModifiedDate
= b
.uint16()
228 f
.CompressedSize
= b
.uint32()
229 f
.UncompressedSize
= b
.uint32()
// Seed the 64-bit sizes from the 32-bit fields; a zip64 extra block
// processed further down may overwrite them.
230 f
.CompressedSize64
= uint64(f
.CompressedSize
)
231 f
.UncompressedSize64
= uint64(f
.UncompressedSize
)
232 filenameLen
:= int(b
.uint16())
233 extraLen
:= int(b
.uint16())
234 commentLen
:= int(b
.uint16())
235 b
= b
[4:] // skipped start disk number and internal attributes (2x uint16)
236 f
.ExternalAttrs
= b
.uint32()
237 f
.headerOffset
= int64(b
.uint32())
// The filename, extra field, and comment follow the fixed header in
// that order; read them in one call and slice them apart.
238 d
:= make([]byte, filenameLen
+extraLen
+commentLen
)
239 if _
, err
:= io
.ReadFull(r
, d
); err
!= nil {
242 f
.Name
= string(d
[:filenameLen
])
243 f
.Extra
= d
[filenameLen
: filenameLen
+extraLen
]
244 f
.Comment
= string(d
[filenameLen
+extraLen
:])
// Walk the records of the extra field looking for the zip64 block.
246 if len(f
.Extra
) > 0 {
247 b
:= readBuf(f
.Extra
)
248 for len(b
) >= 4 { // need at least tag and size
// Check the declared record size against the bytes that remain.
251 if int(size
) > len(b
) {
254 if tag
== zip64ExtraId
{
255 // update directory values from the zip64 extra block
256 eb
:= readBuf(b
[:size
])
258 f
.UncompressedSize64
= eb
.uint64()
261 f
.CompressedSize64
= eb
.uint64()
264 f
.headerOffset
= int64(eb
.uint64())
269 // Should have consumed the whole header.
// readDataDescriptor reads a data descriptor from r, accepting it with or
// without the leading signature, and compares the CRC-32 it contains
// against f.CRC32.
277 func readDataDescriptor(r io
.Reader
, f
*File
) error
{
278 var buf
[dataDescriptorLen
]byte
280 // The spec says: "Although not originally assigned a
281 // signature, the value 0x08074b50 has commonly been adopted
282 // as a signature value for the data descriptor record.
283 // Implementers should be aware that ZIP files may be
284 // encountered with or without this signature marking data
285 // descriptors and should account for either case when reading
286 // ZIP files to ensure compatibility."
288 // dataDescriptorLen includes the size of the signature but
289 // first read just those 4 bytes to see if it exists.
290 if _
, err
:= io
.ReadFull(r
, buf
[:4]); err
!= nil {
294 maybeSig
:= readBuf(buf
[:4])
295 if maybeSig
.uint32() != dataDescriptorSignature
{
296 // No data descriptor signature. Keep these four
// Fill buf from index off up to 12 bytes in total.
300 if _
, err
:= io
.ReadFull(r
, buf
[off
:12]); err
!= nil {
// The first uint32 of those 12 bytes is the CRC-32.
303 b
:= readBuf(buf
[:12])
304 if b
.uint32() != f
.CRC32
{
308 // The two sizes that follow here can be either 32 bits or 64 bits
309 // but the spec is not very clear on this and different
310 // interpretations have been made causing incompatibilities. We
311 // already have the sizes from the central directory so we can
312 // just ignore these.
// readDirectoryEnd finds and decodes the directory end record near the
// end of r, which is size bytes long. It searches the last 1k and then
// the last 65k for the signature, decodes the fixed fields and the
// trailing comment, and applies zip64 values when findDirectory64End
// locates a zip64 directory end. It returns ErrFormat when no signature
// is found or when the directory offset lies outside the file.
317 func readDirectoryEnd(r io
.ReaderAt
, size
int64) (dir
*directoryEnd
, err error
) {
318 // look for directoryEndSignature in the last 1k, then in the last 65k
320 var directoryEndOffset
int64
321 for i
, bLen
:= range []int64{1024, 65 * 1024} {
// Read the last bLen bytes; io.EOF from ReadAt is not treated as a
// failure here.
325 buf
= make([]byte, int(bLen
))
326 if _
, err
:= r
.ReadAt(buf
, size
-bLen
); err
!= nil && err
!= io
.EOF
{
329 if p
:= findSignatureInBlock(buf
); p
>= 0 {
// Convert the position within buf into an offset within the file.
331 directoryEndOffset
= size
- bLen
+ int64(p
)
// Give up after the second window, or once the window already
// spans the whole file.
334 if i
== 1 || bLen
== size
{
335 return nil, ErrFormat
339 // read header into struct
340 b
:= readBuf(buf
[4:]) // skip signature
342 diskNbr
: uint32(b
.uint16()),
343 dirDiskNbr
: uint32(b
.uint16()),
344 dirRecordsThisDisk
: uint64(b
.uint16()),
345 directoryRecords
: uint64(b
.uint16()),
346 directorySize
: uint64(b
.uint32()),
347 directoryOffset
: uint64(b
.uint32()),
348 commentLen
: b
.uint16(),
// The comment occupies the commentLen bytes that follow the fixed fields.
350 l
:= int(d
.commentLen
)
352 return nil, errors
.New("zip: invalid comment length")
354 d
.comment
= string(b
[:l
])
// Look for a zip64 locator before the directory end; when one is found
// (p >= 0), readDirectory64End updates d with the zip64 values.
356 p
, err
:= findDirectory64End(r
, directoryEndOffset
)
357 if err
== nil && p
>= 0 {
358 err
= readDirectory64End(r
, p
, d
)
364 // Make sure directoryOffset points to somewhere in our file.
365 if o
:= int64(d
.directoryOffset
); o
< 0 || o
>= size
{
366 return nil, ErrFormat
371 // findDirectory64End tries to read the zip64 locator just before the
372 // directory end and returns the offset of the zip64 directory end if
// it is found. It returns -1 when the locator would lie outside the file.
374 func findDirectory64End(r io
.ReaderAt
, directoryEndOffset
int64) (int64, error
) {
// The locator is directory64LocLen bytes long and sits immediately
// before the directory end.
375 locOffset
:= directoryEndOffset
- directory64LocLen
377 return -1, nil // no need to look for a header outside the file
379 buf
:= make([]byte, directory64LocLen
)
380 if _
, err
:= r
.ReadAt(buf
, locOffset
); err
!= nil {
384 if sig
:= b
.uint32(); sig
!= directory64LocSignature
{
387 b
= b
[4:] // skip number of the disk with the start of the zip64 end of central directory
388 p
:= b
.uint64() // relative offset of the zip64 end of central directory record
392 // readDirectory64End reads the zip64 directory end and updates the
393 // directory end with the zip64 directory end values.
// The record is read from r at offset and its fields overwrite those of d.
394 func readDirectory64End(r io
.ReaderAt
, offset
int64, d
*directoryEnd
) (err error
) {
// Read the directory64EndLen bytes that start at offset.
395 buf
:= make([]byte, directory64EndLen
)
396 if _
, err
:= r
.ReadAt(buf
, offset
); err
!= nil {
401 if sig
:= b
.uint32(); sig
!= directory64EndSignature
{
405 b
= b
[12:] // skip dir size, version and version needed (uint64 + 2x uint16)
406 d
.diskNbr
= b
.uint32() // number of this disk
407 d
.dirDiskNbr
= b
.uint32() // number of the disk with the start of the central directory
408 d
.dirRecordsThisDisk
= b
.uint64() // total number of entries in the central directory on this disk
409 d
.directoryRecords
= b
.uint64() // total number of entries in the central directory
410 d
.directorySize
= b
.uint64() // size of the central directory
411 d
.directoryOffset
= b
.uint64() // offset of start of central directory with respect to the starting disk number
// findSignatureInBlock scans b backwards, starting directoryEndLen bytes
// before the end, for the byte sequence 'P', 'K', 0x05, 0x06. The caller
// readDirectoryEnd treats a non-negative result as the position of the
// signature within b.
416 func findSignatureInBlock(b
[]byte) int {
417 for i
:= len(b
) - directoryEndLen
; i
>= 0; i
-- {
418 // defined from directoryEndSignature in struct.go
419 if b
[i
] == 'P' && b
[i
+1] == 'K' && b
[i
+2] == 0x05 && b
[i
+3] == 0x06 {
420 // n is length of comment
// It is decoded little-endian from the last two bytes of the
// fixed-size record.
421 n
:= int(b
[i
+directoryEndLen
-2]) |
int(b
[i
+directoryEndLen
-1])<<8
// Accept the candidate only if the record plus its comment fits in b.
422 if n
+directoryEndLen
+i
<= len(b
) {
// uint16 decodes a little-endian uint16 from the front of *b.
// NOTE(review): callers read successive fields with repeated calls, so
// the method presumably also advances *b past the bytes read — confirm.
432 func (b
*readBuf
) uint16() uint16 {
433 v
:= binary
.LittleEndian
.Uint16(*b
)
// uint32 decodes a little-endian uint32 from the front of *b.
// NOTE(review): callers read successive fields with repeated calls, so
// the method presumably also advances *b past the bytes read — confirm.
438 func (b
*readBuf
) uint32() uint32 {
439 v
:= binary
.LittleEndian
.Uint32(*b
)
// uint64 decodes a little-endian uint64 from the front of *b.
// NOTE(review): callers read successive fields with repeated calls, so
// the method presumably also advances *b past the bytes read — confirm.
444 func (b
*readBuf
) uint64() uint64 {
445 v
:= binary
.LittleEndian
.Uint64(*b
)