1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
22 ErrHeader
= errors
.New("archive/tar: invalid tar header")
25 const maxNanoSecondIntSize
= 9
27 // A Reader provides sequential access to the contents of a tar archive.
28 // A tar archive consists of a sequence of files.
29 // The Next method advances to the next file in the archive (including the first),
30 // and then it can be treated as an io.Reader to access the file's data.
34 nb
int64 // number of unread bytes for current file entry
35 pad
int64 // amount of padding (ignored) after current file entry
38 // NewReader creates a new Reader reading from r.
39 func NewReader(r io
.Reader
) *Reader
{ return &Reader
{r
: r
} }
41 // Next advances to the next entry in the tar archive.
42 func (tr
*Reader
) Next() (*Header
, error
) {
54 // Check for PAX/GNU header.
57 // PAX extended header
58 headers
, err
:= parsePAX(tr
)
62 // We actually read the whole file,
63 // but this skips alignment padding
66 mergePAX(hdr
, headers
)
69 // We have a GNU long name header. Its contents are the real file name.
70 realname
, err
:= ioutil
.ReadAll(tr
)
75 hdr
.Name
= cString(realname
)
78 // We have a GNU long link header.
79 realname
, err
:= ioutil
.ReadAll(tr
)
84 hdr
.Linkname
= cString(realname
)
90 // mergePAX merges well known headers according to PAX standard.
91 // In general headers with the same name as those found
92 // in the header struct overwrite those found in the header
93 // struct with higher precision or longer values. Esp. useful
94 // for name and linkname fields.
95 func mergePAX(hdr
*Header
, headers
map[string]string) error
{
96 for k
, v
:= range headers
{
107 uid
, err
:= strconv
.ParseInt(v
, 10, 0)
113 gid
, err
:= strconv
.ParseInt(v
, 10, 0)
119 t
, err
:= parsePAXTime(v
)
125 t
, err
:= parsePAXTime(v
)
131 t
, err
:= parsePAXTime(v
)
137 size
, err
:= strconv
.ParseInt(v
, 10, 0)
141 hdr
.Size
= int64(size
)
148 // parsePAXTime takes a string of the form %d.%d as described in
149 // the PAX specification.
150 func parsePAXTime(t
string) (time
.Time
, error
) {
152 pos
:= bytes
.IndexByte(buf
, '.')
153 var seconds
, nanoseconds
int64
156 seconds
, err
= strconv
.ParseInt(t
, 10, 0)
158 return time
.Time
{}, err
161 seconds
, err
= strconv
.ParseInt(string(buf
[:pos
]), 10, 0)
163 return time
.Time
{}, err
165 nano_buf
:= string(buf
[pos
+1:])
166 // Pad as needed before converting to a decimal.
167 // For example .030 -> .030000000 -> 30000000 nanoseconds
168 if len(nano_buf
) < maxNanoSecondIntSize
{
170 nano_buf
+= strings
.Repeat("0", maxNanoSecondIntSize
-len(nano_buf
))
171 } else if len(nano_buf
) > maxNanoSecondIntSize
{
173 nano_buf
= nano_buf
[:maxNanoSecondIntSize
]
175 nanoseconds
, err
= strconv
.ParseInt(string(nano_buf
), 10, 0)
177 return time
.Time
{}, err
180 ts
:= time
.Unix(seconds
, nanoseconds
)
184 // parsePAX parses PAX headers.
185 // If an extended header (type 'x') is invalid, ErrHeader is returned
186 func parsePAX(r io
.Reader
) (map[string]string, error
) {
187 buf
, err
:= ioutil
.ReadAll(r
)
191 headers
:= make(map[string]string)
192 // Each record is constructed as
193 // "%d %s=%s\n", length, keyword, value
195 // or the header was empty to start with.
197 // The size field ends at the first space.
198 sp
= bytes
.IndexByte(buf
, ' ')
200 return nil, ErrHeader
202 // Parse the first token as a decimal integer.
203 n
, err
:= strconv
.ParseInt(string(buf
[:sp
]), 10, 0)
205 return nil, ErrHeader
207 // Extract everything between the decimal and the n -1 on the
208 // beginning to to eat the ' ', -1 on the end to skip the newline.
210 record
, buf
= buf
[sp
+1:n
-1], buf
[n
:]
211 // The first equals is guaranteed to mark the end of the key.
212 // Everything else is value.
213 eq
:= bytes
.IndexByte(record
, '=')
215 return nil, ErrHeader
217 key
, value
:= record
[:eq
], record
[eq
+1:]
218 headers
[string(key
)] = string(value
)
// cString interprets b as a NUL-terminated C-style string and returns the
// bytes before the first NUL. When b holds no NUL byte, all of b is
// returned as the string.
func cString(b []byte) string {
	if end := bytes.IndexByte(b, 0); end >= 0 {
		return string(b[:end])
	}
	return string(b)
}
233 func (tr
*Reader
) octal(b
[]byte) int64 {
234 // Check for binary format first.
235 if len(b
) > 0 && b
[0]&0x80 != 0 {
237 for i
, c
:= range b
{
239 c
&= 0x7f // ignore signal bit in first byte
246 // Removing leading spaces.
247 for len(b
) > 0 && b
[0] == ' ' {
250 // Removing trailing NULs and spaces.
251 for len(b
) > 0 && (b
[len(b
)-1] == ' ' || b
[len(b
)-1] == '\x00') {
254 x
, err
:= strconv
.ParseUint(cString(b
), 8, 64)
261 // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
262 func (tr
*Reader
) skipUnread() {
263 nr
:= tr
.nb
+ tr
.pad
// number of bytes to skip
265 if sr
, ok
:= tr
.r
.(io
.Seeker
); ok
{
266 if _
, err
:= sr
.Seek(nr
, os
.SEEK_CUR
); err
== nil {
270 _
, tr
.err
= io
.CopyN(ioutil
.Discard
, tr
.r
, nr
)
273 func (tr
*Reader
) verifyChecksum(header
[]byte) bool {
278 given
:= tr
.octal(header
[148:156])
279 unsigned
, signed
:= checksum(header
)
280 return given
== unsigned || given
== signed
283 func (tr
*Reader
) readHeader() *Header
{
284 header
:= make([]byte, blockSize
)
285 if _
, tr
.err
= io
.ReadFull(tr
.r
, header
); tr
.err
!= nil {
289 // Two blocks of zero bytes mark the end of the archive.
290 if bytes
.Equal(header
, zeroBlock
[0:blockSize
]) {
291 if _
, tr
.err
= io
.ReadFull(tr
.r
, header
); tr
.err
!= nil {
294 if bytes
.Equal(header
, zeroBlock
[0:blockSize
]) {
297 tr
.err
= ErrHeader
// zero block and then non-zero block
302 if !tr
.verifyChecksum(header
) {
311 hdr
.Name
= cString(s
.next(100))
312 hdr
.Mode
= tr
.octal(s
.next(8))
313 hdr
.Uid
= int(tr
.octal(s
.next(8)))
314 hdr
.Gid
= int(tr
.octal(s
.next(8)))
315 hdr
.Size
= tr
.octal(s
.next(12))
316 hdr
.ModTime
= time
.Unix(tr
.octal(s
.next(12)), 0)
318 hdr
.Typeflag
= s
.next(1)[0]
319 hdr
.Linkname
= cString(s
.next(100))
321 // The remainder of the header depends on the value of magic.
322 // The original (v7) version of tar had no explicit magic field,
323 // so its magic bytes, like the rest of the block, are NULs.
324 magic
:= string(s
.next(8)) // contains version field as well.
327 case "ustar\x0000": // POSIX tar (1003.1-1988)
328 if string(header
[508:512]) == "tar\x00" {
333 case "ustar \x00": // old GNU tar
338 case "posix", "gnu", "star":
339 hdr
.Uname
= cString(s
.next(32))
340 hdr
.Gname
= cString(s
.next(32))
341 devmajor
:= s
.next(8)
342 devminor
:= s
.next(8)
343 if hdr
.Typeflag
== TypeChar || hdr
.Typeflag
== TypeBlock
{
344 hdr
.Devmajor
= tr
.octal(devmajor
)
345 hdr
.Devminor
= tr
.octal(devminor
)
350 prefix
= cString(s
.next(155))
352 prefix
= cString(s
.next(131))
353 hdr
.AccessTime
= time
.Unix(tr
.octal(s
.next(12)), 0)
354 hdr
.ChangeTime
= time
.Unix(tr
.octal(s
.next(12)), 0)
357 hdr
.Name
= prefix
+ "/" + hdr
.Name
366 // Maximum value of hdr.Size is 64 GB (12 octal digits),
367 // so there's no risk of int64 overflowing.
368 tr
.nb
= int64(hdr
.Size
)
369 tr
.pad
= -tr
.nb
& (blockSize
- 1) // blockSize is a power of two
374 // Read reads from the current entry in the tar archive.
375 // It returns 0, io.EOF when it reaches the end of that entry,
376 // until Next is called to advance to the next entry.
377 func (tr
*Reader
) Read(b
[]byte) (n
int, err error
) {
383 if int64(len(b
)) > tr
.nb
{
386 n
, err
= tr
.r
.Read(b
)
389 if err
== io
.EOF
&& tr
.nb
> 0 {
390 err
= io
.ErrUnexpectedEOF