1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// ErrHeader is the error reported when a tar header, or one of the PAX or
// GNU sparse records that extend it, is malformed.
var ErrHeader = errors.New("archive/tar: invalid tar header")
// maxNanoSecondIntSize is the number of decimal digits in a nanosecond count
// (999999999 has nine digits). parsePAXTime pads or truncates the fractional
// part of a PAX timestamp to exactly this many digits.
const maxNanoSecondIntSize = 9
27 // A Reader provides sequential access to the contents of a tar archive.
28 // A tar archive consists of a sequence of files.
29 // The Next method advances to the next file in the archive (including the first),
30 // and then it can be treated as an io.Reader to access the file's data.
34 pad
int64 // amount of padding (ignored) after current file entry
35 curr numBytesReader
// reader for current file entry
38 // A numBytesReader is an io.Reader with a numBytes method, returning the number
39 // of bytes remaining in the underlying encoded data.
40 type numBytesReader
interface {
// A regFileReader is a numBytesReader for reading file data from a tar archive.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // number of unread bytes for current file entry
}
51 // A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
52 type sparseFileReader
struct {
53 rfr
*regFileReader
// reads the sparse-encoded file data
54 sp
[]sparseEntry
// the sparse map for the file
55 pos
int64 // keeps track of file position
56 tot
int64 // total size of the file
// Keywords for GNU sparse files in a PAX extended header.
// These are the record keys looked up in the map produced by parsePAX.
const (
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)
// Layout constants for old GNU sparse headers: byte positions inside a
// header block, the number of sparse entries a block holds, and the width
// in bytes of each entry field.
const (
	// Position in the main tar header at which the sparse entries begin.
	oldGNUSparseMainHeaderOffset = 386
	// Position in the main tar header of the byte that is non-zero when
	// extension headers follow.
	oldGNUSparseMainHeaderIsExtendedOffset = 482
	// Number of sparse entries read from the main tar header.
	oldGNUSparseMainHeaderNumEntries = 4
	// Position in an extension header of the byte that is non-zero when
	// yet another extension header follows.
	oldGNUSparseExtendedHeaderIsExtendedOffset = 504
	// Number of sparse entries read from each extension header.
	oldGNUSparseExtendedHeaderNumEntries = 21
	// Width in bytes of the offset field of a sparse entry.
	oldGNUSparseOffsetSize = 12
	// Width in bytes of the numBytes field of a sparse entry.
	oldGNUSparseNumBytesSize = 12
)
83 // NewReader creates a new Reader reading from r.
84 func NewReader(r io
.Reader
) *Reader
{ return &Reader
{r
: r
} }
86 // Next advances to the next entry in the tar archive.
87 func (tr
*Reader
) Next() (*Header
, error
) {
99 // Check for PAX/GNU header.
100 switch hdr
.Typeflag
{
102 // PAX extended header
103 headers
, err
:= parsePAX(tr
)
107 // We actually read the whole file,
108 // but this skips alignment padding
110 hdr
= tr
.readHeader()
111 mergePAX(hdr
, headers
)
113 // Check for a PAX format sparse file
114 sp
, err
:= tr
.checkForGNUSparsePAXHeaders(hdr
, headers
)
120 // Current file is a PAX format GNU sparse file.
121 // Set the current file reader to a sparse file reader.
122 tr
.curr
= &sparseFileReader
{rfr
: tr
.curr
.(*regFileReader
), sp
: sp
, tot
: hdr
.Size
}
125 case TypeGNULongName
:
126 // We have a GNU long name header. Its contents are the real file name.
127 realname
, err
:= ioutil
.ReadAll(tr
)
131 hdr
, err
:= tr
.Next()
132 hdr
.Name
= cString(realname
)
134 case TypeGNULongLink
:
135 // We have a GNU long link header.
136 realname
, err
:= ioutil
.ReadAll(tr
)
140 hdr
, err
:= tr
.Next()
141 hdr
.Linkname
= cString(realname
)
147 // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
148 // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
149 // be treated as a regular file.
150 func (tr
*Reader
) checkForGNUSparsePAXHeaders(hdr
*Header
, headers
map[string]string) ([]sparseEntry
, error
) {
151 var sparseFormat
string
153 // Check for sparse format indicators
154 major
, majorOk
:= headers
[paxGNUSparseMajor
]
155 minor
, minorOk
:= headers
[paxGNUSparseMinor
]
156 sparseName
, sparseNameOk
:= headers
[paxGNUSparseName
]
157 _
, sparseMapOk
:= headers
[paxGNUSparseMap
]
158 sparseSize
, sparseSizeOk
:= headers
[paxGNUSparseSize
]
159 sparseRealSize
, sparseRealSizeOk
:= headers
[paxGNUSparseRealSize
]
161 // Identify which, if any, sparse format applies from which PAX headers are set
162 if majorOk
&& minorOk
{
163 sparseFormat
= major
+ "." + minor
164 } else if sparseNameOk
&& sparseMapOk
{
166 } else if sparseSizeOk
{
169 // Not a PAX format GNU sparse file.
173 // Check for unknown sparse format
174 if sparseFormat
!= "0.0" && sparseFormat
!= "0.1" && sparseFormat
!= "1.0" {
178 // Update hdr from GNU sparse PAX headers
180 hdr
.Name
= sparseName
183 realSize
, err
:= strconv
.ParseInt(sparseSize
, 10, 0)
185 return nil, ErrHeader
188 } else if sparseRealSizeOk
{
189 realSize
, err
:= strconv
.ParseInt(sparseRealSize
, 10, 0)
191 return nil, ErrHeader
196 // Set up the sparse map, according to the particular sparse format in use
199 switch sparseFormat
{
201 sp
, err
= readGNUSparseMap0x1(headers
)
203 sp
, err
= readGNUSparseMap1x0(tr
.curr
)
208 // mergePAX merges well known headers according to PAX standard.
209 // In general headers with the same name as those found
210 // in the header struct overwrite those found in the header
211 // struct with higher precision or longer values. Esp. useful
212 // for name and linkname fields.
213 func mergePAX(hdr
*Header
, headers
map[string]string) error
{
214 for k
, v
:= range headers
{
225 uid
, err
:= strconv
.ParseInt(v
, 10, 0)
231 gid
, err
:= strconv
.ParseInt(v
, 10, 0)
237 t
, err
:= parsePAXTime(v
)
243 t
, err
:= parsePAXTime(v
)
249 t
, err
:= parsePAXTime(v
)
255 size
, err
:= strconv
.ParseInt(v
, 10, 0)
259 hdr
.Size
= int64(size
)
261 if strings
.HasPrefix(k
, paxXattr
) {
262 if hdr
.Xattrs
== nil {
263 hdr
.Xattrs
= make(map[string]string)
265 hdr
.Xattrs
[k
[len(paxXattr
):]] = v
272 // parsePAXTime takes a string of the form %d.%d as described in
273 // the PAX specification.
274 func parsePAXTime(t
string) (time
.Time
, error
) {
276 pos
:= bytes
.IndexByte(buf
, '.')
277 var seconds
, nanoseconds
int64
280 seconds
, err
= strconv
.ParseInt(t
, 10, 0)
282 return time
.Time
{}, err
285 seconds
, err
= strconv
.ParseInt(string(buf
[:pos
]), 10, 0)
287 return time
.Time
{}, err
289 nano_buf
:= string(buf
[pos
+1:])
290 // Pad as needed before converting to a decimal.
291 // For example .030 -> .030000000 -> 30000000 nanoseconds
292 if len(nano_buf
) < maxNanoSecondIntSize
{
294 nano_buf
+= strings
.Repeat("0", maxNanoSecondIntSize
-len(nano_buf
))
295 } else if len(nano_buf
) > maxNanoSecondIntSize
{
297 nano_buf
= nano_buf
[:maxNanoSecondIntSize
]
299 nanoseconds
, err
= strconv
.ParseInt(string(nano_buf
), 10, 0)
301 return time
.Time
{}, err
304 ts
:= time
.Unix(seconds
, nanoseconds
)
308 // parsePAX parses PAX headers.
309 // If an extended header (type 'x') is invalid, ErrHeader is returned
310 func parsePAX(r io
.Reader
) (map[string]string, error
) {
311 buf
, err
:= ioutil
.ReadAll(r
)
316 // For GNU PAX sparse format 0.0 support.
317 // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
318 var sparseMap bytes
.Buffer
320 headers
:= make(map[string]string)
321 // Each record is constructed as
322 // "%d %s=%s\n", length, keyword, value
324 // or the header was empty to start with.
326 // The size field ends at the first space.
327 sp
= bytes
.IndexByte(buf
, ' ')
329 return nil, ErrHeader
331 // Parse the first token as a decimal integer.
332 n
, err
:= strconv
.ParseInt(string(buf
[:sp
]), 10, 0)
334 return nil, ErrHeader
336 // Extract everything between the decimal and the n -1 on the
337 // beginning to eat the ' ', -1 on the end to skip the newline.
339 record
, buf
= buf
[sp
+1:n
-1], buf
[n
:]
340 // The first equals is guaranteed to mark the end of the key.
341 // Everything else is value.
342 eq
:= bytes
.IndexByte(record
, '=')
344 return nil, ErrHeader
346 key
, value
:= record
[:eq
], record
[eq
+1:]
348 keyStr
:= string(key
)
349 if keyStr
== paxGNUSparseOffset || keyStr
== paxGNUSparseNumBytes
{
350 // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
351 sparseMap
.Write(value
)
352 sparseMap
.Write([]byte{','})
354 // Normal key. Set the value in the headers map.
355 headers
[keyStr
] = string(value
)
358 if sparseMap
.Len() != 0 {
359 // Add sparse info to headers, chopping off the extra comma
360 sparseMap
.Truncate(sparseMap
.Len() - 1)
361 headers
[paxGNUSparseMap
] = sparseMap
.String()
// cString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func cString(b []byte) string {
	if end := bytes.IndexByte(b, 0); end >= 0 {
		return string(b[:end])
	}
	return string(b)
}
376 func (tr
*Reader
) octal(b
[]byte) int64 {
377 // Check for binary format first.
378 if len(b
) > 0 && b
[0]&0x80 != 0 {
380 for i
, c
:= range b
{
382 c
&= 0x7f // ignore signal bit in first byte
389 // Because unused fields are filled with NULs, we need
390 // to skip leading NULs. Fields may also be padded with
392 // So we remove leading and trailing NULs and spaces to
394 b
= bytes
.Trim(b
, " \x00")
399 x
, err
:= strconv
.ParseUint(cString(b
), 8, 64)
406 // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
407 func (tr
*Reader
) skipUnread() {
408 nr
:= tr
.numBytes() + tr
.pad
// number of bytes to skip
409 tr
.curr
, tr
.pad
= nil, 0
410 if sr
, ok
:= tr
.r
.(io
.Seeker
); ok
{
411 if _
, err
:= sr
.Seek(nr
, os
.SEEK_CUR
); err
== nil {
415 _
, tr
.err
= io
.CopyN(ioutil
.Discard
, tr
.r
, nr
)
418 func (tr
*Reader
) verifyChecksum(header
[]byte) bool {
423 given
:= tr
.octal(header
[148:156])
424 unsigned
, signed
:= checksum(header
)
425 return given
== unsigned || given
== signed
428 func (tr
*Reader
) readHeader() *Header
{
429 header
:= make([]byte, blockSize
)
430 if _
, tr
.err
= io
.ReadFull(tr
.r
, header
); tr
.err
!= nil {
434 // Two blocks of zero bytes marks the end of the archive.
435 if bytes
.Equal(header
, zeroBlock
[0:blockSize
]) {
436 if _
, tr
.err
= io
.ReadFull(tr
.r
, header
); tr
.err
!= nil {
439 if bytes
.Equal(header
, zeroBlock
[0:blockSize
]) {
442 tr
.err
= ErrHeader
// zero block and then non-zero block
447 if !tr
.verifyChecksum(header
) {
456 hdr
.Name
= cString(s
.next(100))
457 hdr
.Mode
= tr
.octal(s
.next(8))
458 hdr
.Uid
= int(tr
.octal(s
.next(8)))
459 hdr
.Gid
= int(tr
.octal(s
.next(8)))
460 hdr
.Size
= tr
.octal(s
.next(12))
461 hdr
.ModTime
= time
.Unix(tr
.octal(s
.next(12)), 0)
463 hdr
.Typeflag
= s
.next(1)[0]
464 hdr
.Linkname
= cString(s
.next(100))
466 // The remainder of the header depends on the value of magic.
467 // The original (v7) version of tar had no explicit magic field,
468 // so its magic bytes, like the rest of the block, are NULs.
469 magic
:= string(s
.next(8)) // contains version field as well.
472 case magic
[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
473 if string(header
[508:512]) == "tar\x00" {
478 case magic
== "ustar \x00": // old GNU tar
483 case "posix", "gnu", "star":
484 hdr
.Uname
= cString(s
.next(32))
485 hdr
.Gname
= cString(s
.next(32))
486 devmajor
:= s
.next(8)
487 devminor
:= s
.next(8)
488 if hdr
.Typeflag
== TypeChar || hdr
.Typeflag
== TypeBlock
{
489 hdr
.Devmajor
= tr
.octal(devmajor
)
490 hdr
.Devminor
= tr
.octal(devminor
)
495 prefix
= cString(s
.next(155))
497 prefix
= cString(s
.next(131))
498 hdr
.AccessTime
= time
.Unix(tr
.octal(s
.next(12)), 0)
499 hdr
.ChangeTime
= time
.Unix(tr
.octal(s
.next(12)), 0)
502 hdr
.Name
= prefix
+ "/" + hdr
.Name
511 // Maximum value of hdr.Size is 64 GB (12 octal digits),
512 // so there's no risk of int64 overflowing.
513 nb
:= int64(hdr
.Size
)
514 tr
.pad
= -nb
& (blockSize
- 1) // blockSize is a power of two
516 // Set the current file reader.
517 tr
.curr
= ®FileReader
{r
: tr
.r
, nb
: nb
}
519 // Check for old GNU sparse format entry.
520 if hdr
.Typeflag
== TypeGNUSparse
{
521 // Get the real size of the file.
522 hdr
.Size
= tr
.octal(header
[483:495])
524 // Read the sparse map.
525 sp
:= tr
.readOldGNUSparseMap(header
)
529 // Current file is a GNU sparse file. Update the current file reader.
530 tr
.curr
= &sparseFileReader
{rfr
: tr
.curr
.(*regFileReader
), sp
: sp
, tot
: hdr
.Size
}
536 // A sparseEntry holds a single entry in a sparse file's sparse map.
537 // A sparse entry indicates the offset and size in a sparse file of a
539 type sparseEntry
struct {
544 // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
545 // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
546 // then one or more extension headers are used to store the rest of the sparse map.
547 func (tr
*Reader
) readOldGNUSparseMap(header
[]byte) []sparseEntry
{
548 isExtended
:= header
[oldGNUSparseMainHeaderIsExtendedOffset
] != 0
549 spCap
:= oldGNUSparseMainHeaderNumEntries
551 spCap
+= oldGNUSparseExtendedHeaderNumEntries
553 sp
:= make([]sparseEntry
, 0, spCap
)
554 s
:= slicer(header
[oldGNUSparseMainHeaderOffset
:])
556 // Read the four entries from the main tar header
557 for i
:= 0; i
< oldGNUSparseMainHeaderNumEntries
; i
++ {
558 offset
:= tr
.octal(s
.next(oldGNUSparseOffsetSize
))
559 numBytes
:= tr
.octal(s
.next(oldGNUSparseNumBytesSize
))
564 if offset
== 0 && numBytes
== 0 {
567 sp
= append(sp
, sparseEntry
{offset
: offset
, numBytes
: numBytes
})
571 // There are more entries. Read an extension header and parse its entries.
572 sparseHeader
:= make([]byte, blockSize
)
573 if _
, tr
.err
= io
.ReadFull(tr
.r
, sparseHeader
); tr
.err
!= nil {
576 isExtended
= sparseHeader
[oldGNUSparseExtendedHeaderIsExtendedOffset
] != 0
577 s
= slicer(sparseHeader
)
578 for i
:= 0; i
< oldGNUSparseExtendedHeaderNumEntries
; i
++ {
579 offset
:= tr
.octal(s
.next(oldGNUSparseOffsetSize
))
580 numBytes
:= tr
.octal(s
.next(oldGNUSparseNumBytesSize
))
585 if offset
== 0 && numBytes
== 0 {
588 sp
= append(sp
, sparseEntry
{offset
: offset
, numBytes
: numBytes
})
594 // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0.
595 // The sparse map is stored just before the file data and padded out to the nearest block boundary.
596 func readGNUSparseMap1x0(r io
.Reader
) ([]sparseEntry
, error
) {
597 buf
:= make([]byte, 2*blockSize
)
598 sparseHeader
:= buf
[:blockSize
]
600 // readDecimal is a helper function to read a decimal integer from the sparse map
601 // while making sure to read from the file in blocks of size blockSize
602 readDecimal
:= func() (int64, error
) {
604 nl
:= bytes
.IndexByte(sparseHeader
, '\n')
606 if len(sparseHeader
) >= blockSize
{
610 oldLen
:= len(sparseHeader
)
611 newLen
:= oldLen
+ blockSize
612 if cap(sparseHeader
) < newLen
{
613 // There's more header, but we need to make room for the next block
614 copy(buf
, sparseHeader
)
615 sparseHeader
= buf
[:newLen
]
617 // There's more header, and we can just reslice
618 sparseHeader
= sparseHeader
[:newLen
]
621 // Now that sparseHeader is large enough, read next block
622 if _
, err
:= io
.ReadFull(r
, sparseHeader
[oldLen
:newLen
]); err
!= nil {
626 // Look for a newline in the new data
627 nl
= bytes
.IndexByte(sparseHeader
[oldLen
:newLen
], '\n')
632 nl
+= oldLen
// We want the position from the beginning
634 // Now that we've found a newline, read a number
635 n
, err
:= strconv
.ParseInt(string(sparseHeader
[:nl
]), 10, 0)
640 // Update sparseHeader to consume this number
641 sparseHeader
= sparseHeader
[nl
+1:]
645 // Read the first block
646 if _
, err
:= io
.ReadFull(r
, sparseHeader
); err
!= nil {
650 // The first line contains the number of entries
651 numEntries
, err
:= readDecimal()
656 // Read all the entries
657 sp
:= make([]sparseEntry
, 0, numEntries
)
658 for i
:= int64(0); i
< numEntries
; i
++ {
660 offset
, err
:= readDecimal()
665 numBytes
, err
:= readDecimal()
670 sp
= append(sp
, sparseEntry
{offset
: offset
, numBytes
: numBytes
})
676 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1.
677 // The sparse map is stored in the PAX headers.
678 func readGNUSparseMap0x1(headers
map[string]string) ([]sparseEntry
, error
) {
679 // Get number of entries
680 numEntriesStr
, ok
:= headers
[paxGNUSparseNumBlocks
]
682 return nil, ErrHeader
684 numEntries
, err
:= strconv
.ParseInt(numEntriesStr
, 10, 0)
686 return nil, ErrHeader
689 sparseMap
:= strings
.Split(headers
[paxGNUSparseMap
], ",")
691 // There should be two numbers in sparseMap for each entry
692 if int64(len(sparseMap
)) != 2*numEntries
{
693 return nil, ErrHeader
696 // Loop through the entries in the sparse map
697 sp
:= make([]sparseEntry
, 0, numEntries
)
698 for i
:= int64(0); i
< numEntries
; i
++ {
699 offset
, err
:= strconv
.ParseInt(sparseMap
[2*i
], 10, 0)
701 return nil, ErrHeader
703 numBytes
, err
:= strconv
.ParseInt(sparseMap
[2*i
+1], 10, 0)
705 return nil, ErrHeader
707 sp
= append(sp
, sparseEntry
{offset
: offset
, numBytes
: numBytes
})
713 // numBytes returns the number of bytes left to read in the current file's entry
714 // in the tar archive, or 0 if there is no current file.
715 func (tr
*Reader
) numBytes() int64 {
717 // No current file, so no bytes
720 return tr
.curr
.numBytes()
723 // Read reads from the current entry in the tar archive.
724 // It returns 0, io.EOF when it reaches the end of that entry,
725 // until Next is called to advance to the next entry.
726 func (tr
*Reader
) Read(b
[]byte) (n
int, err error
) {
730 n
, err
= tr
.curr
.Read(b
)
731 if err
!= nil && err
!= io
.EOF
{
737 func (rfr
*regFileReader
) Read(b
[]byte) (n
int, err error
) {
742 if int64(len(b
)) > rfr
.nb
{
745 n
, err
= rfr
.r
.Read(b
)
748 if err
== io
.EOF
&& rfr
.nb
> 0 {
749 err
= io
.ErrUnexpectedEOF
754 // numBytes returns the number of bytes left to read in the file's data in the tar archive.
755 func (rfr
*regFileReader
) numBytes() int64 {
759 // readHole reads a sparse file hole ending at offset toOffset
760 func (sfr
*sparseFileReader
) readHole(b
[]byte, toOffset
int64) int {
761 n64
:= toOffset
- sfr
.pos
762 if n64
> int64(len(b
)) {
766 for i
:= 0; i
< n
; i
++ {
773 // Read reads the sparse file data in expanded form.
774 func (sfr
*sparseFileReader
) Read(b
[]byte) (n
int, err error
) {
775 if len(sfr
.sp
) == 0 {
776 // No more data fragments to read from.
777 if sfr
.pos
< sfr
.tot
{
778 // We're in the last hole
779 n
= sfr
.readHole(b
, sfr
.tot
)
782 // Otherwise, we're at the end of the file
785 if sfr
.pos
< sfr
.sp
[0].offset
{
787 n
= sfr
.readHole(b
, sfr
.sp
[0].offset
)
791 // We're not in a hole, so we'll read from the next data fragment
792 posInFragment
:= sfr
.pos
- sfr
.sp
[0].offset
793 bytesLeft
:= sfr
.sp
[0].numBytes
- posInFragment
794 if int64(len(b
)) > bytesLeft
{
798 n
, err
= sfr
.rfr
.Read(b
)
801 if int64(n
) == bytesLeft
{
802 // We're done with this fragment
806 if err
== io
.EOF
&& sfr
.pos
< sfr
.tot
{
807 // We reached the end of the last fragment's data, but there's a final hole
813 // numBytes returns the number of bytes left to read in the sparse file's
814 // sparse-encoded data in the tar archive.
815 func (sfr
*sparseFileReader
) numBytes() int64 {