1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
9 // Format represents the tar archive format.
11 // The original tar format was introduced in Unix V7.
12 // Since then, there have been multiple competing formats attempting to
13 // standardize or extend the V7 format to overcome its limitations.
14 // The most common formats are the USTAR, PAX, and GNU formats,
15 // each with their own advantages and limitations.
17 // The following table captures the capabilities of each format:
//	                  |  USTAR |       PAX |       GNU
//	------------------+--------+-----------+----------
//	Name              |   256B | unlimited | unlimited
//	Linkname          |   100B | unlimited | unlimited
//	Size              | uint33 | unlimited |    uint89
//	Mode              | uint21 |    uint21 |    uint57
//	Uid/Gid           | uint21 | unlimited |    uint57
//	Uname/Gname       |    32B | unlimited |       32B
//	ModTime           | uint33 | unlimited |     int89
//	AccessTime        |    n/a | unlimited |     int89
//	ChangeTime        |    n/a | unlimited |     int89
//	Devmajor/Devminor | uint21 |    uint21 |    uint57
//	------------------+--------+-----------+----------
//	string encoding   |  ASCII |     UTF-8 |    binary
//	sub-second times  |     no |       yes |        no
//	sparse files      |     no |       yes |       yes
36 // The table's upper portion shows the Header fields, where each format reports
37 // the maximum number of bytes allowed for each string field and
38 // the integer type used to store each numeric field
39 // (where timestamps are stored as the number of seconds since the Unix epoch).
41 // The table's lower portion shows specialized features of each format,
42 // such as supported string encodings, support for sub-second timestamps,
43 // or support for sparse files.
45 // The Writer currently provides no support for sparse files.
48 // Constants to identify various tar formats.
50 // Deliberately hide the meaning of constants from public API.
51 _ Format
= (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc...
53 // FormatUnknown indicates that the format is unknown.
56 // The format of the original Unix V7 tar tool prior to standardization.
59 // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
61 // While this format is compatible with most tar readers,
62 // the format has several limitations making it unsuitable for some usages.
63 // Most notably, it cannot support sparse files, files larger than 8GiB,
64 // filenames larger than 256 characters, and non-ASCII filenames.
67 // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
70 // FormatPAX represents the PAX header format defined in POSIX.1-2001.
72 // PAX extends USTAR by writing a special file with Typeflag TypeXHeader
73 // preceding the original header. This file contains a set of key-value
74 // records, which are used to overcome USTAR's shortcomings, in addition to
75 // providing the ability to have sub-second resolution for timestamps.
77 // Some newer formats add their own extensions to PAX by defining their
78 // own keys and assigning certain semantic meaning to the associated values.
79 // For example, sparse file support in PAX is implemented using keys
80 // defined by the GNU manual (e.g., "GNU.sparse.map").
83 // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
86 // FormatGNU represents the GNU header format.
88 // The GNU header format is older than the USTAR and PAX standards and
89 // is not compatible with them. The GNU format supports
90 // arbitrary file sizes, filenames of arbitrary encoding and length,
91 // sparse files, and other features.
93 // It is recommended that PAX be chosen over GNU unless the target
94 // application can only parse GNU formatted archives.
97 // https://www.gnu.org/software/tar/manual/html_node/Standard.html
100 // Schily's tar format, which is incompatible with USTAR.
// This does not cover STAR extensions to the PAX format; these fall under
// the PAX format.
108 func (f Format
) has(f2 Format
) bool { return f
&f2
!= 0 }
109 func (f
*Format
) mayBe(f2 Format
) { *f |
= f2
}
110 func (f
*Format
) mayOnlyBe(f2 Format
) { *f
&= f2
}
111 func (f
*Format
) mustNotBe(f2 Format
) { *f
&^= f2
}
113 var formatNames
= map[Format
]string{
114 formatV7
: "V7", FormatUSTAR
: "USTAR", FormatPAX
: "PAX", FormatGNU
: "GNU", formatSTAR
: "STAR",
117 func (f Format
) String() string {
119 for f2
:= Format(1); f2
< formatMax
; f2
<<= 1 {
121 ss
= append(ss
, formatNames
[f2
])
130 return "(" + strings
.Join(ss
, " | ") + ")"
134 // Magics used to identify various formats.
136 magicGNU
, versionGNU
= "ustar ", " \x00"
137 magicUSTAR
, versionUSTAR
= "ustar\x00", "00"
138 trailerSTAR
= "tar\x00"
141 // Size constants from various tar specifications.
143 blockSize
= 512 // Size of each block in a tar stream
144 nameSize
= 100 // Max length of the name field in USTAR format
145 prefixSize
= 155 // Max length of the prefix field in USTAR format
148 // blockPadding computes the number of bytes needed to pad offset up to the
149 // nearest block edge where 0 <= n < blockSize.
150 func blockPadding(offset
int64) (n
int64) {
151 return -offset
& (blockSize
- 1)
156 type block
[blockSize
]byte
158 // Convert block to any number of formats.
159 func (b
*block
) toV7() *headerV7
{ return (*headerV7
)(b
) }
160 func (b
*block
) toGNU() *headerGNU
{ return (*headerGNU
)(b
) }
161 func (b
*block
) toSTAR() *headerSTAR
{ return (*headerSTAR
)(b
) }
162 func (b
*block
) toUSTAR() *headerUSTAR
{ return (*headerUSTAR
)(b
) }
163 func (b
*block
) toSparse() sparseArray
{ return sparseArray(b
[:]) }
// getFormat checks that the block is a valid tar header based on the checksum.
166 // It then attempts to guess the specific format based on magic values.
167 // If the checksum fails, then FormatUnknown is returned.
168 func (b
*block
) getFormat() Format
{
171 value
:= p
.parseOctal(b
.toV7().chksum())
172 chksum1
, chksum2
:= b
.computeChecksum()
173 if p
.err
!= nil ||
(value
!= chksum1
&& value
!= chksum2
) {
177 // Guess the magic values.
178 magic
:= string(b
.toUSTAR().magic())
179 version
:= string(b
.toUSTAR().version())
180 trailer
:= string(b
.toSTAR().trailer())
182 case magic
== magicUSTAR
&& trailer
== trailerSTAR
:
184 case magic
== magicUSTAR
:
185 return FormatUSTAR | FormatPAX
186 case magic
== magicGNU
&& version
== versionGNU
:
193 // setFormat writes the magic values necessary for specified format
194 // and then updates the checksum accordingly.
195 func (b
*block
) setFormat(format Format
) {
196 // Set the magic values.
198 case format
.has(formatV7
):
200 case format
.has(FormatGNU
):
201 copy(b
.toGNU().magic(), magicGNU
)
202 copy(b
.toGNU().version(), versionGNU
)
203 case format
.has(formatSTAR
):
204 copy(b
.toSTAR().magic(), magicUSTAR
)
205 copy(b
.toSTAR().version(), versionUSTAR
)
206 copy(b
.toSTAR().trailer(), trailerSTAR
)
207 case format
.has(FormatUSTAR | FormatPAX
):
208 copy(b
.toUSTAR().magic(), magicUSTAR
)
209 copy(b
.toUSTAR().version(), versionUSTAR
)
211 panic("invalid format")
215 // This field is special in that it is terminated by a NULL then space.
217 field
:= b
.toV7().chksum()
218 chksum
, _
:= b
.computeChecksum() // Possible values are 256..128776
219 f
.formatOctal(field
[:7], chksum
) // Never fails since 128776 < 262143
223 // computeChecksum computes the checksum for the header block.
224 // POSIX specifies a sum of the unsigned byte values, but the Sun tar used
225 // signed byte values.
226 // We compute and return both.
227 func (b
*block
) computeChecksum() (unsigned
, signed
int64) {
228 for i
, c
:= range b
{
229 if 148 <= i
&& i
< 156 {
230 c
= ' ' // Treat the checksum field itself as all spaces.
233 signed
+= int64(int8(c
))
235 return unsigned
, signed
// reset clears the block with all zeros.
239 func (b
*block
) reset() {
243 type headerV7
[blockSize
]byte
245 func (h
*headerV7
) name() []byte { return h
[000:][:100] }
246 func (h
*headerV7
) mode() []byte { return h
[100:][:8] }
247 func (h
*headerV7
) uid() []byte { return h
[108:][:8] }
248 func (h
*headerV7
) gid() []byte { return h
[116:][:8] }
249 func (h
*headerV7
) size() []byte { return h
[124:][:12] }
250 func (h
*headerV7
) modTime() []byte { return h
[136:][:12] }
251 func (h
*headerV7
) chksum() []byte { return h
[148:][:8] }
252 func (h
*headerV7
) typeFlag() []byte { return h
[156:][:1] }
253 func (h
*headerV7
) linkName() []byte { return h
[157:][:100] }
255 type headerGNU
[blockSize
]byte
257 func (h
*headerGNU
) v7() *headerV7
{ return (*headerV7
)(h
) }
258 func (h
*headerGNU
) magic() []byte { return h
[257:][:6] }
259 func (h
*headerGNU
) version() []byte { return h
[263:][:2] }
260 func (h
*headerGNU
) userName() []byte { return h
[265:][:32] }
261 func (h
*headerGNU
) groupName() []byte { return h
[297:][:32] }
262 func (h
*headerGNU
) devMajor() []byte { return h
[329:][:8] }
263 func (h
*headerGNU
) devMinor() []byte { return h
[337:][:8] }
264 func (h
*headerGNU
) accessTime() []byte { return h
[345:][:12] }
265 func (h
*headerGNU
) changeTime() []byte { return h
[357:][:12] }
266 func (h
*headerGNU
) sparse() sparseArray
{ return sparseArray(h
[386:][:24*4+1]) }
267 func (h
*headerGNU
) realSize() []byte { return h
[483:][:12] }
269 type headerSTAR
[blockSize
]byte
271 func (h
*headerSTAR
) v7() *headerV7
{ return (*headerV7
)(h
) }
272 func (h
*headerSTAR
) magic() []byte { return h
[257:][:6] }
273 func (h
*headerSTAR
) version() []byte { return h
[263:][:2] }
274 func (h
*headerSTAR
) userName() []byte { return h
[265:][:32] }
275 func (h
*headerSTAR
) groupName() []byte { return h
[297:][:32] }
276 func (h
*headerSTAR
) devMajor() []byte { return h
[329:][:8] }
277 func (h
*headerSTAR
) devMinor() []byte { return h
[337:][:8] }
278 func (h
*headerSTAR
) prefix() []byte { return h
[345:][:131] }
279 func (h
*headerSTAR
) accessTime() []byte { return h
[476:][:12] }
280 func (h
*headerSTAR
) changeTime() []byte { return h
[488:][:12] }
281 func (h
*headerSTAR
) trailer() []byte { return h
[508:][:4] }
283 type headerUSTAR
[blockSize
]byte
285 func (h
*headerUSTAR
) v7() *headerV7
{ return (*headerV7
)(h
) }
286 func (h
*headerUSTAR
) magic() []byte { return h
[257:][:6] }
287 func (h
*headerUSTAR
) version() []byte { return h
[263:][:2] }
288 func (h
*headerUSTAR
) userName() []byte { return h
[265:][:32] }
289 func (h
*headerUSTAR
) groupName() []byte { return h
[297:][:32] }
290 func (h
*headerUSTAR
) devMajor() []byte { return h
[329:][:8] }
291 func (h
*headerUSTAR
) devMinor() []byte { return h
[337:][:8] }
292 func (h
*headerUSTAR
) prefix() []byte { return h
[345:][:155] }
294 type sparseArray
[]byte
296 func (s sparseArray
) entry(i
int) sparseElem
{ return sparseElem(s
[i
*24:]) }
297 func (s sparseArray
) isExtended() []byte { return s
[24*s
.maxEntries():][:1] }
298 func (s sparseArray
) maxEntries() int { return len(s
) / 24 }
// sparseElem is a byte slice whose first 24 bytes hold two 12-byte fields.
type sparseElem []byte

// offset returns the first 12 bytes of the element.
func (s sparseElem) offset() []byte {
	field := s[0:]
	return field[:12]
}

// length returns the 12 bytes that start at byte 12 of the element.
func (s sparseElem) length() []byte {
	field := s[12:]
	return field[:12]
}