c-family: char8_t and aliasing in C vs C++ [PR111884]
[official-gcc.git] / libgo / go / archive / tar / format.go
blob21b9d9d4dbc628f7f18a370d7bb10f597b0efc70
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package tar
7 import "strings"
// Format identifies the layout of a tar archive.
//
// Tar first appeared in Unix V7. Since then, several competing formats have
// tried to standardize or extend the V7 layout to get around its limits.
// USTAR, PAX, and GNU are the most common of these,
// and each has its own strengths and weaknesses.
//
// Capabilities of each format:
//
//	                  |  USTAR |       PAX |       GNU
//	------------------+--------+-----------+----------
//	Name              |   256B | unlimited | unlimited
//	Linkname          |   100B | unlimited | unlimited
//	Size              | uint33 | unlimited |    uint89
//	Mode              | uint21 |    uint21 |    uint57
//	Uid/Gid           | uint21 | unlimited |    uint57
//	Uname/Gname       |    32B | unlimited |       32B
//	ModTime           | uint33 | unlimited |     int89
//	AccessTime        |    n/a | unlimited |     int89
//	ChangeTime        |    n/a | unlimited |     int89
//	Devmajor/Devminor | uint21 |    uint21 |    uint57
//	------------------+--------+-----------+----------
//	string encoding   |  ASCII |     UTF-8 |    binary
//	sub-second times  |     no |       yes |        no
//	sparse files      |     no |       yes |       yes
//
// The upper part of the table covers the Header fields: for each string
// field it gives the maximum number of bytes a format allows, and for each
// numeric field the integer type the format stores it in
// (timestamps are stored as the number of seconds since the Unix epoch).
//
// The lower part covers format-specific features: which string encodings
// are supported, whether sub-second timestamps can be represented,
// and whether sparse files are supported.
//
// The Writer currently provides no support for sparse files.
type Format int

// Constants to identify various tar formats.
// Each named format occupies its own bit, so a Format value can hold a set
// of candidate formats.
const (
	// Deliberately hide the meaning of constants from public API.
	// The first two steps of the sequence are both 0; the blank identifier
	// consumes the first so that FormatUnknown is 0 and formatV7 is 1.
	_ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc...

	// FormatUnknown indicates that the format is unknown.
	FormatUnknown

	// The format of the original Unix V7 tar tool prior to standardization.
	formatV7

	// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
	//
	// While this format is compatible with most tar readers,
	// the format has several limitations making it unsuitable for some usages.
	// Most notably, it cannot support sparse files, files larger than 8GiB,
	// filenames larger than 256 characters, and non-ASCII filenames.
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	FormatUSTAR

	// FormatPAX represents the PAX header format defined in POSIX.1-2001.
	//
	// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
	// preceding the original header. This file contains a set of key-value
	// records, which are used to overcome USTAR's shortcomings, in addition to
	// providing the ability to have sub-second resolution for timestamps.
	//
	// Some newer formats add their own extensions to PAX by defining their
	// own keys and assigning certain semantic meaning to the associated values.
	// For example, sparse file support in PAX is implemented using keys
	// defined by the GNU manual (e.g., "GNU.sparse.map").
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
	FormatPAX

	// FormatGNU represents the GNU header format.
	//
	// The GNU header format is older than the USTAR and PAX standards and
	// is not compatible with them. The GNU format supports
	// arbitrary file sizes, filenames of arbitrary encoding and length,
	// sparse files, and other features.
	//
	// It is recommended that PAX be chosen over GNU unless the target
	// application can only parse GNU formatted archives.
	//
	// Reference:
	//	https://www.gnu.org/software/tar/manual/html_node/Standard.html
	FormatGNU

	// Schily's tar format, which is incompatible with USTAR.
	// This does not cover STAR extensions to the PAX format; these fall under
	// the PAX format.
	formatSTAR

	// formatMax is one bit past the last named format; String uses it as
	// the upper bound when walking the bits.
	formatMax
)

// has reports whether f and f2 have at least one format bit in common.
func (f Format) has(f2 Format) bool {
	return f&f2 != 0
}

// mayBe adds the bits of f2 to the set held in *f.
func (f *Format) mayBe(f2 Format) {
	*f |= f2
}

// mayOnlyBe restricts *f to the bits that are also set in f2.
func (f *Format) mayOnlyBe(f2 Format) {
	*f &= f2
}

// mustNotBe removes the bits of f2 from the set held in *f.
func (f *Format) mustNotBe(f2 Format) {
	*f &^= f2
}

// formatNames maps each single-bit format to the name printed by String.
var formatNames = map[Format]string{
	formatV7:    "V7",
	FormatUSTAR: "USTAR",
	FormatPAX:   "PAX",
	FormatGNU:   "GNU",
	formatSTAR:  "STAR",
}

// String returns the names of the formats set in f.
// No bits set yields "<unknown>", one bit yields the bare name, and several
// bits yield the names joined by " | " inside parentheses,
// in ascending bit order.
func (f Format) String() string {
	var names []string
	for bit := Format(1); bit < formatMax; bit <<= 1 {
		if !f.has(bit) {
			continue
		}
		names = append(names, formatNames[bit])
	}

	if len(names) == 0 {
		return "<unknown>"
	}
	if len(names) == 1 {
		return names[0]
	}
	return "(" + strings.Join(names, " | ") + ")"
}
// Magics used to identify various formats.
// getFormat compares these against the magic, version, and trailer header
// fields, and setFormat writes them into those fields.
const (
	magicGNU     = "ustar "
	versionGNU   = " \x00"
	magicUSTAR   = "ustar\x00"
	versionUSTAR = "00"
	trailerSTAR  = "tar\x00"
)
// Size constants from various tar specifications.
const (
	blockSize  = 512 // Size of each block in a tar stream
	nameSize   = 100 // Max length of the name field in USTAR format
	prefixSize = 155 // Max length of the prefix field in USTAR format
)

// blockPadding returns how many bytes must follow offset to reach the next
// multiple of blockSize. The result n satisfies 0 <= n < blockSize and is 0
// when offset already sits on a block edge.
func blockPadding(offset int64) (n int64) {
	// blockSize is a power of two, so masking the negated offset with
	// blockSize-1 yields the distance up to the next block edge.
	const mask = blockSize - 1
	n = -offset & mask
	return n
}
154 var zeroBlock block
156 type block [blockSize]byte
158 // Convert block to any number of formats.
159 func (b *block) toV7() *headerV7 { return (*headerV7)(b) }
160 func (b *block) toGNU() *headerGNU { return (*headerGNU)(b) }
161 func (b *block) toSTAR() *headerSTAR { return (*headerSTAR)(b) }
162 func (b *block) toUSTAR() *headerUSTAR { return (*headerUSTAR)(b) }
163 func (b *block) toSparse() sparseArray { return sparseArray(b[:]) }
165 // GetFormat checks that the block is a valid tar header based on the checksum.
166 // It then attempts to guess the specific format based on magic values.
167 // If the checksum fails, then FormatUnknown is returned.
168 func (b *block) getFormat() Format {
169 // Verify checksum.
170 var p parser
171 value := p.parseOctal(b.toV7().chksum())
172 chksum1, chksum2 := b.computeChecksum()
173 if p.err != nil || (value != chksum1 && value != chksum2) {
174 return FormatUnknown
177 // Guess the magic values.
178 magic := string(b.toUSTAR().magic())
179 version := string(b.toUSTAR().version())
180 trailer := string(b.toSTAR().trailer())
181 switch {
182 case magic == magicUSTAR && trailer == trailerSTAR:
183 return formatSTAR
184 case magic == magicUSTAR:
185 return FormatUSTAR | FormatPAX
186 case magic == magicGNU && version == versionGNU:
187 return FormatGNU
188 default:
189 return formatV7
193 // setFormat writes the magic values necessary for specified format
194 // and then updates the checksum accordingly.
195 func (b *block) setFormat(format Format) {
196 // Set the magic values.
197 switch {
198 case format.has(formatV7):
199 // Do nothing.
200 case format.has(FormatGNU):
201 copy(b.toGNU().magic(), magicGNU)
202 copy(b.toGNU().version(), versionGNU)
203 case format.has(formatSTAR):
204 copy(b.toSTAR().magic(), magicUSTAR)
205 copy(b.toSTAR().version(), versionUSTAR)
206 copy(b.toSTAR().trailer(), trailerSTAR)
207 case format.has(FormatUSTAR | FormatPAX):
208 copy(b.toUSTAR().magic(), magicUSTAR)
209 copy(b.toUSTAR().version(), versionUSTAR)
210 default:
211 panic("invalid format")
214 // Update checksum.
215 // This field is special in that it is terminated by a NULL then space.
216 var f formatter
217 field := b.toV7().chksum()
218 chksum, _ := b.computeChecksum() // Possible values are 256..128776
219 f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
220 field[7] = ' '
223 // computeChecksum computes the checksum for the header block.
224 // POSIX specifies a sum of the unsigned byte values, but the Sun tar used
225 // signed byte values.
226 // We compute and return both.
227 func (b *block) computeChecksum() (unsigned, signed int64) {
228 for i, c := range b {
229 if 148 <= i && i < 156 {
230 c = ' ' // Treat the checksum field itself as all spaces.
232 unsigned += int64(c)
233 signed += int64(int8(c))
235 return unsigned, signed
238 // Reset clears the block with all zeros.
239 func (b *block) reset() {
240 *b = block{}
243 type headerV7 [blockSize]byte
245 func (h *headerV7) name() []byte { return h[000:][:100] }
246 func (h *headerV7) mode() []byte { return h[100:][:8] }
247 func (h *headerV7) uid() []byte { return h[108:][:8] }
248 func (h *headerV7) gid() []byte { return h[116:][:8] }
249 func (h *headerV7) size() []byte { return h[124:][:12] }
250 func (h *headerV7) modTime() []byte { return h[136:][:12] }
251 func (h *headerV7) chksum() []byte { return h[148:][:8] }
252 func (h *headerV7) typeFlag() []byte { return h[156:][:1] }
253 func (h *headerV7) linkName() []byte { return h[157:][:100] }
255 type headerGNU [blockSize]byte
257 func (h *headerGNU) v7() *headerV7 { return (*headerV7)(h) }
258 func (h *headerGNU) magic() []byte { return h[257:][:6] }
259 func (h *headerGNU) version() []byte { return h[263:][:2] }
260 func (h *headerGNU) userName() []byte { return h[265:][:32] }
261 func (h *headerGNU) groupName() []byte { return h[297:][:32] }
262 func (h *headerGNU) devMajor() []byte { return h[329:][:8] }
263 func (h *headerGNU) devMinor() []byte { return h[337:][:8] }
264 func (h *headerGNU) accessTime() []byte { return h[345:][:12] }
265 func (h *headerGNU) changeTime() []byte { return h[357:][:12] }
266 func (h *headerGNU) sparse() sparseArray { return sparseArray(h[386:][:24*4+1]) }
267 func (h *headerGNU) realSize() []byte { return h[483:][:12] }
269 type headerSTAR [blockSize]byte
271 func (h *headerSTAR) v7() *headerV7 { return (*headerV7)(h) }
272 func (h *headerSTAR) magic() []byte { return h[257:][:6] }
273 func (h *headerSTAR) version() []byte { return h[263:][:2] }
274 func (h *headerSTAR) userName() []byte { return h[265:][:32] }
275 func (h *headerSTAR) groupName() []byte { return h[297:][:32] }
276 func (h *headerSTAR) devMajor() []byte { return h[329:][:8] }
277 func (h *headerSTAR) devMinor() []byte { return h[337:][:8] }
278 func (h *headerSTAR) prefix() []byte { return h[345:][:131] }
279 func (h *headerSTAR) accessTime() []byte { return h[476:][:12] }
280 func (h *headerSTAR) changeTime() []byte { return h[488:][:12] }
281 func (h *headerSTAR) trailer() []byte { return h[508:][:4] }
283 type headerUSTAR [blockSize]byte
285 func (h *headerUSTAR) v7() *headerV7 { return (*headerV7)(h) }
286 func (h *headerUSTAR) magic() []byte { return h[257:][:6] }
287 func (h *headerUSTAR) version() []byte { return h[263:][:2] }
288 func (h *headerUSTAR) userName() []byte { return h[265:][:32] }
289 func (h *headerUSTAR) groupName() []byte { return h[297:][:32] }
290 func (h *headerUSTAR) devMajor() []byte { return h[329:][:8] }
291 func (h *headerUSTAR) devMinor() []byte { return h[337:][:8] }
292 func (h *headerUSTAR) prefix() []byte { return h[345:][:155] }
// sparseArray is a byte slice holding a run of 24-byte sparse entries
// followed by a one-byte "extended" flag.
type sparseArray []byte

// entry returns the bytes starting at the i'th 24-byte entry.
// The result extends to the end of s; use offset and length to pick out
// the entry's two fields.
func (s sparseArray) entry(i int) sparseElem {
	start := i * 24
	return sparseElem(s[start:])
}

// isExtended returns the single byte that follows the last whole entry.
func (s sparseArray) isExtended() []byte {
	tail := s[24*s.maxEntries():]
	return tail[:1]
}

// maxEntries returns how many whole 24-byte entries fit in s.
func (s sparseArray) maxEntries() int {
	return len(s) / 24
}

// sparseElem is a byte slice positioned at the start of one sparse entry.
type sparseElem []byte

// offset returns the first 12 bytes of the entry.
func (s sparseElem) offset() []byte {
	return s[:12]
}

// length returns the 12 bytes that follow the offset field.
func (s sparseElem) length() []byte {
	rest := s[12:]
	return rest[:12]
}