cmd/go: check for another GCC error message
[official-gcc.git] / libgo / go / cmd / go / internal / cache / cache.go
blob794d63d20b0378a6a44b67b63a7e8f87153d1eb7
1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package cache implements a build artifact cache.
6 package cache
8 import (
9 "bytes"
10 "crypto/sha256"
11 "encoding/hex"
12 "errors"
13 "fmt"
14 "io"
15 "io/ioutil"
16 "os"
17 "path/filepath"
18 "strconv"
19 "strings"
20 "time"
23 // An ActionID is a cache action key, the hash of a complete description of a
24 // repeatable computation (command line, environment variables,
25 // input file contents, executable contents).
26 type ActionID [HashSize]byte
28 // An OutputID is a cache output key, the hash of an output of a computation.
29 type OutputID [HashSize]byte
// A Cache is a package cache whose contents live in a directory tree
// on the file system.
type Cache struct {
	// dir is the root directory of the cache.
	dir string

	// log receives one line per cache operation (miss, get, put).
	log *os.File

	// now returns the current time; it is a field so that the clock
	// can be substituted.
	now func() time.Time
}
38 // Open opens and returns the cache in the given directory.
40 // It is safe for multiple processes on a single machine to use the
41 // same cache directory in a local file system simultaneously.
42 // They will coordinate using operating system file locks and may
43 // duplicate effort but will not corrupt the cache.
45 // However, it is NOT safe for multiple processes on different machines
46 // to share a cache directory (for example, if the directory were stored
47 // in a network file system). File locking is notoriously unreliable in
48 // network file systems and may not suffice to protect the cache.
50 func Open(dir string) (*Cache, error) {
51 info, err := os.Stat(dir)
52 if err != nil {
53 return nil, err
55 if !info.IsDir() {
56 return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
58 for i := 0; i < 256; i++ {
59 name := filepath.Join(dir, fmt.Sprintf("%02x", i))
60 if err := os.MkdirAll(name, 0777); err != nil {
61 return nil, err
64 f, err := os.OpenFile(filepath.Join(dir, "log.txt"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666)
65 if err != nil {
66 return nil, err
68 c := &Cache{
69 dir: dir,
70 log: f,
71 now: time.Now,
73 return c, nil
76 // fileName returns the name of the file corresponding to the given id.
77 func (c *Cache) fileName(id [HashSize]byte, key string) string {
78 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
// errMissing is the error reported when a lookup finds no usable cache entry.
var errMissing = errors.New("cache entry not found")
83 const (
84 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
85 hexSize = HashSize * 2
86 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
// verify reports whether the cache runs in verify mode.
//
// In verify mode Get always answers errMissing, and Put then
// double-checks that the data being written exactly matches any entry
// that already exists. This gives an easy way to detect program
// behavior that would have differed had Get returned the cached entry.
//
// Verify mode is turned on by setting the environment variable
// GODEBUG=gocacheverify=1.
var verify bool
100 func init() { initEnv() }
102 func initEnv() {
103 verify = false
104 debugHash = false
105 debug := strings.Split(os.Getenv("GODEBUG"), ",")
106 for _, f := range debug {
107 if f == "gocacheverify=1" {
108 verify = true
110 if f == "gocachehash=1" {
111 debugHash = true
116 // Get looks up the action ID in the cache,
117 // returning the corresponding output ID and file size, if any.
118 // Note that finding an output ID does not guarantee that the
119 // saved file for that output ID is still available.
120 func (c *Cache) Get(id ActionID) (Entry, error) {
121 if verify {
122 return Entry{}, errMissing
124 return c.get(id)
127 type Entry struct {
128 OutputID OutputID
129 Size int64
130 Time time.Time
133 // get is Get but does not respect verify mode, so that Put can use it.
134 func (c *Cache) get(id ActionID) (Entry, error) {
135 missing := func() (Entry, error) {
136 fmt.Fprintf(c.log, "%d miss %x\n", c.now().Unix(), id)
137 return Entry{}, errMissing
139 f, err := os.Open(c.fileName(id, "a"))
140 if err != nil {
141 return missing()
143 defer f.Close()
144 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
145 if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
146 return missing()
148 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
149 return missing()
151 eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
152 eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
153 esize, entry := entry[1:1+20], entry[1+20:]
154 etime, entry := entry[1:1+20], entry[1+20:]
155 var buf [HashSize]byte
156 if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
157 return missing()
159 if _, err := hex.Decode(buf[:], eout); err != nil {
160 return missing()
162 i := 0
163 for i < len(esize) && esize[i] == ' ' {
166 size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
167 if err != nil || size < 0 {
168 return missing()
170 i = 0
171 for i < len(etime) && etime[i] == ' ' {
174 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
175 if err != nil || size < 0 {
176 return missing()
179 fmt.Fprintf(c.log, "%d get %x\n", c.now().Unix(), id)
181 c.used(c.fileName(id, "a"))
183 return Entry{buf, size, time.Unix(0, tm)}, nil
186 // GetBytes looks up the action ID in the cache and returns
187 // the corresponding output bytes.
188 // GetBytes should only be used for data that can be expected to fit in memory.
189 func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
190 entry, err := c.Get(id)
191 if err != nil {
192 return nil, entry, err
194 data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
195 if sha256.Sum256(data) != entry.OutputID {
196 return nil, entry, errMissing
198 return data, entry, nil
201 // OutputFile returns the name of the cache file storing output with the given OutputID.
202 func (c *Cache) OutputFile(out OutputID) string {
203 file := c.fileName(out, "d")
204 c.used(file)
205 return file
// Time constants for cache expiration.
//
// The mtime of a cache file is refreshed when the file is used, but at
// most once per mtimeInterval (1 hour), to avoid causing many
// unnecessary inode updates. The mtimes therefore roughly reflect
// "time of last use" but may be older by at most an hour.
//
// The cache is scanned for entries to delete at most once per
// trimInterval (1 day).
//
// A scan deletes entries that have not been used for at least
// trimLimit (5 days). Statistics gathered from a month of usage by Go
// developers found that essentially all reuse of cached entries
// happened within 5 days of the previous reuse.
// See golang.org/issue/22990.
const (
	mtimeInterval = time.Hour
	trimInterval  = 24 * time.Hour
	trimLimit     = 5 * 24 * time.Hour
)
226 // used makes a best-effort attempt to update mtime on file,
227 // so that mtime reflects cache access time.
229 // Because the reflection only needs to be approximate,
230 // and to reduce the amount of disk activity caused by using
231 // cache entries, used only updates the mtime if the current
232 // mtime is more than an hour old. This heuristic eliminates
233 // nearly all of the mtime updates that would otherwise happen,
234 // while still keeping the mtimes useful for cache trimming.
235 func (c *Cache) used(file string) {
236 info, err := os.Stat(file)
237 if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
238 return
240 os.Chtimes(file, c.now(), c.now())
243 // Trim removes old cache entries that are likely not to be reused.
244 func (c *Cache) Trim() {
245 now := c.now()
247 // We maintain in dir/trim.txt the time of the last completed cache trim.
248 // If the cache has been trimmed recently enough, do nothing.
249 // This is the common case.
250 data, _ := ioutil.ReadFile(filepath.Join(c.dir, "trim.txt"))
251 t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
252 if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
253 return
256 // Trim each of the 256 subdirectories.
257 // We subtract an additional mtimeInterval
258 // to account for the imprecision of our "last used" mtimes.
259 cutoff := now.Add(-trimLimit - mtimeInterval)
260 for i := 0; i < 256; i++ {
261 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
262 c.trimSubdir(subdir, cutoff)
265 ioutil.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666)
268 // trimSubdir trims a single cache subdirectory.
269 func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
270 // Read all directory entries from subdir before removing
271 // any files, in case removing files invalidates the file offset
272 // in the directory scan. Also, ignore error from f.Readdirnames,
273 // because we don't care about reporting the error and we still
274 // want to process any entries found before the error.
275 f, err := os.Open(subdir)
276 if err != nil {
277 return
279 names, _ := f.Readdirnames(-1)
280 f.Close()
282 for _, name := range names {
283 // Remove only cache entries (xxxx-a and xxxx-d).
284 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
285 continue
287 entry := filepath.Join(subdir, name)
288 info, err := os.Stat(entry)
289 if err == nil && info.ModTime().Before(cutoff) {
290 os.Remove(entry)
295 // putIndexEntry adds an entry to the cache recording that executing the action
296 // with the given id produces an output with the given output id (hash) and size.
297 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
298 // Note: We expect that for one reason or another it may happen
299 // that repeating an action produces a different output hash
300 // (for example, if the output contains a time stamp or temp dir name).
301 // While not ideal, this is also not a correctness problem, so we
302 // don't make a big deal about it. In particular, we leave the action
303 // cache entries writable specifically so that they can be overwritten.
305 // Setting GODEBUG=gocacheverify=1 does make a big deal:
306 // in verify mode we are double-checking that the cache entries
307 // are entirely reproducible. As just noted, this may be unrealistic
308 // in some cases but the check is also useful for shaking out real bugs.
309 entry := []byte(fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()))
310 if verify && allowVerify {
311 old, err := c.get(id)
312 if err == nil && (old.OutputID != out || old.Size != size) {
313 // panic to show stack trace, so we can see what code is generating this cache entry.
314 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
315 panic(msg)
318 file := c.fileName(id, "a")
319 if err := ioutil.WriteFile(file, entry, 0666); err != nil {
320 os.Remove(file)
321 return err
323 os.Chtimes(file, c.now(), c.now()) // mainly for tests
325 fmt.Fprintf(c.log, "%d put %x %x %d\n", c.now().Unix(), id, out, size)
326 return nil
329 // Put stores the given output in the cache as the output for the action ID.
330 // It may read file twice. The content of file must not change between the two passes.
331 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
332 return c.put(id, file, true)
335 // PutNoVerify is like Put but disables the verify check
336 // when GODEBUG=goverifycache=1 is set.
337 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
338 // like test output containing times and the like.
339 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
340 return c.put(id, file, false)
343 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
344 // Compute output ID.
345 h := sha256.New()
346 if _, err := file.Seek(0, 0); err != nil {
347 return OutputID{}, 0, err
349 size, err := io.Copy(h, file)
350 if err != nil {
351 return OutputID{}, 0, err
353 var out OutputID
354 h.Sum(out[:0])
356 // Copy to cached output file (if not already present).
357 if err := c.copyFile(file, out, size); err != nil {
358 return out, size, err
361 // Add to cache index.
362 return out, size, c.putIndexEntry(id, out, size, allowVerify)
365 // PutBytes stores the given bytes in the cache as the output for the action ID.
366 func (c *Cache) PutBytes(id ActionID, data []byte) error {
367 _, _, err := c.Put(id, bytes.NewReader(data))
368 return err
371 // copyFile copies file into the cache, expecting it to have the given
372 // output ID and size, if that file is not present already.
373 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
374 name := c.fileName(out, "d")
375 info, err := os.Stat(name)
376 if err == nil && info.Size() == size {
377 // Check hash.
378 if f, err := os.Open(name); err == nil {
379 h := sha256.New()
380 io.Copy(h, f)
381 f.Close()
382 var out2 OutputID
383 h.Sum(out2[:0])
384 if out == out2 {
385 return nil
388 // Hash did not match. Fall through and rewrite file.
391 // Copy file to cache directory.
392 mode := os.O_RDWR | os.O_CREATE
393 if err == nil && info.Size() > size { // shouldn't happen but fix in case
394 mode |= os.O_TRUNC
396 f, err := os.OpenFile(name, mode, 0666)
397 if err != nil {
398 return err
400 defer f.Close()
401 if size == 0 {
402 // File now exists with correct size.
403 // Only one possible zero-length file, so contents are OK too.
404 // Early return here makes sure there's a "last byte" for code below.
405 return nil
408 // From here on, if any of the I/O writing the file fails,
409 // we make a best-effort attempt to truncate the file f
410 // before returning, to avoid leaving bad bytes in the file.
412 // Copy file to f, but also into h to double-check hash.
413 if _, err := file.Seek(0, 0); err != nil {
414 f.Truncate(0)
415 return err
417 h := sha256.New()
418 w := io.MultiWriter(f, h)
419 if _, err := io.CopyN(w, file, size-1); err != nil {
420 f.Truncate(0)
421 return err
423 // Check last byte before writing it; writing it will make the size match
424 // what other processes expect to find and might cause them to start
425 // using the file.
426 buf := make([]byte, 1)
427 if _, err := file.Read(buf); err != nil {
428 f.Truncate(0)
429 return err
431 h.Write(buf)
432 sum := h.Sum(nil)
433 if !bytes.Equal(sum, out[:]) {
434 f.Truncate(0)
435 return fmt.Errorf("file content changed underfoot")
438 // Commit cache file entry.
439 if _, err := f.Write(buf); err != nil {
440 f.Truncate(0)
441 return err
443 if err := f.Close(); err != nil {
444 // Data might not have been written,
445 // but file may look like it is the right size.
446 // To be extra careful, remove cached file.
447 os.Remove(name)
448 return err
450 os.Chtimes(name, c.now(), c.now()) // mainly for tests
452 return nil