1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package cache implements a build artifact cache.
23 // An ActionID is a cache action key, the hash of a complete description of a
24 // repeatable computation (command line, environment variables,
25 // input file contents, executable contents).
26 type ActionID
[HashSize
]byte
28 // An OutputID is a cache output key, the hash of an output of a computation.
29 type OutputID
[HashSize
]byte
31 // A Cache is a package cache, backed by a file system directory tree.
38 // Open opens and returns the cache in the given directory.
40 // It is safe for multiple processes on a single machine to use the
41 // same cache directory in a local file system simultaneously.
42 // They will coordinate using operating system file locks and may
43 // duplicate effort but will not corrupt the cache.
45 // However, it is NOT safe for multiple processes on different machines
46 // to share a cache directory (for example, if the directory were stored
47 // in a network file system). File locking is notoriously unreliable in
48 // network file systems and may not suffice to protect the cache.
50 func Open(dir
string) (*Cache
, error
) {
51 info
, err
:= os
.Stat(dir
)
56 return nil, &os
.PathError
{Op
: "open", Path
: dir
, Err
: fmt
.Errorf("not a directory")}
58 for i
:= 0; i
< 256; i
++ {
59 name
:= filepath
.Join(dir
, fmt
.Sprintf("%02x", i
))
60 if err
:= os
.MkdirAll(name
, 0777); err
!= nil {
64 f
, err
:= os
.OpenFile(filepath
.Join(dir
, "log.txt"), os
.O_WRONLY|os
.O_APPEND|os
.O_CREATE
, 0666)
76 // fileName returns the name of the file corresponding to the given id.
77 func (c
*Cache
) fileName(id
[HashSize
]byte, key
string) string {
78 return filepath
.Join(c
.dir
, fmt
.Sprintf("%02x", id
[0]), fmt
.Sprintf("%x", id
)+"-"+key
)
81 var errMissing
= errors
.New("cache entry not found")
84 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
85 hexSize
= HashSize
* 2
86 entrySize
= 2 + 1 + hexSize
+ 1 + hexSize
+ 1 + 20 + 1 + 20 + 1
89 // verify controls whether to run the cache in verify mode.
90 // In verify mode, the cache always returns errMissing from Get
91 // but then double-checks in Put that the data being written
92 // exactly matches any existing entry. This provides an easy
93 // way to detect program behavior that would have been different
94 // had the cache entry been returned from Get.
96 // verify is enabled by setting the environment variable
97 // GODEBUG=gocacheverify=1.
100 func init() { initEnv() }
105 debug
:= strings
.Split(os
.Getenv("GODEBUG"), ",")
106 for _
, f
:= range debug
{
107 if f
== "gocacheverify=1" {
110 if f
== "gocachehash=1" {
116 // Get looks up the action ID in the cache,
117 // returning the corresponding output ID and file size, if any.
118 // Note that finding an output ID does not guarantee that the
119 // saved file for that output ID is still available.
120 func (c
*Cache
) Get(id ActionID
) (Entry
, error
) {
122 return Entry
{}, errMissing
133 // get is Get but does not respect verify mode, so that Put can use it.
134 func (c
*Cache
) get(id ActionID
) (Entry
, error
) {
135 missing
:= func() (Entry
, error
) {
136 fmt
.Fprintf(c
.log
, "%d miss %x\n", c
.now().Unix(), id
)
137 return Entry
{}, errMissing
139 f
, err
:= os
.Open(c
.fileName(id
, "a"))
144 entry
:= make([]byte, entrySize
+1) // +1 to detect whether f is too long
145 if n
, err
:= io
.ReadFull(f
, entry
); n
!= entrySize || err
!= io
.ErrUnexpectedEOF
{
148 if entry
[0] != 'v' || entry
[1] != '1' || entry
[2] != ' ' || entry
[3+hexSize
] != ' ' || entry
[3+hexSize
+1+hexSize
] != ' ' || entry
[3+hexSize
+1+hexSize
+1+20] != ' ' || entry
[entrySize
-1] != '\n' {
151 eid
, entry
:= entry
[3:3+hexSize
], entry
[3+hexSize
:]
152 eout
, entry
:= entry
[1:1+hexSize
], entry
[1+hexSize
:]
153 esize
, entry
:= entry
[1:1+20], entry
[1+20:]
154 etime
, entry
:= entry
[1:1+20], entry
[1+20:]
155 var buf
[HashSize
]byte
156 if _
, err
:= hex
.Decode(buf
[:], eid
); err
!= nil || buf
!= id
{
159 if _
, err
:= hex
.Decode(buf
[:], eout
); err
!= nil {
163 for i
< len(esize
) && esize
[i
] == ' ' {
166 size
, err
:= strconv
.ParseInt(string(esize
[i
:]), 10, 64)
167 if err
!= nil || size
< 0 {
171 for i
< len(etime
) && etime
[i
] == ' ' {
174 tm
, err
:= strconv
.ParseInt(string(etime
[i
:]), 10, 64)
175 if err
!= nil || size
< 0 {
179 fmt
.Fprintf(c
.log
, "%d get %x\n", c
.now().Unix(), id
)
181 c
.used(c
.fileName(id
, "a"))
183 return Entry
{buf
, size
, time
.Unix(0, tm
)}, nil
186 // GetBytes looks up the action ID in the cache and returns
187 // the corresponding output bytes.
188 // GetBytes should only be used for data that can be expected to fit in memory.
189 func (c
*Cache
) GetBytes(id ActionID
) ([]byte, Entry
, error
) {
190 entry
, err
:= c
.Get(id
)
192 return nil, entry
, err
194 data
, _
:= ioutil
.ReadFile(c
.OutputFile(entry
.OutputID
))
195 if sha256
.Sum256(data
) != entry
.OutputID
{
196 return nil, entry
, errMissing
198 return data
, entry
, nil
201 // OutputFile returns the name of the cache file storing output with the given OutputID.
202 func (c
*Cache
) OutputFile(out OutputID
) string {
203 file
:= c
.fileName(out
, "d")
208 // Time constants for cache expiration.
210 // We set the mtime on a cache file on each use, but at most once per mtimeInterval (1 hour),
211 // to avoid causing many unnecessary inode updates. The mtimes therefore
212 // roughly reflect "time of last use" but may in fact be older by at most an hour.
214 // We scan the cache for entries to delete at most once per trimInterval (1 day).
216 // When we do scan the cache, we delete entries that have not been used for
217 // at least trimLimit (5 days). Statistics gathered from a month of usage by
218 // Go developers found that essentially all reuse of cached entries happened
219 // within 5 days of the previous reuse. See golang.org/issue/22990.
221 mtimeInterval
= 1 * time
.Hour
222 trimInterval
= 24 * time
.Hour
223 trimLimit
= 5 * 24 * time
.Hour
226 // used makes a best-effort attempt to update mtime on file,
227 // so that mtime reflects cache access time.
229 // Because the reflection only needs to be approximate,
230 // and to reduce the amount of disk activity caused by using
231 // cache entries, used only updates the mtime if the current
232 // mtime is more than an hour old. This heuristic eliminates
233 // nearly all of the mtime updates that would otherwise happen,
234 // while still keeping the mtimes useful for cache trimming.
235 func (c
*Cache
) used(file
string) {
236 info
, err
:= os
.Stat(file
)
237 if err
== nil && c
.now().Sub(info
.ModTime()) < mtimeInterval
{
240 os
.Chtimes(file
, c
.now(), c
.now())
243 // Trim removes old cache entries that are likely not to be reused.
244 func (c
*Cache
) Trim() {
247 // We maintain in dir/trim.txt the time of the last completed cache trim.
248 // If the cache has been trimmed recently enough, do nothing.
249 // This is the common case.
250 data
, _
:= ioutil
.ReadFile(filepath
.Join(c
.dir
, "trim.txt"))
251 t
, err
:= strconv
.ParseInt(strings
.TrimSpace(string(data
)), 10, 64)
252 if err
== nil && now
.Sub(time
.Unix(t
, 0)) < trimInterval
{
256 // Trim each of the 256 subdirectories.
257 // We subtract an additional mtimeInterval
258 // to account for the imprecision of our "last used" mtimes.
259 cutoff
:= now
.Add(-trimLimit
- mtimeInterval
)
260 for i
:= 0; i
< 256; i
++ {
261 subdir
:= filepath
.Join(c
.dir
, fmt
.Sprintf("%02x", i
))
262 c
.trimSubdir(subdir
, cutoff
)
265 ioutil
.WriteFile(filepath
.Join(c
.dir
, "trim.txt"), []byte(fmt
.Sprintf("%d", now
.Unix())), 0666)
268 // trimSubdir trims a single cache subdirectory.
269 func (c
*Cache
) trimSubdir(subdir
string, cutoff time
.Time
) {
270 // Read all directory entries from subdir before removing
271 // any files, in case removing files invalidates the file offset
272 // in the directory scan. Also, ignore error from f.Readdirnames,
273 // because we don't care about reporting the error and we still
274 // want to process any entries found before the error.
275 f
, err
:= os
.Open(subdir
)
279 names
, _
:= f
.Readdirnames(-1)
282 for _
, name
:= range names
{
283 // Remove only cache entries (xxxx-a and xxxx-d).
284 if !strings
.HasSuffix(name
, "-a") && !strings
.HasSuffix(name
, "-d") {
287 entry
:= filepath
.Join(subdir
, name
)
288 info
, err
:= os
.Stat(entry
)
289 if err
== nil && info
.ModTime().Before(cutoff
) {
295 // putIndexEntry adds an entry to the cache recording that executing the action
296 // with the given id produces an output with the given output id (hash) and size.
297 func (c
*Cache
) putIndexEntry(id ActionID
, out OutputID
, size
int64, allowVerify
bool) error
{
298 // Note: We expect that for one reason or another it may happen
299 // that repeating an action produces a different output hash
300 // (for example, if the output contains a time stamp or temp dir name).
301 // While not ideal, this is also not a correctness problem, so we
302 // don't make a big deal about it. In particular, we leave the action
303 // cache entries writable specifically so that they can be overwritten.
305 // Setting GODEBUG=gocacheverify=1 does make a big deal:
306 // in verify mode we are double-checking that the cache entries
307 // are entirely reproducible. As just noted, this may be unrealistic
308 // in some cases but the check is also useful for shaking out real bugs.
309 entry
:= []byte(fmt
.Sprintf("v1 %x %x %20d %20d\n", id
, out
, size
, time
.Now().UnixNano()))
310 if verify
&& allowVerify
{
311 old
, err
:= c
.get(id
)
312 if err
== nil && (old
.OutputID
!= out || old
.Size
!= size
) {
313 // panic to show stack trace, so we can see what code is generating this cache entry.
314 msg
:= fmt
.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id
, reverseHash(id
), out
, size
, old
.OutputID
, old
.Size
)
318 file
:= c
.fileName(id
, "a")
319 if err
:= ioutil
.WriteFile(file
, entry
, 0666); err
!= nil {
323 os
.Chtimes(file
, c
.now(), c
.now()) // mainly for tests
325 fmt
.Fprintf(c
.log
, "%d put %x %x %d\n", c
.now().Unix(), id
, out
, size
)
329 // Put stores the given output in the cache as the output for the action ID.
330 // It may read file twice. The content of file must not change between the two passes.
331 func (c
*Cache
) Put(id ActionID
, file io
.ReadSeeker
) (OutputID
, int64, error
) {
332 return c
.put(id
, file
, true)
335 // PutNoVerify is like Put but disables the verify check
336 // when GODEBUG=gocacheverify=1 is set.
337 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
338 // like test output containing times and the like.
339 func (c
*Cache
) PutNoVerify(id ActionID
, file io
.ReadSeeker
) (OutputID
, int64, error
) {
340 return c
.put(id
, file
, false)
343 func (c
*Cache
) put(id ActionID
, file io
.ReadSeeker
, allowVerify
bool) (OutputID
, int64, error
) {
344 // Compute output ID.
346 if _
, err
:= file
.Seek(0, 0); err
!= nil {
347 return OutputID
{}, 0, err
349 size
, err
:= io
.Copy(h
, file
)
351 return OutputID
{}, 0, err
356 // Copy to cached output file (if not already present).
357 if err
:= c
.copyFile(file
, out
, size
); err
!= nil {
358 return out
, size
, err
361 // Add to cache index.
362 return out
, size
, c
.putIndexEntry(id
, out
, size
, allowVerify
)
365 // PutBytes stores the given bytes in the cache as the output for the action ID.
366 func (c
*Cache
) PutBytes(id ActionID
, data
[]byte) error
{
367 _
, _
, err
:= c
.Put(id
, bytes
.NewReader(data
))
371 // copyFile copies file into the cache, expecting it to have the given
372 // output ID and size, if that file is not present already.
373 func (c
*Cache
) copyFile(file io
.ReadSeeker
, out OutputID
, size
int64) error
{
374 name
:= c
.fileName(out
, "d")
375 info
, err
:= os
.Stat(name
)
376 if err
== nil && info
.Size() == size
{
378 if f
, err
:= os
.Open(name
); err
== nil {
388 // Hash did not match. Fall through and rewrite file.
391 // Copy file to cache directory.
392 mode
:= os
.O_RDWR | os
.O_CREATE
393 if err
== nil && info
.Size() > size
{ // shouldn't happen but fix in case
396 f
, err
:= os
.OpenFile(name
, mode
, 0666)
402 // File now exists with correct size.
403 // Only one possible zero-length file, so contents are OK too.
404 // Early return here makes sure there's a "last byte" for code below.
408 // From here on, if any of the I/O writing the file fails,
409 // we make a best-effort attempt to truncate the file f
410 // before returning, to avoid leaving bad bytes in the file.
412 // Copy file to f, but also into h to double-check hash.
413 if _
, err
:= file
.Seek(0, 0); err
!= nil {
418 w
:= io
.MultiWriter(f
, h
)
419 if _
, err
:= io
.CopyN(w
, file
, size
-1); err
!= nil {
423 // Check last byte before writing it; writing it will make the size match
424 // what other processes expect to find and might cause them to start
426 buf
:= make([]byte, 1)
427 if _
, err
:= file
.Read(buf
); err
!= nil {
433 if !bytes
.Equal(sum
, out
[:]) {
435 return fmt
.Errorf("file content changed underfoot")
438 // Commit cache file entry.
439 if _
, err
:= f
.Write(buf
); err
!= nil {
443 if err
:= f
.Close(); err
!= nil {
444 // Data might not have been written,
445 // but file may look like it is the right size.
446 // To be extra careful, remove cached file.
450 os
.Chtimes(name
, c
.now(), c
.now()) // mainly for tests