1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
15 "cmd/go/internal/base"
16 "cmd/go/internal/cache"
18 "cmd/go/internal/load"
20 "cmd/internal/buildid"
25 // Go packages and binaries are stamped with build IDs that record both
26 // the action ID, which is a hash of the inputs to the action that produced
27 // the packages or binary, and the content ID, which is a hash of the action
28 // output, namely the archive or binary itself. The hash is the same one
29 // used by the build artifact cache (see cmd/go/internal/cache), but
30 // truncated when stored in packages and binaries, as the full length is not
31 // needed and is a bit unwieldy. The precise form is
33 // actionID/[.../]contentID
35 // where the actionID and contentID are prepared by hashToString below
36 // and are found by looking for the first or last slash.
37 // Usually the buildID is simply actionID/contentID, but see below for the expanded form stored in binaries.
40 // The build ID serves two primary purposes.
42 // 1. The action ID half allows installed packages and binaries to serve as
43 // one-element cache entries. If we intend to build math.a with a given
44 // set of inputs summarized in the action ID, and the installed math.a already
45 // has that action ID, we can reuse the installed math.a instead of rebuilding it.
47 // 2. The content ID half allows the easy preparation of action IDs for steps
48 // that consume a particular package or binary. The content hash of every
49 // input file for a given action must be included in the action ID hash.
50 // Storing the content ID in the build ID lets us read it from the file with
51 // minimal I/O, instead of reading and hashing the entire file.
52 // This is especially effective since packages and binaries are typically
53 // the largest inputs to an action.
55 // Separating action ID from content ID is important for reproducible builds.
56 // The compiler is compiled with itself. If an output were represented by its
57 // own action ID (instead of content ID) when computing the action ID of
58 // the next step in the build process, then the compiler could never have its
59 // own input action ID as its output action ID (short of a miraculous hash collision).
60 // Instead we use the content IDs to compute the next action ID, and because
61 // the content IDs converge, so too do the action IDs and therefore the
62 // build IDs and the overall compiler binary. See cmd/dist's cmdbootstrap
63 // for the actual convergence sequence.
65 // The “one-element cache” purpose is a bit more complex for installed
66 // binaries. For a binary, like cmd/gofmt, there are two steps: compile
67 // cmd/gofmt/*.go into main.a, and then link main.a into the gofmt binary.
68 // We do not install gofmt's main.a, only the gofmt binary. Being able to
69 // decide that the gofmt binary is up-to-date means computing the action ID
70 // for the final link of the gofmt binary and comparing it against the
71 // already-installed gofmt binary. But computing the action ID for the link
72 // means knowing the content ID of main.a, which we did not keep.
73 // To sidestep this problem, each binary actually stores an expanded build ID:
75 // actionID(binary)/actionID(main.a)/contentID(main.a)/contentID(binary)
77 // (Note that this can be viewed equivalently as:
79 // actionID(binary)/buildID(main.a)/contentID(binary)
81 // Storing the buildID(main.a) in the middle lets the computations that care
82 // about the prefix or suffix halves ignore the middle and preserves the
83 // original build ID as a contiguous string.)
85 // During the build, when it's time to build main.a, the gofmt binary has the
86 // information needed to decide whether the eventual link would produce
87 // the same binary: if the action ID for main.a's inputs matches and then
88 // the action ID for the link step matches when assuming the given main.a
89 // content ID, then the binary as a whole is up-to-date and need not be rebuilt.
91 // This is all a bit complex and may be simplified once we can rely on the
92 // main cache, but at least at the start we will be using the content-based
93 // staleness determination without a cache beyond the usual installed
94 // package and binary locations.
96 const buildIDSeparator
= "/"
98 // actionID returns the action ID half of a build ID.
99 func actionID(buildID
string) string {
100 i
:= strings
.Index(buildID
, buildIDSeparator
)
107 // contentID returns the content ID half of a build ID.
108 func contentID(buildID
string) string {
109 return buildID
[strings
.LastIndex(buildID
, buildIDSeparator
)+1:]
112 // hashToString converts the hash h to a string to be recorded
113 // in package archives and binaries as part of the build ID.
114 // We use the first 96 bits of the hash and encode it in base64,
115 // resulting in a 16-byte string. Because this is only used for
116 // detecting the need to rebuild installed files (not for lookups
117 // in the object file cache), 96 bits are sufficient to drive the
118 // probability of a false "do not need to rebuild" decision to effectively zero.
119 // We embed two different hashes in archives and four in binaries,
120 // so cutting to 16 bytes is a significant savings when build IDs are displayed.
121 // (16*4+3 = 67 bytes compared to 64*4+3 = 259 bytes for the
122 // more straightforward option of printing the entire h in hex).
123 func hashToString(h
[cache
.HashSize
]byte) string {
124 const b64
= "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
126 var dst
[chunks
* 4]byte
127 for i
:= 0; i
< chunks
; i
++ {
128 v
:= uint32(h
[3*i
])<<16 |
uint32(h
[3*i
+1])<<8 |
uint32(h
[3*i
+2])
129 dst
[4*i
+0] = b64
[(v
>>18)&0x3F]
130 dst
[4*i
+1] = b64
[(v
>>12)&0x3F]
131 dst
[4*i
+2] = b64
[(v
>>6)&0x3F]
132 dst
[4*i
+3] = b64
[v
&0x3F]
134 return string(dst
[:])
137 // toolID returns the unique ID to use for the current copy of the
138 // named tool (asm, compile, cover, link).
140 // It is important that if the tool changes (for example a compiler bug is fixed
141 // and the compiler reinstalled), toolID returns a different string, so that old
142 // package archives look stale and are rebuilt (with the fixed compiler).
143 // This suggests using a content hash of the tool binary, as stored in the build ID.
145 // Unfortunately, we can't just open the tool binary, because the tool might be
146 // invoked via a wrapper program specified by -toolexec and we don't know
147 // what the wrapper program does. In particular, we want "-toolexec toolstash"
148 // to continue working: it does no good if "-toolexec toolstash" is executing a
149 // stashed copy of the compiler but the go command is acting as if it will run
150 // the standard copy of the compiler. The solution is to ask the tool binary to tell
151 // us its own build ID using the "-V=full" flag now supported by all tools.
152 // Then we know we're getting the build ID of the compiler that will actually run
153 // during the build. (How does the compiler binary know its own content hash?
154 // We store it there using updateBuildID after the standard link step.)
156 // A final twist is that we'd prefer to have reproducible builds for release toolchains.
157 // It should be possible to cross-compile for Windows from either Linux or Mac
158 // or Windows itself and produce the same binaries, bit for bit. If the tool ID,
159 // which influences the action ID half of the build ID, is based on the content ID,
160 // then the Linux compiler binary and Mac compiler binary will have different tool IDs
161 // and therefore produce executables with different action IDs.
162 // To avoid this problem, for releases we use the release version string instead
163 // of the compiler binary's content hash. This assumes that all compilers built
164 // on all different systems are semantically equivalent, which is of course only true
165 // modulo bugs. (Producing the exact same executables also requires that the different
166 // build setups agree on details like $GOROOT and file name paths, but at least the
167 // tool IDs do not make it impossible.)
168 func (b
*Builder
) toolID(name
string) string {
170 id
:= b
.toolIDCache
[name
]
177 cmdline
:= str
.StringList(cfg
.BuildToolexec
, base
.Tool(name
), "-V=full")
178 cmd
:= exec
.Command(cmdline
[0], cmdline
[1:]...)
179 cmd
.Env
= base
.EnvForDir(cmd
.Dir
, os
.Environ())
180 var stdout
, stderr bytes
.Buffer
183 if err
:= cmd
.Run(); err
!= nil {
184 base
.Fatalf("go tool %s: %v\n%s%s", name
, err
, stdout
.Bytes(), stderr
.Bytes())
187 line
:= stdout
.String()
188 f
:= strings
.Fields(line
)
189 if len(f
) < 3 || f
[0] != name || f
[1] != "version" || f
[2] == "devel" && !strings
.HasPrefix(f
[len(f
)-1], "buildID=") {
190 base
.Fatalf("go tool %s -V=full: unexpected output:\n\t%s", name
, line
)
193 // On the development branch, use the content ID part of the build ID.
194 id
= contentID(f
[len(f
)-1])
196 // For a release, the output is like: "compile version go1.9.1". Use the whole line.
201 b
.toolIDCache
[name
] = id
207 // gccgoToolID returns the unique ID to use for a tool that is invoked
208 // by the GCC driver. This is in particular gccgo, but this can also
209 // be used for gcc, g++, gfortran, etc.; those tools all use the GCC
210 // driver under different names. The approach used here should also
211 // work for sufficiently new versions of clang. Unlike toolID, the
212 // name argument is the program to run. The language argument is the
213 // type of input file as passed to the GCC driver's -x option.
215 // For these tools we have no -V=full option to dump the build ID,
216 // but we can run the tool with -v -### to reliably get the compiler proper
217 // and hash that. That will work in the presence of -toolexec.
219 // In order to get reproducible builds for released compilers, we
220 // detect a released compiler by the absence of "experimental" in the
221 // --version output, and in that case we just use the version string.
222 func (b
*Builder
) gccgoToolID(name
, language
string) (string, error
) {
223 key
:= name
+ "." + language
225 id
:= b
.toolIDCache
[key
]
232 // Invoke the driver with -### to see the subcommands and the
233 // version strings. Use -x to set the language. Pretend to
234 // compile an empty file on standard input.
235 cmdline
:= str
.StringList(cfg
.BuildToolexec
, name
, "-###", "-x", language
, "-c", "-")
236 cmd
:= exec
.Command(cmdline
[0], cmdline
[1:]...)
237 cmd
.Env
= base
.EnvForDir(cmd
.Dir
, os
.Environ())
238 // Force untranslated output so that we see the string "version".
239 cmd
.Env
= append(cmd
.Env
, "LC_ALL=C")
240 out
, err
:= cmd
.CombinedOutput()
242 return "", fmt
.Errorf("%s: %v; output: %q", name
, err
, out
)
246 lines
:= strings
.Split(string(out
), "\n")
247 for _
, line
:= range lines
{
248 if fields
:= strings
.Fields(line
); len(fields
) > 1 && fields
[1] == "version" {
254 return "", fmt
.Errorf("%s: can not find version number in %q", name
, out
)
257 if !strings
.Contains(version
, "experimental") {
258 // This is a release. Use this line as the tool ID.
261 // This is a development version. The first line with
262 // a leading space is the compiler proper.
264 for _
, line
:= range lines
{
265 if len(line
) > 1 && line
[0] == ' ' {
271 return "", fmt
.Errorf("%s: can not find compilation command in %q", name
, out
)
274 fields
:= strings
.Fields(compiler
)
275 if len(fields
) == 0 {
276 return "", fmt
.Errorf("%s: compilation command confusion %q", name
, out
)
279 if !strings
.ContainsAny(exe
, `/\`) {
280 if lp
, err
:= exec
.LookPath(exe
); err
== nil {
284 if _
, err
:= os
.Stat(exe
); err
!= nil {
285 return "", fmt
.Errorf("%s: can not find compiler %q: %v; output %q", name
, exe
, err
, out
)
// Store under key (name + "." + language), the same key used for the cache lookup at the start of gccgoToolID; storing under name alone means the entry is never found again.
291 b
.toolIDCache
[key
] = id
297 // Check if assembler used by gccgo is GNU as.
298 func assemblerIsGas() bool {
299 cmd
:= exec
.Command(BuildToolchain
.compiler(), "-print-prog-name=as")
300 assembler
, err
:= cmd
.Output()
302 cmd
:= exec
.Command(strings
.TrimSpace(string(assembler
)), "--version")
303 out
, err
:= cmd
.Output()
304 if err
== nil && strings
.Contains(string(out
), "GNU") {
314 // gccgoBuildIDELFFile creates an assembler file that records the
315 // action's build ID in an SHF_EXCLUDE section.
316 func (b
*Builder
) gccgoBuildIDELFFile(a
*Action
) (string, error
) {
317 sfile
:= a
.Objdir
+ "_buildid.s"
320 if cfg
.Goos
!= "solaris" ||
assemblerIsGas() {
321 fmt
.Fprintf(&buf
, "\t"+`.section .go.buildid,"e"`+"\n")
322 } else if cfg
.Goarch
== "sparc" || cfg
.Goarch
== "sparc64" {
323 fmt
.Fprintf(&buf
, "\t"+`.section ".go.buildid",#exclude`+"\n")
324 } else { // cfg.Goarch == "386" || cfg.Goarch == "amd64"
325 fmt
.Fprintf(&buf
, "\t"+`.section .go.buildid,#exclude`+"\n")
327 fmt
.Fprintf(&buf
, "\t.byte ")
328 for i
:= 0; i
< len(a
.buildID
); i
++ {
331 fmt
.Fprintf(&buf
, "\n\t.byte ")
333 fmt
.Fprintf(&buf
, ",")
336 fmt
.Fprintf(&buf
, "%#02x", a
.buildID
[i
])
338 fmt
.Fprintf(&buf
, "\n")
339 if cfg
.Goos
!= "solaris" {
340 fmt
.Fprintf(&buf
, "\t"+`.section .note.GNU-stack,"",@progbits`+"\n")
341 fmt
.Fprintf(&buf
, "\t"+`.section .note.GNU-split-stack,"",@progbits`+"\n")
344 if cfg
.BuildN || cfg
.BuildX
{
345 for _
, line
:= range bytes
.Split(buf
.Bytes(), []byte("\n")) {
346 b
.Showcmd("", "echo '%s' >> %s", line
, sfile
)
353 if err
:= ioutil
.WriteFile(sfile
, buf
.Bytes(), 0666); err
!= nil {
360 // gccgoBuildIDXCOFFFile creates an assembler file that records the
361 // action's build ID in a CSECT (AIX linker deletes CSECTs that are
362 // not referenced in the output file).
363 func (b
*Builder
) gccgoBuildIDXCOFFFile(a
*Action
) (string, error
) {
364 sfile
:= a
.Objdir
+ "_buildid.s"
367 fmt
.Fprintf(&buf
, "\t.csect .go.buildid[XO]\n")
368 fmt
.Fprintf(&buf
, "\t.byte ")
369 for i
:= 0; i
< len(a
.buildID
); i
++ {
372 fmt
.Fprintf(&buf
, "\n\t.byte ")
374 fmt
.Fprintf(&buf
, ",")
377 fmt
.Fprintf(&buf
, "%#02x", a
.buildID
[i
])
379 fmt
.Fprintf(&buf
, "\n")
381 if cfg
.BuildN || cfg
.BuildX
{
382 for _
, line
:= range bytes
.Split(buf
.Bytes(), []byte("\n")) {
383 b
.Showcmd("", "echo '%s' >> %s", line
, sfile
)
390 if err
:= ioutil
.WriteFile(sfile
, buf
.Bytes(), 0666); err
!= nil {
397 // buildID returns the build ID found in the given file.
398 // If no build ID is found, buildID returns the content hash of the file.
399 func (b
*Builder
) buildID(file
string) string {
401 id
:= b
.buildIDCache
[file
]
408 id
, err
:= buildid
.ReadFile(file
)
410 id
= b
.fileHash(file
)
414 b
.buildIDCache
[file
] = id
420 // fileHash returns the content hash of the named file.
421 func (b
*Builder
) fileHash(file
string) string {
422 sum
, err
:= cache
.FileHash(file
)
426 return hashToString(sum
)
429 // useCache tries to satisfy the action a, which has action ID actionHash,
430 // by using a cached result from an earlier build. At the moment, the only
431 // cached result is the installed package or binary at target.
432 // If useCache decides that the cache can be used, it sets a.buildID
433 // and a.built for use by parent actions and then returns true.
434 // Otherwise it sets a.buildID to a temporary build ID for use in the build
435 // and returns false. When useCache returns false the expectation is that
436 // the caller will build the target and then call updateBuildID to finish the
437 // build ID computation.
438 // When useCache returns false, it may have initiated buffering of output
439 // during a's work. The caller should defer b.flushOutput(a), to make sure
440 // that flushOutput is eventually called regardless of whether the action
441 // succeeds. The flushOutput call must happen after updateBuildID.
442 func (b
*Builder
) useCache(a
*Action
, p
*load
.Package
, actionHash cache
.ActionID
, target
string) bool {
443 // The second half of the build ID here is a placeholder for the content hash.
444 // It's important that the overall buildID be unlikely verging on impossible
445 // to appear in the output by chance, but that should be taken care of by
446 // the actionID half; if it also appeared in the input that would be like an
447 // engineered 96-bit partial SHA256 collision.
448 a
.actionID
= actionHash
449 actionID
:= hashToString(actionHash
)
450 contentID
:= actionID
// temporary placeholder, likely unique
451 a
.buildID
= actionID
+ buildIDSeparator
+ contentID
453 // Executable binaries also record the main build ID in the middle.
454 // See "Build IDs" comment above.
455 if a
.Mode
== "link" {
457 a
.buildID
= actionID
+ buildIDSeparator
+ mainpkg
.buildID
+ buildIDSeparator
+ contentID
460 // Check to see if target exists and matches the expected action ID.
461 // If so, it's up to date and we can reuse it instead of rebuilding it.
463 if target
!= "" && !cfg
.BuildA
{
464 buildID
, _
= buildid
.ReadFile(target
)
465 if strings
.HasPrefix(buildID
, actionID
+buildIDSeparator
) {
468 // Poison a.Target to catch uses later in the build.
469 a
.Target
= "DO NOT USE - " + a
.Mode
474 // Special case for building a main package: if the only thing we
475 // want the package for is to link a binary, and the binary is
476 // already up-to-date, then to avoid a rebuild, report the package
477 // as up-to-date as well. See "Build IDs" comment above.
478 // TODO(rsc): Rewrite this code to use a TryCache func on the link action.
479 if target
!= "" && !cfg
.BuildA
&& a
.Mode
== "build" && len(a
.triggers
) == 1 && a
.triggers
[0].Mode
== "link" {
480 buildID
, err
:= buildid
.ReadFile(target
)
482 id
:= strings
.Split(buildID
, buildIDSeparator
)
483 if len(id
) == 4 && id
[1] == actionID
{
484 // Temporarily assume a.buildID is the package build ID
485 // stored in the installed binary, and see if that makes
486 // the upcoming link action ID a match. If so, report that
487 // we built the package, safe in the knowledge that the
488 // link step will not ask us for the actual package file.
489 // Note that (*Builder).LinkAction arranged that all of
490 // a.triggers[0]'s dependencies other than a are also
491 // dependencies of a, so that we can be sure that,
492 // other than a.buildID, b.linkActionID is only accessing
493 // build IDs of completed actions.
494 oldBuildID
:= a
.buildID
495 a
.buildID
= id
[1] + buildIDSeparator
+ id
[2]
496 linkID
:= hashToString(b
.linkActionID(a
.triggers
[0]))
498 // Poison a.Target to catch uses later in the build.
499 a
.Target
= "DO NOT USE - main build pseudo-cache Target"
500 a
.built
= "DO NOT USE - main build pseudo-cache built"
503 // Otherwise restore old build ID for main build.
504 a
.buildID
= oldBuildID
509 // Special case for linking a test binary: if the only thing we
510 // want the binary for is to run the test, and the test result is cached,
511 // then to avoid the link step, report the link as up-to-date.
512 // We avoid the nested build ID problem in the previous special case
513 // by recording the test results in the cache under the action ID half.
514 if !cfg
.BuildA
&& len(a
.triggers
) == 1 && a
.triggers
[0].TryCache
!= nil && a
.triggers
[0].TryCache(b
, a
.triggers
[0]) {
515 a
.Target
= "DO NOT USE - pseudo-cache Target"
516 a
.built
= "DO NOT USE - pseudo-cache built"
520 if b
.ComputeStaleOnly
{
521 // Invoked during go list only to compute and record staleness.
522 if p
:= a
.Package
; p
!= nil && !p
.Stale
{
525 p
.StaleReason
= "build -a flag in use"
527 p
.StaleReason
= "build ID mismatch"
528 for _
, p1
:= range p
.Internal
.Imports
{
529 if p1
.Stale
&& p1
.StaleReason
!= "" {
530 if strings
.HasPrefix(p1
.StaleReason
, "stale dependency: ") {
531 p
.StaleReason
= p1
.StaleReason
534 if strings
.HasPrefix(p
.StaleReason
, "build ID mismatch") {
535 p
.StaleReason
= "stale dependency: " + p1
.ImportPath
542 // Fall through to update a.buildID from the build artifact cache,
543 // which will affect the computation of buildIDs for targets
544 // higher up in the dependency graph.
547 // Check the build artifact cache.
548 // We treat hits in this cache as being "stale" for the purposes of go list
549 // (in effect, "stale" means whether p.Target is up-to-date),
550 // but we're still happy to use results from the build artifact cache.
551 if c
:= cache
.Default(); c
!= nil {
553 entry
, err
:= c
.Get(actionHash
)
555 file
:= c
.OutputFile(entry
.OutputID
)
556 info
, err1
:= os
.Stat(file
)
557 buildID
, err2
:= buildid
.ReadFile(file
)
558 if err1
== nil && err2
== nil && info
.Size() == entry
.Size
{
559 stdout
, stdoutEntry
, err
:= c
.GetBytes(cache
.Subkey(a
.actionID
, "stdout"))
562 if cfg
.BuildX || cfg
.BuildN
{
563 b
.Showcmd("", "%s # internal", joinUnambiguously(str
.StringList("cat", c
.OutputFile(stdoutEntry
.OutputID
))))
566 b
.Print(string(stdout
))
570 a
.Target
= "DO NOT USE - using cache"
572 if p
:= a
.Package
; p
!= nil {
573 // Clearer than explaining that something else is stale.
574 p
.StaleReason
= "not installed but available in build cache"
582 // Begin saving output for later writing to cache.
586 if b
.ComputeStaleOnly
{
593 // flushOutput flushes the output being queued in a.
594 func (b
*Builder
) flushOutput(a
*Action
) {
595 b
.Print(string(a
.output
))
599 // updateBuildID updates the build ID in the target written by action a.
600 // It requires that useCache was called for action a and returned false,
601 // and that the build was then carried out and given the temporary
602 // a.buildID to record as the build ID in the resulting package or binary.
603 // updateBuildID computes the final content ID and updates the build IDs in the target.
606 // Keep in sync with src/cmd/buildid/buildid.go
607 func (b
*Builder
) updateBuildID(a
*Action
, target
string, rewrite
bool) error
{
608 if cfg
.BuildX || cfg
.BuildN
{
610 b
.Showcmd("", "%s # internal", joinUnambiguously(str
.StringList(base
.Tool("buildid"), "-w", target
)))
617 // Find occurrences of old ID and compute new content-based ID.
618 r
, err
:= os
.Open(target
)
622 matches
, hash
, err
:= buildid
.FindAndHash(r
, a
.buildID
, 0)
627 newID
:= a
.buildID
[:strings
.LastIndex(a
.buildID
, buildIDSeparator
)] + buildIDSeparator
+ hashToString(hash
)
628 if len(newID
) != len(a
.buildID
) {
629 return fmt
.Errorf("internal error: build ID length mismatch %q vs %q", a
.buildID
, newID
)
632 // Replace with new content-based ID.
634 if len(matches
) == 0 {
635 // Assume the user specified -buildid= to override what we were going to choose.
640 w
, err
:= os
.OpenFile(target
, os
.O_WRONLY
, 0)
644 err
= buildid
.Rewrite(w
, matches
, newID
)
649 if err
:= w
.Close(); err
!= nil {
654 // Cache package builds, but not binaries (link steps).
655 // The expectation is that binaries are not reused
656 // nearly as often as individual packages, and they're
657 // much larger, so the utility-to-cache-footprint ratio
658 // is much lower for binaries.
659 // Not caching the link step also makes sure that repeated "go run" at least
660 // always rerun the linker, so that they don't get too fast.
661 // (We don't want people thinking go is a scripting language.)
662 // Note also that if we start caching binaries, then we will
663 // copy the binaries out of the cache to run them, and then
664 // that will mean the go process is itself writing a binary
665 // and then executing it, so we will need to defend against
666 // ETXTBSY problems as discussed in exec.go and golang.org/issue/22220.
667 if c
:= cache
.Default(); c
!= nil && a
.Mode
== "build" {
668 r
, err
:= os
.Open(target
)
671 panic("internal error: a.output not set")
673 outputID
, _
, err
:= c
.Put(a
.actionID
, r
)
674 if err
== nil && cfg
.BuildX
{
675 b
.Showcmd("", "%s # internal", joinUnambiguously(str
.StringList("cp", target
, c
.OutputFile(outputID
))))
677 c
.PutBytes(cache
.Subkey(a
.actionID
, "stdout"), a
.output
)