1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package goobj implements reading of Go object files and archives.
7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
8 // TODO(rsc): Decide the appropriate integer types for various fields.
9 // TODO(rsc): Write tests. (File format still up in the air a little.)
22 // A SymKind describes the kind of memory represented by a symbol.
25 // This list is taken from include/link.h.
27 // Defined SymKind values.
28 // TODO(rsc): Give idiomatic Go names.
29 // TODO(rsc): Reduce the number of symbol types in the object files.
33 // readonly, executable
37 // readonly, non-executable
45 SSYMTAB
// TODO: move to unmapped section
49 // writable, non-executable
52 SMACHO
// Mach-O __nl_symbol_ptr
75 // A Sym is a named symbol in an object file.
77 SymID
// symbol identifier (name and version)
78 Kind SymKind
// kind of symbol
79 DupOK
bool // are duplicate definitions okay?
80 Size
int // size of corresponding data
81 Type SymID
// symbol for Go type information
82 Data Data
// memory image of symbol
83 Reloc
[]Reloc
// relocations to apply to Data
84 Func
*Func
// additional data for functions
87 // A SymID - the combination of Name and Version - uniquely identifies
88 // a symbol within a package.
90 // Name is the name of a symbol.
93 // Version is zero for symbols with global visibility.
94 // Symbols with only file visibility (such as file-level static
95 // declarations in C) have a non-zero version distinguishing
96 // a symbol in one file from a symbol of the same name
101 // A Data is a reference to data stored in an object file.
102 // It records the offset and size of the data, so that a client can
103 // read the data only if necessary.
109 // A Reloc describes a relocation applied to a memory image to refer
110 // to an address within a particular symbol.
112 // The bytes at [Offset, Offset+Size) within the memory image
113 // should be updated to refer to the address Add bytes after the start
114 // of the symbol Sym.
120 // The Type records the form of address expected in the bytes
121 // described by the previous fields: absolute, PC-relative, and so on.
122 // TODO(rsc): The interpretation of Type is not exposed by this package.
126 // A Var describes a variable in a function stack frame: a declared
127 // local variable, an input argument, or an output result.
129 // The combination of Name, Kind, and Offset uniquely
130 // identifies a variable in a function stack frame.
131 // Using fewer of these - in particular, using only Name - does not.
132 Name
string // Name of variable.
133 Kind
int // TODO(rsc): Define meaning.
134 Offset
int // Frame offset. TODO(rsc): Define meaning.
136 Type SymID
// Go type for variable.
139 // Func contains additional per-symbol information specific to functions.
141 Args
int // size in bytes of argument frame: inputs and outputs
142 Frame
int // size in bytes of local variable frame
143 Var
[]Var
// detail about local variables
144 PCSP Data
// PC → SP offset map
145 PCFile Data
// PC → file number map (index into File)
146 PCLine Data
// PC → line number map
147 PCData
[]Data
// PC → runtime support data map
148 FuncData
[]FuncData
// non-PC-specific runtime support data
149 File
[]string // paths indexed by PCFile
152 // TODO: Add PCData []byte and PCDataIter (similar to liblink).
154 // A FuncData is a single function-specific data value.
155 type FuncData
struct {
156 Sym SymID
// symbol holding data
157 Offset
int64 // offset into symbol for funcdata pointer
160 // A Package is a parsed Go object file or archive defining a Go package.
161 type Package
struct {
162 ImportPath
string // import path denoting this package
163 Imports
[]string // packages imported by this package
164 Syms
[]*Sym
// symbols defined by this package
165 MaxVersion
int // maximum Version in any SymID in Syms
169 archiveHeader
= []byte("!<arch>\n")
170 archiveMagic
= []byte("`\n")
171 goobjHeader
= []byte("go objec") // truncated to size of archiveHeader
173 errCorruptArchive
= errors
.New("corrupt archive")
174 errTruncatedArchive
= errors
.New("truncated archive")
175 errNotArchive
= errors
.New("unrecognized archive format")
177 errCorruptObject
= errors
.New("corrupt object file")
178 errTruncatedObject
= errors
.New("truncated object file")
179 errNotObject
= errors
.New("unrecognized object file format")
182 // An objReader is an object file reader.
183 type objReader
struct {
195 // importPathToPrefix returns the prefix that will be used in the
196 // final symbol table for the given import path.
197 // We escape '%', '"', all control characters and non-ASCII bytes,
198 // and any '.' after the final slash.
200 // See ../../../cmd/ld/lib.c:/^pathtoprefix and
201 // ../../../cmd/gc/subr.c:/^pathtoprefix.
202 func importPathToPrefix(s
string) string {
203 // find index of last slash, if any, or else -1.
204 // used for determining whether an index is after the last slash.
205 slash
:= strings
.LastIndex(s
, "/")
207 // check for chars that need escaping
209 for r
:= 0; r
< len(s
); r
++ {
210 if c
:= s
[r
]; c
<= ' ' ||
(c
== '.' && r
> slash
) || c
== '%' || c
== '"' || c
>= 0x7F {
221 const hex
= "0123456789abcdef"
222 p
:= make([]byte, 0, len(s
)+2*n
)
223 for r
:= 0; r
< len(s
); r
++ {
224 if c
:= s
[r
]; c
<= ' ' ||
(c
== '.' && r
> slash
) || c
== '%' || c
== '"' || c
>= 0x7F {
225 p
= append(p
, '%', hex
[c
>>4], hex
[c
&0xF])
234 // init initializes r to read package p from f.
235 func (r
*objReader
) init(f io
.ReadSeeker
, p
*Package
) {
238 r
.offset
, _
= f
.Seek(0, 1)
239 r
.limit
, _
= f
.Seek(0, 2)
241 r
.b
= bufio
.NewReader(f
)
242 r
.pkgprefix
= importPathToPrefix(p
.ImportPath
) + "."
245 // error records that an error occurred.
246 // It returns only the first error, so that an error
247 // caused by an earlier error does not discard information
248 // about the earlier error.
249 func (r
*objReader
) error(err error
) error
{
252 err
= io
.ErrUnexpectedEOF
256 // panic("corrupt") // useful for debugging
260 // readByte reads and returns a byte from the input file.
261 // On I/O error or EOF, it records the error but returns byte 0.
262 // A sequence of 0 bytes will eventually terminate any
263 // parsing state in the object file. In particular, it ends the
264 // reading of a varint.
265 func (r
*objReader
) readByte() byte {
269 if r
.offset
>= r
.limit
{
270 r
.error(io
.ErrUnexpectedEOF
)
273 b
, err
:= r
.b
.ReadByte()
276 err
= io
.ErrUnexpectedEOF
286 // readFull reads exactly len(b) bytes from the input file.
287 // If an error occurs, readFull returns the error but also
288 // records it, so it is safe for callers to ignore the result
289 // as long as delaying the report is not a problem.
290 func (r
*objReader
) readFull(b
[]byte) error
{
294 if r
.offset
+int64(len(b
)) > r
.limit
{
295 return r
.error(io
.ErrUnexpectedEOF
)
297 n
, err
:= io
.ReadFull(r
.b
, b
)
305 // readInt reads a zigzag varint from the input file.
306 func (r
*objReader
) readInt() int {
309 for shift
:= uint(0); ; shift
+= 7 {
311 r
.error(errCorruptObject
)
315 u |
= uint64(c
&0x7F) << shift
321 v
:= int64(u
>>1) ^ (int64(u
) << 63 >> 63)
322 if int64(int(v
)) != v
{
323 r
.error(errCorruptObject
) // TODO
329 // readString reads a length-delimited string from the input file.
330 func (r
*objReader
) readString() string {
332 buf
:= make([]byte, n
)
337 // readSymID reads a SymID from the input file.
338 func (r
*objReader
) readSymID() SymID
{
339 name
, vers
:= r
.readString(), r
.readInt()
341 // In a symbol name in an object file, "". denotes the
342 // prefix for the package in which the object file has been found.
344 name
= strings
.Replace(name
, `"".`, r
.pkgprefix
, -1)
346 // An individual object file only records version 0 (extern) or 1 (static).
347 // To make static symbols unique across all files being read, we
348 // replace version 1 with the version corresponding to the current
349 // file number. The number is incremented on each call to parseObject.
351 vers
= r
.p
.MaxVersion
354 return SymID
{name
, vers
}
357 // readData reads a data reference from the input file.
358 func (r
*objReader
) readData() Data
{
360 d
:= Data
{Offset
: r
.offset
, Size
: int64(n
)}
365 // skip skips n bytes in the input.
366 func (r
*objReader
) skip(n
int64) {
368 r
.error(fmt
.Errorf("debug/goobj: internal error: misuse of skip"))
370 if n
< int64(len(r
.tmp
)) {
371 // Since the data is so small, just reading from the buffered
372 // reader is better than flushing the buffer and seeking.
373 r
.readFull(r
.tmp
[:n
])
374 } else if n
<= int64(r
.b
.Buffered()) {
375 // Even though the data is not small, it has already been read.
376 // Advance the buffer instead of seeking.
377 for n
> int64(len(r
.tmp
)) {
379 n
-= int64(len(r
.tmp
))
381 r
.readFull(r
.tmp
[:n
])
383 // Seek, giving up buffered data.
384 _
, err
:= r
.f
.Seek(r
.offset
+n
, 0)
393 // Parse parses an object file or archive from r,
394 // assuming that its import path is pkgpath.
395 func Parse(r io
.ReadSeeker
, pkgpath
string) (*Package
, error
) {
400 p
.ImportPath
= pkgpath
404 err
:= rd
.readFull(rd
.tmp
[:8])
407 err
= io
.ErrUnexpectedEOF
414 return nil, errNotObject
416 case bytes
.Equal(rd
.tmp
[:8], archiveHeader
):
417 if err
:= rd
.parseArchive(); err
!= nil {
420 case bytes
.Equal(rd
.tmp
[:8], goobjHeader
):
421 if err
:= rd
.parseObject(goobjHeader
); err
!= nil {
429 // trimSpace removes trailing spaces from b and returns the corresponding string.
430 // This effectively parses the form used in archive headers.
431 func trimSpace(b
[]byte) string {
432 return string(bytes
.TrimRight(b
, " "))
435 // parseArchive parses a Unix archive of Go object files.
436 // TODO(rsc): Need to skip non-Go object files.
437 // TODO(rsc): Maybe record table of contents in r.p so that
438 // linker can avoid having code to parse archives too.
439 func (r
*objReader
) parseArchive() error
{
440 for r
.offset
< r
.limit
{
441 if err
:= r
.readFull(r
.tmp
[:60]); err
!= nil {
446 // Each file is preceded by this text header (slice indices in first column):
454 // We only care about name, size, and magic.
455 // The fields are space-padded on the right.
456 // The size is in decimal.
457 // The file data - size bytes - follows the header.
458 // Headers are 2-byte aligned, so if size is odd, an extra padding
459 // byte sits between the file data and the next header.
460 // The file data that follows is padded to an even number of bytes:
461 // if size is odd, an extra padding byte is inserted before the next header.
463 return errTruncatedArchive
465 if !bytes
.Equal(data
[58:60], archiveMagic
) {
466 return errCorruptArchive
468 name
:= trimSpace(data
[0:16])
469 size
, err
:= strconv
.ParseInt(trimSpace(data
[48:58]), 10, 64)
471 return errCorruptArchive
474 fsize
:= size
+ size
&1
475 if fsize
< 0 || fsize
< size
{
476 return errCorruptArchive
479 case "__.SYMDEF", "__.GOSYMDEF", "__.PKGDEF":
483 r
.limit
= r
.offset
+ size
484 if err
:= r
.parseObject(nil); err
!= nil {
485 return fmt
.Errorf("parsing archive member %q: %v", name
, err
)
487 r
.skip(r
.limit
- r
.offset
)
497 // parseObject parses a single Go object file.
498 // The prefix is the bytes already read from the file,
499 // typically in order to detect that this is an object file.
500 // The object file consists of a textual header ending in "\n!\n"
501 // and then the part we want to parse begins.
502 // The format of that part is defined in a comment at the top
503 // of src/liblink/objfile.c.
504 func (r
*objReader
) parseObject(prefix
[]byte) error
{
505 // TODO(rsc): Maybe use prefix and the initial input to
506 // record the header line from the file, which would
507 // give the architecture and other version information.
512 c1
, c2
, c3
= c2
, c3
, r
.readByte()
513 if c3
== 0 { // NUL or EOF, either is bad
514 return errCorruptObject
516 if c1
== '\n' && c2
== '!' && c3
== '\n' {
521 r
.readFull(r
.tmp
[:8])
522 if !bytes
.Equal(r
.tmp
[:8], []byte("\x00\x00go13ld")) {
523 return r
.error(errCorruptObject
)
526 // Direct package dependencies.
532 r
.p
.Imports
= append(r
.p
.Imports
, s
)
537 if b
:= r
.readByte(); b
!= 0xfe {
539 return r
.error(errCorruptObject
)
545 s
:= &Sym
{SymID
: r
.readSymID()}
546 r
.p
.Syms
= append(r
.p
.Syms
, s
)
547 s
.Kind
= SymKind(typ
)
548 s
.DupOK
= r
.readInt() != 0
550 s
.Type
= r
.readSymID()
551 s
.Data
= r
.readData()
552 s
.Reloc
= make([]Reloc
, r
.readInt())
553 for i
:= range s
.Reloc
{
555 rel
.Offset
= r
.readInt()
556 rel
.Size
= r
.readInt()
557 rel
.Type
= r
.readInt()
558 rel
.Add
= r
.readInt()
559 r
.readInt() // Xadd - ignored
560 rel
.Sym
= r
.readSymID()
561 r
.readSymID() // Xsym - ignored
568 f
.Frame
= r
.readInt()
569 f
.Var
= make([]Var
, r
.readInt())
570 for i
:= range f
.Var
{
572 v
.Name
= r
.readSymID().Name
573 v
.Offset
= r
.readInt()
575 v
.Type
= r
.readSymID()
578 f
.PCSP
= r
.readData()
579 f
.PCFile
= r
.readData()
580 f
.PCLine
= r
.readData()
581 f
.PCData
= make([]Data
, r
.readInt())
582 for i
:= range f
.PCData
{
583 f
.PCData
[i
] = r
.readData()
585 f
.FuncData
= make([]FuncData
, r
.readInt())
586 for i
:= range f
.FuncData
{
587 f
.FuncData
[i
].Sym
= r
.readSymID()
589 for i
:= range f
.FuncData
{
590 f
.FuncData
[i
].Offset
= int64(r
.readInt()) // TODO
592 f
.File
= make([]string, r
.readInt())
593 for i
:= range f
.File
{
594 f
.File
[i
] = r
.readSymID().Name
599 r
.readFull(r
.tmp
[:7])
600 if !bytes
.Equal(r
.tmp
[:7], []byte("\xffgo13ld")) {
601 return r
.error(errCorruptObject
)