libgo/go/debug/goobj/read.go

   1 // Copyright 2013 The Go Authors.  All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Package goobj implements reading of Go object files and archives.
   6 //
   7 // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
   8 // TODO(rsc): Decide the appropriate integer types for various fields.
   9 // TODO(rsc): Write tests. (File format still up in the air a little.)
  10 package goobj
  11
  12 import (
  13         "bufio"
  14         "bytes"
  15         "errors"
  16         "fmt"
  17         "io"
  18         "strconv"
  19         "strings"
  20 )
  21
// A SymKind describes the kind of memory represented by a symbol.
type SymKind int

// This list is taken from include/link.h.

// Defined SymKind values.
// The blank identifier consumes iota 0, so the first named kind, STEXT,
// has the value 1 and the zero SymKind names no kind at all.
// The constants are grouped by the access properties of the memory
// they describe, as noted on each group below.
// TODO(rsc): Give idiomatic Go names.
// TODO(rsc): Reduce the number of symbol types in the object files.
const (
	_ SymKind = iota

	// readonly, executable
	STEXT
	SELFRXSECT

	// readonly, non-executable
	STYPE
	SSTRING
	SGOSTRING
	SGOFUNC
	SRODATA
	SFUNCTAB
	STYPELINK
	SSYMTAB // TODO: move to unmapped section
	SPCLNTAB
	SELFROSECT

	// writable, non-executable
	SMACHOPLT
	SELFSECT
	SMACHO // Mach-O __nl_symbol_ptr
	SMACHOGOT
	SNOPTRDATA
	SINITARR
	SDATA
	SWINDOWS
	SBSS
	SNOPTRBSS
	STLSBSS

	// not mapped
	SXREF
	SMACHOSYMSTR
	SMACHOSYMTAB
	SMACHOINDIRECTPLT
	SMACHOINDIRECTGOT
	SFILE
	SFILEPATH
	SCONST
	SDYNIMPORT
	SHOSTOBJ
)
  74
// A Sym is a named symbol in an object file.
// SymID is embedded, so the symbol's Name and Version are accessible
// directly on the Sym.
type Sym struct {
	SymID         // symbol identifier (name and version)
	Kind  SymKind // kind of symbol
	DupOK bool    // are duplicate definitions okay?
	Size  int     // size of corresponding data
	Type  SymID   // symbol for Go type information
	Data  Data    // memory image of symbol
	Reloc []Reloc // relocations to apply to Data
	Func  *Func   // additional data for functions; set only when Kind is STEXT
}
  86
// A SymID - the combination of Name and Version - uniquely identifies
// a symbol within a package.
type SymID struct {
	// Name is the name of a symbol.
	Name string

	// Version is zero for symbols with global visibility.
	// Symbols with only file visibility (such as file-level static
	// declarations in C) have a non-zero version distinguishing
	// a symbol in one file from a symbol of the same name
	// in another file.
	Version int
}
 100
// A Data is a reference to data stored in an object file.
// It records the offset and size of the data, so that a client can
// read the data only if necessary.
type Data struct {
	Offset int64 // position in the input file at which the data begins
	Size   int64 // length of the data in bytes
}
 108
// A Reloc describes a relocation applied to a memory image to refer
// to an address within a particular symbol.
type Reloc struct {
	// The bytes at [Offset, Offset+Size) within the memory image
	// should be updated to refer to the address Add bytes after the start
	// of the symbol Sym.
	Offset int
	Size   int
	Sym    SymID
	Add    int

	// The Type records the form of address expected in the bytes
	// described by the previous fields: absolute, PC-relative, and so on.
	// The value is stored exactly as read from the object file.
	// TODO(rsc): The interpretation of Type is not exposed by this package.
	Type int
}
 125
// A Var describes a variable in a function stack frame: a declared
// local variable, an input argument, or an output result.
type Var struct {
	// The combination of Name, Kind, and Offset uniquely
	// identifies a variable in a function stack frame.
	// Using fewer of these - in particular, using only Name - does not.
	Name   string // Name of variable.
	Kind   int    // TODO(rsc): Define meaning.
	Offset int    // Frame offset. TODO(rsc): Define meaning.

	Type SymID // Go type for variable.
}
 138
// Func contains additional per-symbol information specific to functions.
// The Data fields are references into the object file; the tables
// themselves are not decoded by this type.
type Func struct {
	Args     int        // size in bytes of argument frame: inputs and outputs
	Frame    int        // size in bytes of local variable frame
	Var      []Var      // detail about local variables
	PCSP     Data       // PC → SP offset map
	PCFile   Data       // PC → file number map (index into File)
	PCLine   Data       // PC → line number map
	PCData   []Data     // PC → runtime support data map
	FuncData []FuncData // non-PC-specific runtime support data
	File     []string   // paths indexed by PCFile
}
 151
 152 // TODO: Add PCData []byte and PCDataIter (similar to liblink).
 153
// A FuncData is a single function-specific data value:
// a pointer to a position inside another symbol.
type FuncData struct {
	Sym    SymID // symbol holding data
	Offset int64 // offset into symbol for funcdata pointer
}
 159
// A Package is a parsed Go object file or archive defining a Go package.
type Package struct {
	ImportPath string   // import path denoting this package
	Imports    []string // packages imported by this package
	Syms       []*Sym   // symbols defined by this package
	MaxVersion int      // maximum Version in any SymID in Syms; incremented once per object file parsed
}
 167
// Magic byte sequences used to recognize the input format, and the
// sentinel errors reported while parsing.
var (
	archiveHeader = []byte("!<arch>\n")  // first 8 bytes of a Unix archive
	archiveMagic  = []byte("`\n")        // last 2 bytes of every archive member header
	goobjHeader   = []byte("go objec") // truncated to size of archiveHeader

	// Errors describing a malformed archive.
	errCorruptArchive   = errors.New("corrupt archive")
	errTruncatedArchive = errors.New("truncated archive")
	errNotArchive       = errors.New("unrecognized archive format")

	// Errors describing a malformed object file.
	errCorruptObject   = errors.New("corrupt object file")
	errTruncatedObject = errors.New("truncated object file")
	errNotObject       = errors.New("unrecognized object file format")
)
 181
// An objReader is an object file reader.
// It tracks its own position in the input and remembers the first
// error encountered, so that the parsing code can read a whole
// structure and check for failure afterwards.
type objReader struct {
	p         *Package      // package being populated
	b         *bufio.Reader // buffered reader layered over f
	f         io.ReadSeeker // underlying input, used directly when seeking
	err       error         // first error recorded, or nil
	offset    int64         // position in f of the next byte to be read
	limit     int64         // position at which reading must stop: end of input, or end of the current archive member
	tmp       [256]byte     // scratch space for headers, magic strings and skipped bytes
	pkg       string        // NOTE(review): not referenced by the code visible here
	pkgprefix string        // escaped import path plus ".", substituted for `"".` in symbol names
}
 194
 195 // importPathToPrefix returns the prefix that will be used in the
 196 // final symbol table for the given import path.
 197 // We escape '%', '"', all control characters and non-ASCII bytes,
 198 // and any '.' after the final slash.
 199 //
 200 // See ../../../cmd/ld/lib.c:/^pathtoprefix and
 201 // ../../../cmd/gc/subr.c:/^pathtoprefix.
 202 func importPathToPrefix(s string) string {
 203         // find index of last slash, if any, or else -1.
 204         // used for determining whether an index is after the last slash.
 205         slash := strings.LastIndex(s, "/")
 206
 207         // check for chars that need escaping
 208         n := 0
 209         for r := 0; r < len(s); r++ {
 210                 if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F {
 211                         n++
 212                 }
 213         }
 214
 215         // quick exit
 216         if n == 0 {
 217                 return s
 218         }
 219
 220         // escape
 221         const hex = "0123456789abcdef"
 222         p := make([]byte, 0, len(s)+2*n)
 223         for r := 0; r < len(s); r++ {
 224                 if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F {
 225                         p = append(p, '%', hex[c>>4], hex[c&0xF])
 226                 } else {
 227                         p = append(p, c)
 228                 }
 229         }
 230
 231         return string(p)
 232 }
 233
 234 // init initializes r to read package p from f.
 235 func (r *objReader) init(f io.ReadSeeker, p *Package) {
 236         r.f = f
 237         r.p = p
 238         r.offset, _ = f.Seek(0, 1)
 239         r.limit, _ = f.Seek(0, 2)
 240         f.Seek(r.offset, 0)
 241         r.b = bufio.NewReader(f)
 242         r.pkgprefix = importPathToPrefix(p.ImportPath) + "."
 243 }
 244
 245 // error records that an error occurred.
 246 // It returns only the first error, so that an error
 247 // caused by an earlier error does not discard information
 248 // about the earlier error.
 249 func (r *objReader) error(err error) error {
 250         if r.err == nil {
 251                 if err == io.EOF {
 252                         err = io.ErrUnexpectedEOF
 253                 }
 254                 r.err = err
 255         }
 256         // panic("corrupt") // useful for debugging
 257         return r.err
 258 }
 259
 260 // readByte reads and returns a byte from the input file.
 261 // On I/O error or EOF, it records the error but returns byte 0.
 262 // A sequence of 0 bytes will eventually terminate any
 263 // parsing state in the object file. In particular, it ends the
 264 // reading of a varint.
 265 func (r *objReader) readByte() byte {
 266         if r.err != nil {
 267                 return 0
 268         }
 269         if r.offset >= r.limit {
 270                 r.error(io.ErrUnexpectedEOF)
 271                 return 0
 272         }
 273         b, err := r.b.ReadByte()
 274         if err != nil {
 275                 if err == io.EOF {
 276                         err = io.ErrUnexpectedEOF
 277                 }
 278                 r.error(err)
 279                 b = 0
 280         } else {
 281                 r.offset++
 282         }
 283         return b
 284 }
 285
 286 // read reads exactly len(b) bytes from the input file.
 287 // If an error occurs, read returns the error but also
 288 // records it, so it is safe for callers to ignore the result
 289 // as long as delaying the report is not a problem.
 290 func (r *objReader) readFull(b []byte) error {
 291         if r.err != nil {
 292                 return r.err
 293         }
 294         if r.offset+int64(len(b)) > r.limit {
 295                 return r.error(io.ErrUnexpectedEOF)
 296         }
 297         n, err := io.ReadFull(r.b, b)
 298         r.offset += int64(n)
 299         if err != nil {
 300                 return r.error(err)
 301         }
 302         return nil
 303 }
 304
 305 // readInt reads a zigzag varint from the input file.
 306 func (r *objReader) readInt() int {
 307         var u uint64
 308
 309         for shift := uint(0); ; shift += 7 {
 310                 if shift >= 64 {
 311                         r.error(errCorruptObject)
 312                         return 0
 313                 }
 314                 c := r.readByte()
 315                 u |= uint64(c&0x7F) << shift
 316                 if c&0x80 == 0 {
 317                         break
 318                 }
 319         }
 320
 321         v := int64(u>>1) ^ (int64(u) << 63 >> 63)
 322         if int64(int(v)) != v {
 323                 r.error(errCorruptObject) // TODO
 324                 return 0
 325         }
 326         return int(v)
 327 }
 328
 329 // readString reads a length-delimited string from the input file.
 330 func (r *objReader) readString() string {
 331         n := r.readInt()
 332         buf := make([]byte, n)
 333         r.readFull(buf)
 334         return string(buf)
 335 }
 336
 337 // readSymID reads a SymID from the input file.
 338 func (r *objReader) readSymID() SymID {
 339         name, vers := r.readString(), r.readInt()
 340
 341         // In a symbol name in an object file, "". denotes the
 342         // prefix for the package in which the object file has been found.
 343         // Expand it.
 344         name = strings.Replace(name, `"".`, r.pkgprefix, -1)
 345
 346         // An individual object file only records version 0 (extern) or 1 (static).
 347         // To make static symbols unique across all files being read, we
 348         // replace version 1 with the version corresponding to the current
 349         // file number. The number is incremented on each call to parseObject.
 350         if vers != 0 {
 351                 vers = r.p.MaxVersion
 352         }
 353
 354         return SymID{name, vers}
 355 }
 356
 357 // readData reads a data reference from the input file.
 358 func (r *objReader) readData() Data {
 359         n := r.readInt()
 360         d := Data{Offset: r.offset, Size: int64(n)}
 361         r.skip(int64(n))
 362         return d
 363 }
 364
 365 // skip skips n bytes in the input.
 366 func (r *objReader) skip(n int64) {
 367         if n < 0 {
 368                 r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
 369         }
 370         if n < int64(len(r.tmp)) {
 371                 // Since the data is so small, a just reading from the buffered
 372                 // reader is better than flushing the buffer and seeking.
 373                 r.readFull(r.tmp[:n])
 374         } else if n <= int64(r.b.Buffered()) {
 375                 // Even though the data is not small, it has already been read.
 376                 // Advance the buffer instead of seeking.
 377                 for n > int64(len(r.tmp)) {
 378                         r.readFull(r.tmp[:])
 379                         n -= int64(len(r.tmp))
 380                 }
 381                 r.readFull(r.tmp[:n])
 382         } else {
 383                 // Seek, giving up buffered data.
 384                 _, err := r.f.Seek(r.offset+n, 0)
 385                 if err != nil {
 386                         r.error(err)
 387                 }
 388                 r.offset += n
 389                 r.b.Reset(r.f)
 390         }
 391 }
 392
 393 // Parse parses an object file or archive from r,
 394 // assuming that its import path is pkgpath.
 395 func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) {
 396         if pkgpath == "" {
 397                 pkgpath = `""`
 398         }
 399         p := new(Package)
 400         p.ImportPath = pkgpath
 401
 402         var rd objReader
 403         rd.init(r, p)
 404         err := rd.readFull(rd.tmp[:8])
 405         if err != nil {
 406                 if err == io.EOF {
 407                         err = io.ErrUnexpectedEOF
 408                 }
 409                 return nil, err
 410         }
 411
 412         switch {
 413         default:
 414                 return nil, errNotObject
 415
 416         case bytes.Equal(rd.tmp[:8], archiveHeader):
 417                 if err := rd.parseArchive(); err != nil {
 418                         return nil, err
 419                 }
 420         case bytes.Equal(rd.tmp[:8], goobjHeader):
 421                 if err := rd.parseObject(goobjHeader); err != nil {
 422                         return nil, err
 423                 }
 424         }
 425
 426         return p, nil
 427 }
 428
 429 // trimSpace removes trailing spaces from b and returns the corresponding string.
 430 // This effectively parses the form used in archive headers.
 431 func trimSpace(b []byte) string {
 432         return string(bytes.TrimRight(b, " "))
 433 }
 434
 435 // parseArchive parses a Unix archive of Go object files.
 436 // TODO(rsc): Need to skip non-Go object files.
 437 // TODO(rsc): Maybe record table of contents in r.p so that
 438 // linker can avoid having code to parse archives too.
 439 func (r *objReader) parseArchive() error {
 440         for r.offset < r.limit {
 441                 if err := r.readFull(r.tmp[:60]); err != nil {
 442                         return err
 443                 }
 444                 data := r.tmp[:60]
 445
 446                 // Each file is preceded by this text header (slice indices in first column):
 447                 //       0:16   name
 448                 //      16:28 date
 449                 //      28:34 uid
 450                 //      34:40 gid
 451                 //      40:48 mode
 452                 //      48:58 size
 453                 //      58:60 magic - `\n
 454                 // We only care about name, size, and magic.
 455                 // The fields are space-padded on the right.
 456                 // The size is in decimal.
 457                 // The file data - size bytes - follows the header.
 458                 // Headers are 2-byte aligned, so if size is odd, an extra padding
 459                 // byte sits between the file data and the next header.
 460                 // The file data that follows is padded to an even number of bytes:
 461                 // if size is odd, an extra padding byte is inserted betw the next header.
 462                 if len(data) < 60 {
 463                         return errTruncatedArchive
 464                 }
 465                 if !bytes.Equal(data[58:60], archiveMagic) {
 466                         return errCorruptArchive
 467                 }
 468                 name := trimSpace(data[0:16])
 469                 size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
 470                 if err != nil {
 471                         return errCorruptArchive
 472                 }
 473                 data = data[60:]
 474                 fsize := size + size&1
 475                 if fsize < 0 || fsize < size {
 476                         return errCorruptArchive
 477                 }
 478                 switch name {
 479                 case "__.SYMDEF", "__.GOSYMDEF", "__.PKGDEF":
 480                         r.skip(size)
 481                 default:
 482                         oldLimit := r.limit
 483                         r.limit = r.offset + size
 484                         if err := r.parseObject(nil); err != nil {
 485                                 return fmt.Errorf("parsing archive member %q: %v", name, err)
 486                         }
 487                         r.skip(r.limit - r.offset)
 488                         r.limit = oldLimit
 489                 }
 490                 if size&1 != 0 {
 491                         r.skip(1)
 492                 }
 493         }
 494         return nil
 495 }
 496
 497 // parseObject parses a single Go object file.
 498 // The prefix is the bytes already read from the file,
 499 // typically in order to detect that this is an object file.
 500 // The object file consists of a textual header ending in "\n!\n"
 501 // and then the part we want to parse begins.
 502 // The format of that part is defined in a comment at the top
 503 // of src/liblink/objfile.c.
 504 func (r *objReader) parseObject(prefix []byte) error {
 505         // TODO(rsc): Maybe use prefix and the initial input to
 506         // record the header line from the file, which would
 507         // give the architecture and other version information.
 508
 509         r.p.MaxVersion++
 510         var c1, c2, c3 byte
 511         for {
 512                 c1, c2, c3 = c2, c3, r.readByte()
 513                 if c3 == 0 { // NUL or EOF, either is bad
 514                         return errCorruptObject
 515                 }
 516                 if c1 == '\n' && c2 == '!' && c3 == '\n' {
 517                         break
 518                 }
 519         }
 520
 521         r.readFull(r.tmp[:8])
 522         if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go13ld")) {
 523                 return r.error(errCorruptObject)
 524         }
 525
 526         // Direct package dependencies.
 527         for {
 528                 s := r.readString()
 529                 if s == "" {
 530                         break
 531                 }
 532                 r.p.Imports = append(r.p.Imports, s)
 533         }
 534
 535         // Symbols.
 536         for {
 537                 if b := r.readByte(); b != 0xfe {
 538                         if b != 0xff {
 539                                 return r.error(errCorruptObject)
 540                         }
 541                         break
 542                 }
 543
 544                 typ := r.readInt()
 545                 s := &Sym{SymID: r.readSymID()}
 546                 r.p.Syms = append(r.p.Syms, s)
 547                 s.Kind = SymKind(typ)
 548                 s.DupOK = r.readInt() != 0
 549                 s.Size = r.readInt()
 550                 s.Type = r.readSymID()
 551                 s.Data = r.readData()
 552                 s.Reloc = make([]Reloc, r.readInt())
 553                 for i := range s.Reloc {
 554                         rel := &s.Reloc[i]
 555                         rel.Offset = r.readInt()
 556                         rel.Size = r.readInt()
 557                         rel.Type = r.readInt()
 558                         rel.Add = r.readInt()
 559                         r.readInt() // Xadd - ignored
 560                         rel.Sym = r.readSymID()
 561                         r.readSymID() // Xsym - ignored
 562                 }
 563
 564                 if s.Kind == STEXT {
 565                         f := new(Func)
 566                         s.Func = f
 567                         f.Args = r.readInt()
 568                         f.Frame = r.readInt()
 569                         f.Var = make([]Var, r.readInt())
 570                         for i := range f.Var {
 571                                 v := &f.Var[i]
 572                                 v.Name = r.readSymID().Name
 573                                 v.Offset = r.readInt()
 574                                 v.Kind = r.readInt()
 575                                 v.Type = r.readSymID()
 576                         }
 577
 578                         f.PCSP = r.readData()
 579                         f.PCFile = r.readData()
 580                         f.PCLine = r.readData()
 581                         f.PCData = make([]Data, r.readInt())
 582                         for i := range f.PCData {
 583                                 f.PCData[i] = r.readData()
 584                         }
 585                         f.FuncData = make([]FuncData, r.readInt())
 586                         for i := range f.FuncData {
 587                                 f.FuncData[i].Sym = r.readSymID()
 588                         }
 589                         for i := range f.FuncData {
 590                                 f.FuncData[i].Offset = int64(r.readInt()) // TODO
 591                         }
 592                         f.File = make([]string, r.readInt())
 593                         for i := range f.File {
 594                                 f.File[i] = r.readSymID().Name
 595                         }
 596                 }
 597         }
 598
 599         r.readFull(r.tmp[:7])
 600         if !bytes.Equal(r.tmp[:7], []byte("\xffgo13ld")) {
 601                 return r.error(errCorruptObject)
 602         }
 603
 604         return nil
 605 }