1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/DebugInfo/MSF/MSFCommon.h"
13 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
14 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
15 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
16 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
17 #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
18 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/RawError.h"
21 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23 #include "llvm/Support/BinaryStream.h"
24 #include "llvm/Support/BinaryStreamArray.h"
25 #include "llvm/Support/BinaryStreamReader.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/Path.h"
34 using namespace llvm::codeview
;
35 using namespace llvm::msf
;
36 using namespace llvm::pdb
;
39 typedef FixedStreamArray
<support::ulittle32_t
> ulittle_array
;
40 } // end anonymous namespace
42 PDBFile::PDBFile(StringRef Path
, std::unique_ptr
<BinaryStream
> PdbFileBuffer
,
43 BumpPtrAllocator
&Allocator
)
44 : FilePath(Path
), Allocator(Allocator
), Buffer(std::move(PdbFileBuffer
)) {}
46 PDBFile::~PDBFile() = default;
48 StringRef
PDBFile::getFilePath() const { return FilePath
; }
50 StringRef
PDBFile::getFileDirectory() const {
51 return sys::path::parent_path(FilePath
);
54 uint32_t PDBFile::getBlockSize() const { return ContainerLayout
.SB
->BlockSize
; }
56 uint32_t PDBFile::getFreeBlockMapBlock() const {
57 return ContainerLayout
.SB
->FreeBlockMapBlock
;
60 uint32_t PDBFile::getBlockCount() const {
61 return ContainerLayout
.SB
->NumBlocks
;
64 uint32_t PDBFile::getNumDirectoryBytes() const {
65 return ContainerLayout
.SB
->NumDirectoryBytes
;
68 uint32_t PDBFile::getBlockMapIndex() const {
69 return ContainerLayout
.SB
->BlockMapAddr
;
72 uint32_t PDBFile::getUnknown1() const { return ContainerLayout
.SB
->Unknown1
; }
74 uint32_t PDBFile::getNumDirectoryBlocks() const {
75 return msf::bytesToBlocks(ContainerLayout
.SB
->NumDirectoryBytes
,
76 ContainerLayout
.SB
->BlockSize
);
79 uint64_t PDBFile::getBlockMapOffset() const {
80 return (uint64_t)ContainerLayout
.SB
->BlockMapAddr
*
81 ContainerLayout
.SB
->BlockSize
;
84 uint32_t PDBFile::getNumStreams() const {
85 return ContainerLayout
.StreamSizes
.size();
88 uint32_t PDBFile::getMaxStreamSize() const {
89 return *std::max_element(ContainerLayout
.StreamSizes
.begin(),
90 ContainerLayout
.StreamSizes
.end());
93 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex
) const {
94 return ContainerLayout
.StreamSizes
[StreamIndex
];
97 ArrayRef
<support::ulittle32_t
>
98 PDBFile::getStreamBlockList(uint32_t StreamIndex
) const {
99 return ContainerLayout
.StreamMap
[StreamIndex
];
102 uint32_t PDBFile::getFileSize() const { return Buffer
->getLength(); }
104 Expected
<ArrayRef
<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex
,
105 uint32_t NumBytes
) const {
106 uint64_t StreamBlockOffset
= msf::blockToOffset(BlockIndex
, getBlockSize());
108 ArrayRef
<uint8_t> Result
;
109 if (auto EC
= Buffer
->readBytes(StreamBlockOffset
, NumBytes
, Result
))
110 return std::move(EC
);
114 Error
PDBFile::setBlockData(uint32_t BlockIndex
, uint32_t Offset
,
115 ArrayRef
<uint8_t> Data
) const {
116 return make_error
<RawError
>(raw_error_code::not_writable
,
117 "PDBFile is immutable");
/// Reads the MSF superblock from Buffer, validates it, and then fills in
/// ContainerLayout.SB, ContainerLayout.FreePageMap and
/// ContainerLayout.DirectoryBlocks.
///
/// Returns a RawError with raw_error_code::corrupt_file when the superblock
/// cannot be read or when the file size is not a multiple of the block size.
// NOTE(review): the embedded listing numbers skip lines inside this function
// (e.g. after 131, 153, 158, 160, 164, 166 and 174), so some statements appear
// to be missing from this copy -- confirm against the upstream file before
// relying on it.
120 Error
PDBFile::parseFileHeaders() {
121 BinaryStreamReader
Reader(*Buffer
);
124 const msf::SuperBlock
*SB
= nullptr;
// A failed read is reported as a corrupt file; the original error is consumed.
125 if (auto EC
= Reader
.readObject(SB
)) {
126 consumeError(std::move(EC
));
127 return make_error
<RawError
>(raw_error_code::corrupt_file
,
128 "MSF superblock is missing");
131 if (auto EC
= msf::validateSuperBlock(*SB
))
134 if (Buffer
->getLength() % SB
->BlockSize
!= 0)
135 return make_error
<RawError
>(raw_error_code::corrupt_file
,
136 "File size is not a multiple of block size");
137 ContainerLayout
.SB
= SB
;
139 // Initialize Free Page Map.
140 ContainerLayout
.FreePageMap
.resize(SB
->NumBlocks
);
141 // The Fpm exists either at block 1 or block 2 of the MSF. However, this
142 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
143 // thusly an equal number of total blocks in the file. For a block size
144 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
145 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
146 // the Fpm is split across the file at `getBlockSize()` intervals. As a
147 // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
148 // for any non-negative integer k is an Fpm block. In theory, we only really
149 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
150 // current versions of the MSF format already expect the Fpm to be arranged
151 // at getBlockSize() intervals, so we have to be compatible.
152 // See the function fpmPn() for more information:
153 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
155 MappedBlockStream::createFpmStream(ContainerLayout
, *Buffer
, Allocator
);
156 BinaryStreamReader
FpmReader(*FpmStream
);
157 ArrayRef
<uint8_t> FpmBytes
;
158 if (auto EC
= FpmReader
.readBytes(FpmBytes
, FpmReader
.bytesRemaining()))
160 uint32_t BlocksRemaining
= getBlockCount();
// Each Fpm byte covers up to 8 blocks; the last byte may cover fewer.
162 for (auto Byte
: FpmBytes
) {
163 uint32_t BlocksThisByte
= std::min(BlocksRemaining
, 8U);
164 for (uint32_t I
= 0; I
< BlocksThisByte
; ++I
) {
166 ContainerLayout
.FreePageMap
[BI
] = true;
// The directory block list is read from the block map offset.
172 Reader
.setOffset(getBlockMapOffset());
173 if (auto EC
= Reader
.readArray(ContainerLayout
.DirectoryBlocks
,
174 getNumDirectoryBlocks()))
177 return Error::success();
/// Parses the stream directory: reads the stream count, the per-stream sizes
/// (into ContainerLayout.StreamSizes) and each stream's block list (appended
/// to ContainerLayout.StreamMap). A block whose end offset lies beyond the
/// file size is rejected with raw_error_code::corrupt_file. On success the
/// directory stream is stored in DirectoryStream.
///
/// Requires ContainerLayout.SB to be set (asserted).
// NOTE(review): the embedded listing numbers skip lines inside this function
// (e.g. 182, 193, 196, 199, 205, 216), so some statements -- including the
// condition guarding the first `return Error::success()` -- appear to be
// missing from this copy; confirm against the upstream file.
180 Error
PDBFile::parseStreamData() {
181 assert(ContainerLayout
.SB
);
183 return Error::success();
185 uint32_t NumStreams
= 0;
187 // Normally you can't use a MappedBlockStream without having fully parsed the
188 // PDB file, because it accesses the directory and various other things, which
189 // is exactly what we are attempting to parse. By specifying a custom
190 // subclass of IPDBStreamData which only accesses the fields that have already
191 // been parsed, we can avoid this and reuse MappedBlockStream.
192 auto DS
= MappedBlockStream::createDirectoryStream(ContainerLayout
, *Buffer
,
194 BinaryStreamReader
Reader(*DS
);
195 if (auto EC
= Reader
.readInteger(NumStreams
))
198 if (auto EC
= Reader
.readArray(ContainerLayout
.StreamSizes
, NumStreams
))
200 for (uint32_t I
= 0; I
< NumStreams
; ++I
) {
201 uint32_t StreamSize
= getStreamByteSize(I
);
202 // FIXME: What does StreamSize ~0U mean?
203 uint64_t NumExpectedStreamBlocks
=
204 StreamSize
== UINT32_MAX
206 : msf::bytesToBlocks(StreamSize
, ContainerLayout
.SB
->BlockSize
);
208 // For convenience, we store the block array contiguously. This is because
209 // if someone calls setStreamMap(), it is more convenient to be able to call
210 // it with an ArrayRef instead of setting up a StreamRef. Since the
211 // DirectoryStream is cached in the class and thus lives for the life of the
212 // class, we can be guaranteed that readArray() will return a stable
213 // reference, even if it has to allocate from its internal pool.
214 ArrayRef
<support::ulittle32_t
> Blocks
;
215 if (auto EC
= Reader
.readArray(Blocks
, NumExpectedStreamBlocks
))
// NOTE(review): `Block + 1` is evaluated in uint32_t before the cast to
// uint64_t, so Block == UINT32_MAX wraps to 0 and would pass the bounds check
// below -- consider widening Block before adding 1.
217 for (uint32_t Block
: Blocks
) {
218 uint64_t BlockEndOffset
=
219 (uint64_t)(Block
+ 1) * ContainerLayout
.SB
->BlockSize
;
220 if (BlockEndOffset
> getFileSize())
221 return make_error
<RawError
>(raw_error_code::corrupt_file
,
222 "Stream block map is corrupt.");
224 ContainerLayout
.StreamMap
.push_back(Blocks
);
227 // We should have read exactly SB->NumDirectoryBytes bytes.
228 assert(Reader
.bytesRemaining() == 0);
229 DirectoryStream
= std::move(DS
);
230 return Error::success();
233 ArrayRef
<support::ulittle32_t
> PDBFile::getDirectoryBlockArray() const {
234 return ContainerLayout
.DirectoryBlocks
;
/// Creates a MappedBlockStream for stream \p SN via
/// MappedBlockStream::createIndexedStream, with kInvalidStreamIndex handled as
/// a special case first.
// NOTE(review): the statement executed for kInvalidStreamIndex (listing line
// 240) and the final argument of the createIndexedStream call (listing line
// 242) are not present in this copy -- confirm against the upstream file.
237 std::unique_ptr
<MappedBlockStream
>
238 PDBFile::createIndexedStream(uint16_t SN
) const {
239 if (SN
== kInvalidStreamIndex
)
241 return MappedBlockStream::createIndexedStream(ContainerLayout
, *Buffer
, SN
,
245 MSFStreamLayout
PDBFile::getStreamLayout(uint32_t StreamIdx
) const {
246 MSFStreamLayout Result
;
247 auto Blocks
= getStreamBlockList(StreamIdx
);
248 Result
.Blocks
.assign(Blocks
.begin(), Blocks
.end());
249 Result
.Length
= getStreamByteSize(StreamIdx
);
253 msf::MSFStreamLayout
PDBFile::getFpmStreamLayout() const {
254 return msf::getFpmStreamLayout(ContainerLayout
);
/// Builds the globals stream: obtains the DBI stream, creates an indexed
/// stream for DbiS->getGlobalSymbolStreamIndex(), wraps it in a GlobalsStream,
/// calls reload() on it and stores it in Globals. Errors from
/// getPDBDbiStream(), safelyCreateIndexedStream() and reload() are returned.
// NOTE(review): the conditions guarding the takeError() returns, the
// declaration of GlobalS, and the final return are not present in this copy
// (listing lines 258, 260, 262-263, 265, 271-273) -- confirm upstream.
257 Expected
<GlobalsStream
&> PDBFile::getPDBGlobalsStream() {
259 auto DbiS
= getPDBDbiStream();
261 return DbiS
.takeError();
264 safelyCreateIndexedStream(DbiS
->getGlobalSymbolStreamIndex());
266 return GlobalS
.takeError();
// The stream is built in a temporary and only stored after reload() succeeds.
267 auto TempGlobals
= std::make_unique
<GlobalsStream
>(std::move(*GlobalS
));
268 if (auto EC
= TempGlobals
->reload())
269 return std::move(EC
);
270 Globals
= std::move(TempGlobals
);
/// Builds the PDB info stream: creates an indexed stream for StreamPDB, wraps
/// it in an InfoStream, calls reload() on it and stores it in Info. Errors
/// from safelyCreateIndexedStream() and reload() are returned.
// NOTE(review): the condition guarding the takeError() return and the final
// return are not present in this copy (listing lines 276, 278, 284-286) --
// confirm upstream.
275 Expected
<InfoStream
&> PDBFile::getPDBInfoStream() {
277 auto InfoS
= safelyCreateIndexedStream(StreamPDB
);
279 return InfoS
.takeError();
// The stream is built in a temporary and only stored after reload() succeeds.
280 auto TempInfo
= std::make_unique
<InfoStream
>(std::move(*InfoS
));
281 if (auto EC
= TempInfo
->reload())
282 return std::move(EC
);
283 Info
= std::move(TempInfo
);
/// Builds the DBI stream: creates an indexed stream for StreamDBI, wraps it in
/// a DbiStream, calls reload(this) on it and stores it in Dbi. Errors from
/// safelyCreateIndexedStream() and reload() are returned.
// NOTE(review): the condition guarding the takeError() return and the final
// return are not present in this copy (listing lines 289, 291, 297-299) --
// confirm upstream.
288 Expected
<DbiStream
&> PDBFile::getPDBDbiStream() {
290 auto DbiS
= safelyCreateIndexedStream(StreamDBI
);
292 return DbiS
.takeError();
// The stream is built in a temporary and only stored after reload() succeeds.
293 auto TempDbi
= std::make_unique
<DbiStream
>(std::move(*DbiS
));
294 if (auto EC
= TempDbi
->reload(this))
295 return std::move(EC
);
296 Dbi
= std::move(TempDbi
);
/// Builds the TPI stream: creates an indexed stream for StreamTPI, wraps it in
/// a TpiStream constructed with *this, calls reload() on it and stores it in
/// Tpi. Errors from safelyCreateIndexedStream() and reload() are returned.
// NOTE(review): the condition guarding the takeError() return and the final
// return are not present in this copy (listing lines 302, 304, 310-312) --
// confirm upstream.
301 Expected
<TpiStream
&> PDBFile::getPDBTpiStream() {
303 auto TpiS
= safelyCreateIndexedStream(StreamTPI
);
305 return TpiS
.takeError();
// The stream is built in a temporary and only stored after reload() succeeds.
306 auto TempTpi
= std::make_unique
<TpiStream
>(*this, std::move(*TpiS
));
307 if (auto EC
= TempTpi
->reload())
308 return std::move(EC
);
309 Tpi
= std::move(TempTpi
);
/// Builds the IPI stream. Fails with raw_error_code::no_stream when
/// hasPDBIpiStream() is false; otherwise creates an indexed stream for
/// StreamIPI, wraps it in a TpiStream constructed with *this, calls reload()
/// on it and stores it in Ipi. Errors from safelyCreateIndexedStream() and
/// reload() are returned.
// NOTE(review): the condition guarding the takeError() return and the final
// return are not present in this copy (listing lines 315, 318, 320, 326-328)
// -- confirm upstream.
314 Expected
<TpiStream
&> PDBFile::getPDBIpiStream() {
316 if (!hasPDBIpiStream())
317 return make_error
<RawError
>(raw_error_code::no_stream
);
319 auto IpiS
= safelyCreateIndexedStream(StreamIPI
);
321 return IpiS
.takeError();
// The stream is built in a temporary and only stored after reload() succeeds.
322 auto TempIpi
= std::make_unique
<TpiStream
>(*this, std::move(*IpiS
));
323 if (auto EC
= TempIpi
->reload())
324 return std::move(EC
);
325 Ipi
= std::move(TempIpi
);
/// Builds the publics stream: obtains the DBI stream, creates an indexed
/// stream for DbiS->getPublicSymbolStreamIndex(), wraps it in a PublicsStream,
/// calls reload() on it and stores it in Publics. Errors from
/// getPDBDbiStream(), safelyCreateIndexedStream() and reload() are returned.
// NOTE(review): the conditions guarding the takeError() returns, the
// declaration of PublicS, and the final return are not present in this copy
// (listing lines 331, 333, 335-336, 338, 344-346) -- confirm upstream.
330 Expected
<PublicsStream
&> PDBFile::getPDBPublicsStream() {
332 auto DbiS
= getPDBDbiStream();
334 return DbiS
.takeError();
337 safelyCreateIndexedStream(DbiS
->getPublicSymbolStreamIndex());
339 return PublicS
.takeError();
// The stream is built in a temporary and only stored after reload() succeeds.
340 auto TempPublics
= std::make_unique
<PublicsStream
>(std::move(*PublicS
));
341 if (auto EC
= TempPublics
->reload())
342 return std::move(EC
);
343 Publics
= std::move(TempPublics
);
/// Builds the symbol record stream: obtains the DBI stream, creates an indexed
/// stream for DbiS->getSymRecordStreamIndex(), wraps it in a SymbolStream,
/// calls reload() on it and stores it in Symbols. Errors from
/// getPDBDbiStream(), safelyCreateIndexedStream() and reload() are returned.
// NOTE(review): the conditions guarding the takeError() returns and the final
// return are not present in this copy (listing lines 349, 351, 353, 356, 358,
// 363-365) -- confirm upstream.
348 Expected
<SymbolStream
&> PDBFile::getPDBSymbolStream() {
350 auto DbiS
= getPDBDbiStream();
352 return DbiS
.takeError();
354 uint32_t SymbolStreamNum
= DbiS
->getSymRecordStreamIndex();
355 auto SymbolS
= safelyCreateIndexedStream(SymbolStreamNum
);
357 return SymbolS
.takeError();
// The stream is built in a temporary and only stored after reload() succeeds.
359 auto TempSymbols
= std::make_unique
<SymbolStream
>(std::move(*SymbolS
));
360 if (auto EC
= TempSymbols
->reload())
361 return std::move(EC
);
362 Symbols
= std::move(TempSymbols
);
/// Builds the PDB string table from the named stream "/names": creates the
/// stream, reloads a new PDBStringTable from a reader over it, then stores the
/// stream in StringTableStream and the table in Strings. Errors from
/// safelyCreateNamedStream() and reload() are returned. Asserts that reload()
/// consumed the whole stream.
// NOTE(review): the condition guarding the takeError() return and the final
// return are not present in this copy (listing lines 368, 370, 372, 380-382)
// -- confirm upstream.
367 Expected
<PDBStringTable
&> PDBFile::getStringTable() {
369 auto NS
= safelyCreateNamedStream("/names");
371 return NS
.takeError();
373 auto N
= std::make_unique
<PDBStringTable
>();
374 BinaryStreamReader
Reader(**NS
);
375 if (auto EC
= N
->reload(Reader
))
376 return std::move(EC
);
377 assert(Reader
.bytesRemaining() == 0);
// Both the backing stream and the parsed table are stored on the PDBFile.
378 StringTableStream
= std::move(*NS
);
379 Strings
= std::move(N
);
/// Returns the injected source stream, building it when InjectedSources is not
/// yet set: creates the named stream "/src/headerblock", obtains the string
/// table, reloads a new InjectedSourceStream against that table and stores it
/// in InjectedSources. Errors from safelyCreateNamedStream(), getStringTable()
/// and reload() are returned.
// NOTE(review): the conditions guarding the two takeError() returns and the
// closing braces are not present in this copy (listing lines 387, 389, 391,
// 393, 398, 400) -- confirm upstream.
384 Expected
<InjectedSourceStream
&> PDBFile::getInjectedSourceStream() {
385 if (!InjectedSources
) {
386 auto IJS
= safelyCreateNamedStream("/src/headerblock");
388 return IJS
.takeError();
// This local `Strings` shadows the PDBFile member of the same name.
390 auto Strings
= getStringTable();
392 return Strings
.takeError();
394 auto IJ
= std::make_unique
<InjectedSourceStream
>(std::move(*IJS
));
395 if (auto EC
= IJ
->reload(*Strings
))
396 return std::move(EC
);
397 InjectedSources
= std::move(IJ
);
399 return *InjectedSources
;
/// Derives a pointer size from the DBI stream's machine type; the visible code
/// distinguishes PDB_Machine::Amd64 from all other machines.
// NOTE(review): the handling of a failed getPDBDbiStream() and every return
// statement (listing lines 404-405, 408-410) are not present in this copy, so
// the actual returned values cannot be confirmed from here -- check upstream.
402 uint32_t PDBFile::getPointerSize() {
403 auto DbiS
= getPDBDbiStream();
406 PDB_Machine Machine
= DbiS
->getMachineType();
407 if (Machine
== PDB_Machine::Amd64
)
412 bool PDBFile::hasPDBDbiStream() const {
413 return StreamDBI
< getNumStreams() && getStreamByteSize(StreamDBI
) > 0;
/// Reports whether the DBI stream's global symbol stream index refers to an
/// existing stream (index < getNumStreams()). An error from getPDBDbiStream()
/// is consumed rather than propagated.
// NOTE(review): the condition around consumeError() and the value returned on
// that path (listing lines 418, 420-422) are not present in this copy --
// confirm upstream.
416 bool PDBFile::hasPDBGlobalsStream() {
417 auto DbiS
= getPDBDbiStream();
419 consumeError(DbiS
.takeError());
423 return DbiS
->getGlobalSymbolStreamIndex() < getNumStreams();
426 bool PDBFile::hasPDBInfoStream() const { return StreamPDB
< getNumStreams(); }
/// Reports whether the file has an IPI stream. Checks hasPDBInfoStream() and
/// that StreamIPI is a valid stream index, then asks the info stream whether
/// it contains an ID stream.
// NOTE(review): the statements executed when either check fails (listing
// lines 430, 433) are not present in this copy -- confirm upstream.
428 bool PDBFile::hasPDBIpiStream() const {
429 if (!hasPDBInfoStream())
432 if (StreamIPI
>= getNumStreams())
// getPDBInfoStream() is non-const, hence the const_cast; its result is
// unwrapped with cantFail rather than checked.
435 auto &InfoStream
= cantFail(const_cast<PDBFile
*>(this)->getPDBInfoStream());
436 return InfoStream
.containsIdStream();
/// Reports whether the DBI stream's public symbol stream index refers to an
/// existing stream (index < getNumStreams()). An error from getPDBDbiStream()
/// is consumed rather than propagated.
// NOTE(review): the condition around consumeError() and the value returned on
// that path (listing lines 441, 443-444) are not present in this copy --
// confirm upstream.
439 bool PDBFile::hasPDBPublicsStream() {
440 auto DbiS
= getPDBDbiStream();
442 consumeError(DbiS
.takeError());
445 return DbiS
->getPublicSymbolStreamIndex() < getNumStreams();
/// Reports whether the DBI stream's symbol record stream index refers to an
/// existing stream (index < getNumStreams()).
// NOTE(review): the handling of a failed getPDBDbiStream() (listing lines
// 450-451) is not present in this copy -- confirm upstream.
448 bool PDBFile::hasPDBSymbolStream() {
449 auto DbiS
= getPDBDbiStream();
452 return DbiS
->getSymRecordStreamIndex() < getNumStreams();
455 bool PDBFile::hasPDBTpiStream() const { return StreamTPI
< getNumStreams(); }
/// Reports whether the info stream has a named stream called "/names". A
/// failed name lookup has its error consumed rather than propagated; a
/// successful lookup is asserted to yield a valid stream index.
// NOTE(review): the handling of a failed getPDBInfoStream(), the condition
// around consumeError(), and all return statements (listing lines 459-460,
// 462, 464-465, 467) are not present in this copy -- confirm upstream.
457 bool PDBFile::hasPDBStringTable() {
458 auto IS
= getPDBInfoStream();
461 Expected
<uint32_t> ExpectedNSI
= IS
->getNamedStreamIndex("/names");
463 consumeError(ExpectedNSI
.takeError());
466 assert(*ExpectedNSI
< getNumStreams());
/// Reports whether the info stream has a named stream called
/// "/src/headerblock". A failed name lookup has its error consumed rather than
/// propagated; a successful lookup is asserted to yield a valid stream index.
// NOTE(review): the handling of a failed getPDBInfoStream(), the condition
// around consumeError(), and all return statements (listing lines 472-473,
// 475, 477-478, 480) are not present in this copy -- confirm upstream.
470 bool PDBFile::hasPDBInjectedSourceStream() {
471 auto IS
= getPDBInfoStream();
474 Expected
<uint32_t> ExpectedNSI
= IS
->getNamedStreamIndex("/src/headerblock");
476 consumeError(ExpectedNSI
.takeError());
479 assert(*ExpectedNSI
< getNumStreams());
483 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
484 /// stream with that index actually exists. If it does not, the return value
485 /// will have an MSFError with code msf_error_code::no_stream. Else, the return
486 /// value will contain the stream returned by createIndexedStream().
487 Expected
<std::unique_ptr
<MappedBlockStream
>>
488 PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex
) const {
489 if (StreamIndex
>= getNumStreams())
490 // This rejects kInvalidStreamIndex with an error as well.
491 return make_error
<RawError
>(raw_error_code::no_stream
);
492 return createIndexedStream(StreamIndex
);
/// Looks up \p Name in the info stream's named-stream table and creates the
/// stream with the resulting index via safelyCreateIndexedStream(). Errors
/// from getPDBInfoStream() and getNamedStreamIndex() are returned.
// NOTE(review): the conditions guarding the two takeError() returns (listing
// lines 498, 502) and the closing brace are not present in this copy --
// confirm upstream.
495 Expected
<std::unique_ptr
<MappedBlockStream
>>
496 PDBFile::safelyCreateNamedStream(StringRef Name
) {
497 auto IS
= getPDBInfoStream();
499 return IS
.takeError();
501 Expected
<uint32_t> ExpectedNSI
= IS
->getNamedStreamIndex(Name
);
503 return ExpectedNSI
.takeError();
504 uint32_t NameStreamIndex
= *ExpectedNSI
;
506 return safelyCreateIndexedStream(NameStreamIndex
);