1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
5 #include "leveldb/table.h"
7 #include "leveldb/cache.h"
8 #include "leveldb/comparator.h"
9 #include "leveldb/env.h"
10 #include "leveldb/filter_policy.h"
11 #include "leveldb/options.h"
12 #include "table/block.h"
13 #include "table/filter_block.h"
14 #include "table/format.h"
15 #include "table/two_level_iterator.h"
16 #include "util/coding.h"
23 delete [] filter_data
;
29 RandomAccessFile
* file
;
31 FilterBlockReader
* filter
;
32 const char* filter_data
;
34 BlockHandle metaindex_handle
; // Handle to metaindex_block: saved from footer
38 Status
Table::Open(const Options
& options
,
39 RandomAccessFile
* file
,
43 if (size
< Footer::kEncodedLength
) {
44 return Status::Corruption("file is too short to be an sstable");
47 char footer_space
[Footer::kEncodedLength
];
49 Status s
= file
->Read(size
- Footer::kEncodedLength
, Footer::kEncodedLength
,
50 &footer_input
, footer_space
);
51 if (!s
.ok()) return s
;
54 s
= footer
.DecodeFrom(&footer_input
);
55 if (!s
.ok()) return s
;
57 // Read the index block
58 BlockContents contents
;
59 Block
* index_block
= NULL
;
62 if (options
.paranoid_checks
) {
63 opt
.verify_checksums
= true;
65 s
= ReadBlock(file
, opt
, footer
.index_handle(), &contents
);
67 index_block
= new Block(contents
);
72 // We've successfully read the footer and the index block: we're
73 // ready to serve requests.
74 Rep
* rep
= new Table::Rep
;
75 rep
->options
= options
;
77 rep
->metaindex_handle
= footer
.metaindex_handle();
78 rep
->index_block
= index_block
;
79 rep
->cache_id
= (options
.block_cache
? options
.block_cache
->NewId() : 0);
80 rep
->filter_data
= NULL
;
82 *table
= new Table(rep
);
83 (*table
)->ReadMeta(footer
);
91 void Table::ReadMeta(const Footer
& footer
) {
92 if (rep_
->options
.filter_policy
== NULL
) {
93 return; // Do not need any metadata
96 // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
97 // it is an empty block.
99 if (rep_
->options
.paranoid_checks
) {
100 opt
.verify_checksums
= true;
102 BlockContents contents
;
103 if (!ReadBlock(rep_
->file
, opt
, footer
.metaindex_handle(), &contents
).ok()) {
104 // Do not propagate errors since meta info is not needed for operation
107 Block
* meta
= new Block(contents
);
109 Iterator
* iter
= meta
->NewIterator(BytewiseComparator());
110 std::string key
= "filter.";
111 key
.append(rep_
->options
.filter_policy
->Name());
113 if (iter
->Valid() && iter
->key() == Slice(key
)) {
114 ReadFilter(iter
->value());
120 void Table::ReadFilter(const Slice
& filter_handle_value
) {
121 Slice v
= filter_handle_value
;
122 BlockHandle filter_handle
;
123 if (!filter_handle
.DecodeFrom(&v
).ok()) {
127 // We might want to unify with ReadBlock() if we start
128 // requiring checksum verification in Table::Open.
130 if (rep_
->options
.paranoid_checks
) {
131 opt
.verify_checksums
= true;
134 if (!ReadBlock(rep_
->file
, opt
, filter_handle
, &block
).ok()) {
137 if (block
.heap_allocated
) {
138 rep_
->filter_data
= block
.data
.data(); // Will need to delete later
140 rep_
->filter
= new FilterBlockReader(rep_
->options
.filter_policy
, block
.data
);
147 static void DeleteBlock(void* arg
, void* ignored
) {
148 delete reinterpret_cast<Block
*>(arg
);
151 static void DeleteCachedBlock(const Slice
& key
, void* value
) {
152 Block
* block
= reinterpret_cast<Block
*>(value
);
156 static void ReleaseBlock(void* arg
, void* h
) {
157 Cache
* cache
= reinterpret_cast<Cache
*>(arg
);
158 Cache::Handle
* handle
= reinterpret_cast<Cache::Handle
*>(h
);
159 cache
->Release(handle
);
162 // Convert an index iterator value (i.e., an encoded BlockHandle)
163 // into an iterator over the contents of the corresponding block.
164 Iterator
* Table::BlockReader(void* arg
,
165 const ReadOptions
& options
,
166 const Slice
& index_value
) {
167 Table
* table
= reinterpret_cast<Table
*>(arg
);
168 Cache
* block_cache
= table
->rep_
->options
.block_cache
;
170 Cache::Handle
* cache_handle
= NULL
;
173 Slice input
= index_value
;
174 Status s
= handle
.DecodeFrom(&input
);
175 // We intentionally allow extra stuff in index_value so that we
176 // can add more features in the future.
179 BlockContents contents
;
180 if (block_cache
!= NULL
) {
181 char cache_key_buffer
[16];
182 EncodeFixed64(cache_key_buffer
, table
->rep_
->cache_id
);
183 EncodeFixed64(cache_key_buffer
+8, handle
.offset());
184 Slice
key(cache_key_buffer
, sizeof(cache_key_buffer
));
185 cache_handle
= block_cache
->Lookup(key
);
186 if (cache_handle
!= NULL
) {
187 block
= reinterpret_cast<Block
*>(block_cache
->Value(cache_handle
));
189 s
= ReadBlock(table
->rep_
->file
, options
, handle
, &contents
);
191 block
= new Block(contents
);
192 if (contents
.cachable
&& options
.fill_cache
) {
193 cache_handle
= block_cache
->Insert(
194 key
, block
, block
->size(), &DeleteCachedBlock
);
199 s
= ReadBlock(table
->rep_
->file
, options
, handle
, &contents
);
201 block
= new Block(contents
);
208 iter
= block
->NewIterator(table
->rep_
->options
.comparator
);
209 if (cache_handle
== NULL
) {
210 iter
->RegisterCleanup(&DeleteBlock
, block
, NULL
);
212 iter
->RegisterCleanup(&ReleaseBlock
, block_cache
, cache_handle
);
215 iter
= NewErrorIterator(s
);
220 Iterator
* Table::NewIterator(const ReadOptions
& options
) const {
221 return NewTwoLevelIterator(
222 rep_
->index_block
->NewIterator(rep_
->options
.comparator
),
223 &Table::BlockReader
, const_cast<Table
*>(this), options
);
226 Status
Table::InternalGet(const ReadOptions
& options
, const Slice
& k
,
228 void (*saver
)(void*, const Slice
&, const Slice
&)) {
230 Iterator
* iiter
= rep_
->index_block
->NewIterator(rep_
->options
.comparator
);
232 if (iiter
->Valid()) {
233 Slice handle_value
= iiter
->value();
234 FilterBlockReader
* filter
= rep_
->filter
;
236 if (filter
!= NULL
&&
237 handle
.DecodeFrom(&handle_value
).ok() &&
238 !filter
->KeyMayMatch(handle
.offset(), k
)) {
241 Iterator
* block_iter
= BlockReader(this, options
, iiter
->value());
243 if (block_iter
->Valid()) {
244 (*saver
)(arg
, block_iter
->key(), block_iter
->value());
246 s
= block_iter
->status();
258 uint64_t Table::ApproximateOffsetOf(const Slice
& key
) const {
259 Iterator
* index_iter
=
260 rep_
->index_block
->NewIterator(rep_
->options
.comparator
);
261 index_iter
->Seek(key
);
263 if (index_iter
->Valid()) {
265 Slice input
= index_iter
->value();
266 Status s
= handle
.DecodeFrom(&input
);
268 result
= handle
.offset();
270 // Strange: we can't decode the block handle in the index block.
271 // We'll just return the offset of the metaindex block, which is
272 // close to the whole file size for this case.
273 result
= rep_
->metaindex_handle
.offset();
276 // key is past the last key in the file. Approximate the offset
277 // by returning the offset of the metaindex block (which is
278 // right near the end of the file).
279 result
= rep_
->metaindex_handle
.offset();
285 } // namespace leveldb