Use MakeUnique<Db>(...)
[bitcoinplatinum.git] / src / leveldb / db / repair.cc
blob4cd4bb047f483ad211a05bdd98201c6df1bfa87a
1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 //
5 // We recover the contents of the descriptor from the other files we find.
6 // (1) Any log files are first converted to tables
7 // (2) We scan every table to compute
8 // (a) smallest/largest for the table
9 // (b) largest sequence number in the table
10 // (3) We generate descriptor contents:
11 // - log number is set to zero
12 // - next-file-number is set to 1 + largest file number we found
13 // - last-sequence-number is set to largest sequence# found across
14 // all tables (see 2b)
15 // - compaction pointers are cleared
16 // - every table file is added at level 0
18 // Possible optimization 1:
19 // (a) Compute total size and use to pick appropriate max-level M
20 // (b) Sort tables by largest sequence# in the table
21 // (c) For each table: if it overlaps earlier table, place in level-0,
22 // else place in level-M.
23 // Possible optimization 2:
24 // Store per-table metadata (smallest, largest, largest-seq#, ...)
25 // in the table's meta section to speed up ScanTable.
27 #include "db/builder.h"
28 #include "db/db_impl.h"
29 #include "db/dbformat.h"
30 #include "db/filename.h"
31 #include "db/log_reader.h"
32 #include "db/log_writer.h"
33 #include "db/memtable.h"
34 #include "db/table_cache.h"
35 #include "db/version_edit.h"
36 #include "db/write_batch_internal.h"
37 #include "leveldb/comparator.h"
38 #include "leveldb/db.h"
39 #include "leveldb/env.h"
41 namespace leveldb {
43 namespace {
45 class Repairer {
46 public:
47 Repairer(const std::string& dbname, const Options& options)
48 : dbname_(dbname),
49 env_(options.env),
50 icmp_(options.comparator),
51 ipolicy_(options.filter_policy),
52 options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)),
53 owns_info_log_(options_.info_log != options.info_log),
54 owns_cache_(options_.block_cache != options.block_cache),
55 next_file_number_(1) {
56 // TableCache can be small since we expect each table to be opened once.
57 table_cache_ = new TableCache(dbname_, &options_, 10);
60 ~Repairer() {
61 delete table_cache_;
62 if (owns_info_log_) {
63 delete options_.info_log;
65 if (owns_cache_) {
66 delete options_.block_cache;
70 Status Run() {
71 Status status = FindFiles();
72 if (status.ok()) {
73 ConvertLogFilesToTables();
74 ExtractMetaData();
75 status = WriteDescriptor();
77 if (status.ok()) {
78 unsigned long long bytes = 0;
79 for (size_t i = 0; i < tables_.size(); i++) {
80 bytes += tables_[i].meta.file_size;
82 Log(options_.info_log,
83 "**** Repaired leveldb %s; "
84 "recovered %d files; %llu bytes. "
85 "Some data may have been lost. "
86 "****",
87 dbname_.c_str(),
88 static_cast<int>(tables_.size()),
89 bytes);
91 return status;
94 private:
95 struct TableInfo {
96 FileMetaData meta;
97 SequenceNumber max_sequence;
100 std::string const dbname_;
101 Env* const env_;
102 InternalKeyComparator const icmp_;
103 InternalFilterPolicy const ipolicy_;
104 Options const options_;
105 bool owns_info_log_;
106 bool owns_cache_;
107 TableCache* table_cache_;
108 VersionEdit edit_;
110 std::vector<std::string> manifests_;
111 std::vector<uint64_t> table_numbers_;
112 std::vector<uint64_t> logs_;
113 std::vector<TableInfo> tables_;
114 uint64_t next_file_number_;
116 Status FindFiles() {
117 std::vector<std::string> filenames;
118 Status status = env_->GetChildren(dbname_, &filenames);
119 if (!status.ok()) {
120 return status;
122 if (filenames.empty()) {
123 return Status::IOError(dbname_, "repair found no files");
126 uint64_t number;
127 FileType type;
128 for (size_t i = 0; i < filenames.size(); i++) {
129 if (ParseFileName(filenames[i], &number, &type)) {
130 if (type == kDescriptorFile) {
131 manifests_.push_back(filenames[i]);
132 } else {
133 if (number + 1 > next_file_number_) {
134 next_file_number_ = number + 1;
136 if (type == kLogFile) {
137 logs_.push_back(number);
138 } else if (type == kTableFile) {
139 table_numbers_.push_back(number);
140 } else {
141 // Ignore other files
146 return status;
149 void ConvertLogFilesToTables() {
150 for (size_t i = 0; i < logs_.size(); i++) {
151 std::string logname = LogFileName(dbname_, logs_[i]);
152 Status status = ConvertLogToTable(logs_[i]);
153 if (!status.ok()) {
154 Log(options_.info_log, "Log #%llu: ignoring conversion error: %s",
155 (unsigned long long) logs_[i],
156 status.ToString().c_str());
158 ArchiveFile(logname);
162 Status ConvertLogToTable(uint64_t log) {
163 struct LogReporter : public log::Reader::Reporter {
164 Env* env;
165 Logger* info_log;
166 uint64_t lognum;
167 virtual void Corruption(size_t bytes, const Status& s) {
168 // We print error messages for corruption, but continue repairing.
169 Log(info_log, "Log #%llu: dropping %d bytes; %s",
170 (unsigned long long) lognum,
171 static_cast<int>(bytes),
172 s.ToString().c_str());
176 // Open the log file
177 std::string logname = LogFileName(dbname_, log);
178 SequentialFile* lfile;
179 Status status = env_->NewSequentialFile(logname, &lfile);
180 if (!status.ok()) {
181 return status;
184 // Create the log reader.
185 LogReporter reporter;
186 reporter.env = env_;
187 reporter.info_log = options_.info_log;
188 reporter.lognum = log;
189 // We intentionally make log::Reader do checksumming so that
190 // corruptions cause entire commits to be skipped instead of
191 // propagating bad information (like overly large sequence
192 // numbers).
193 log::Reader reader(lfile, &reporter, false/*do not checksum*/,
194 0/*initial_offset*/);
196 // Read all the records and add to a memtable
197 std::string scratch;
198 Slice record;
199 WriteBatch batch;
200 MemTable* mem = new MemTable(icmp_);
201 mem->Ref();
202 int counter = 0;
203 while (reader.ReadRecord(&record, &scratch)) {
204 if (record.size() < 12) {
205 reporter.Corruption(
206 record.size(), Status::Corruption("log record too small"));
207 continue;
209 WriteBatchInternal::SetContents(&batch, record);
210 status = WriteBatchInternal::InsertInto(&batch, mem);
211 if (status.ok()) {
212 counter += WriteBatchInternal::Count(&batch);
213 } else {
214 Log(options_.info_log, "Log #%llu: ignoring %s",
215 (unsigned long long) log,
216 status.ToString().c_str());
217 status = Status::OK(); // Keep going with rest of file
220 delete lfile;
222 // Do not record a version edit for this conversion to a Table
223 // since ExtractMetaData() will also generate edits.
224 FileMetaData meta;
225 meta.number = next_file_number_++;
226 Iterator* iter = mem->NewIterator();
227 status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
228 delete iter;
229 mem->Unref();
230 mem = NULL;
231 if (status.ok()) {
232 if (meta.file_size > 0) {
233 table_numbers_.push_back(meta.number);
236 Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
237 (unsigned long long) log,
238 counter,
239 (unsigned long long) meta.number,
240 status.ToString().c_str());
241 return status;
244 void ExtractMetaData() {
245 for (size_t i = 0; i < table_numbers_.size(); i++) {
246 ScanTable(table_numbers_[i]);
250 Iterator* NewTableIterator(const FileMetaData& meta) {
251 // Same as compaction iterators: if paranoid_checks are on, turn
252 // on checksum verification.
253 ReadOptions r;
254 r.verify_checksums = options_.paranoid_checks;
255 return table_cache_->NewIterator(r, meta.number, meta.file_size);
258 void ScanTable(uint64_t number) {
259 TableInfo t;
260 t.meta.number = number;
261 std::string fname = TableFileName(dbname_, number);
262 Status status = env_->GetFileSize(fname, &t.meta.file_size);
263 if (!status.ok()) {
264 // Try alternate file name.
265 fname = SSTTableFileName(dbname_, number);
266 Status s2 = env_->GetFileSize(fname, &t.meta.file_size);
267 if (s2.ok()) {
268 status = Status::OK();
271 if (!status.ok()) {
272 ArchiveFile(TableFileName(dbname_, number));
273 ArchiveFile(SSTTableFileName(dbname_, number));
274 Log(options_.info_log, "Table #%llu: dropped: %s",
275 (unsigned long long) t.meta.number,
276 status.ToString().c_str());
277 return;
280 // Extract metadata by scanning through table.
281 int counter = 0;
282 Iterator* iter = NewTableIterator(t.meta);
283 bool empty = true;
284 ParsedInternalKey parsed;
285 t.max_sequence = 0;
286 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
287 Slice key = iter->key();
288 if (!ParseInternalKey(key, &parsed)) {
289 Log(options_.info_log, "Table #%llu: unparsable key %s",
290 (unsigned long long) t.meta.number,
291 EscapeString(key).c_str());
292 continue;
295 counter++;
296 if (empty) {
297 empty = false;
298 t.meta.smallest.DecodeFrom(key);
300 t.meta.largest.DecodeFrom(key);
301 if (parsed.sequence > t.max_sequence) {
302 t.max_sequence = parsed.sequence;
305 if (!iter->status().ok()) {
306 status = iter->status();
308 delete iter;
309 Log(options_.info_log, "Table #%llu: %d entries %s",
310 (unsigned long long) t.meta.number,
311 counter,
312 status.ToString().c_str());
314 if (status.ok()) {
315 tables_.push_back(t);
316 } else {
317 RepairTable(fname, t); // RepairTable archives input file.
321 void RepairTable(const std::string& src, TableInfo t) {
322 // We will copy src contents to a new table and then rename the
323 // new table over the source.
325 // Create builder.
326 std::string copy = TableFileName(dbname_, next_file_number_++);
327 WritableFile* file;
328 Status s = env_->NewWritableFile(copy, &file);
329 if (!s.ok()) {
330 return;
332 TableBuilder* builder = new TableBuilder(options_, file);
334 // Copy data.
335 Iterator* iter = NewTableIterator(t.meta);
336 int counter = 0;
337 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
338 builder->Add(iter->key(), iter->value());
339 counter++;
341 delete iter;
343 ArchiveFile(src);
344 if (counter == 0) {
345 builder->Abandon(); // Nothing to save
346 } else {
347 s = builder->Finish();
348 if (s.ok()) {
349 t.meta.file_size = builder->FileSize();
352 delete builder;
353 builder = NULL;
355 if (s.ok()) {
356 s = file->Close();
358 delete file;
359 file = NULL;
361 if (counter > 0 && s.ok()) {
362 std::string orig = TableFileName(dbname_, t.meta.number);
363 s = env_->RenameFile(copy, orig);
364 if (s.ok()) {
365 Log(options_.info_log, "Table #%llu: %d entries repaired",
366 (unsigned long long) t.meta.number, counter);
367 tables_.push_back(t);
370 if (!s.ok()) {
371 env_->DeleteFile(copy);
375 Status WriteDescriptor() {
376 std::string tmp = TempFileName(dbname_, 1);
377 WritableFile* file;
378 Status status = env_->NewWritableFile(tmp, &file);
379 if (!status.ok()) {
380 return status;
383 SequenceNumber max_sequence = 0;
384 for (size_t i = 0; i < tables_.size(); i++) {
385 if (max_sequence < tables_[i].max_sequence) {
386 max_sequence = tables_[i].max_sequence;
390 edit_.SetComparatorName(icmp_.user_comparator()->Name());
391 edit_.SetLogNumber(0);
392 edit_.SetNextFile(next_file_number_);
393 edit_.SetLastSequence(max_sequence);
395 for (size_t i = 0; i < tables_.size(); i++) {
396 // TODO(opt): separate out into multiple levels
397 const TableInfo& t = tables_[i];
398 edit_.AddFile(0, t.meta.number, t.meta.file_size,
399 t.meta.smallest, t.meta.largest);
402 //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
404 log::Writer log(file);
405 std::string record;
406 edit_.EncodeTo(&record);
407 status = log.AddRecord(record);
409 if (status.ok()) {
410 status = file->Close();
412 delete file;
413 file = NULL;
415 if (!status.ok()) {
416 env_->DeleteFile(tmp);
417 } else {
418 // Discard older manifests
419 for (size_t i = 0; i < manifests_.size(); i++) {
420 ArchiveFile(dbname_ + "/" + manifests_[i]);
423 // Install new manifest
424 status = env_->RenameFile(tmp, DescriptorFileName(dbname_, 1));
425 if (status.ok()) {
426 status = SetCurrentFile(env_, dbname_, 1);
427 } else {
428 env_->DeleteFile(tmp);
431 return status;
434 void ArchiveFile(const std::string& fname) {
435 // Move into another directory. E.g., for
436 // dir/foo
437 // rename to
438 // dir/lost/foo
439 const char* slash = strrchr(fname.c_str(), '/');
440 std::string new_dir;
441 if (slash != NULL) {
442 new_dir.assign(fname.data(), slash - fname.data());
444 new_dir.append("/lost");
445 env_->CreateDir(new_dir); // Ignore error
446 std::string new_file = new_dir;
447 new_file.append("/");
448 new_file.append((slash == NULL) ? fname.c_str() : slash + 1);
449 Status s = env_->RenameFile(fname, new_file);
450 Log(options_.info_log, "Archiving %s: %s\n",
451 fname.c_str(), s.ToString().c_str());
454 } // namespace
456 Status RepairDB(const std::string& dbname, const Options& options) {
457 Repairer repairer(dbname, options);
458 return repairer.Run();
461 } // namespace leveldb