1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/disk_cache/simple/simple_index_file.h"
9 #include "base/files/file.h"
10 #include "base/files/file_util.h"
11 #include "base/files/memory_mapped_file.h"
12 #include "base/hash.h"
13 #include "base/logging.h"
14 #include "base/pickle.h"
15 #include "base/single_thread_task_runner.h"
16 #include "base/task_runner_util.h"
17 #include "base/threading/thread_restrictions.h"
18 #include "net/disk_cache/simple/simple_backend_version.h"
19 #include "net/disk_cache/simple/simple_entry_format.h"
20 #include "net/disk_cache/simple/simple_histogram_macros.h"
21 #include "net/disk_cache/simple/simple_index.h"
22 #include "net/disk_cache/simple/simple_synchronous_entry.h"
23 #include "net/disk_cache/simple/simple_util.h"
24 #include "third_party/zlib/zlib.h"
28 namespace disk_cache
{
31 const int kEntryFilesHashLength
= 16;
32 const int kEntryFilesSuffixLength
= 2;
34 const uint64 kMaxEntiresInIndex
= 100000000;
36 uint32
CalculatePickleCRC(const Pickle
& pickle
) {
37 return crc32(crc32(0, Z_NULL
, 0),
38 reinterpret_cast<const Bytef
*>(pickle
.payload()),
39 pickle
.payload_size());
42 // Used in histograms. Please only add new values at the end.
44 INDEX_STATE_CORRUPT
= 0,
45 INDEX_STATE_STALE
= 1,
46 INDEX_STATE_FRESH
= 2,
47 INDEX_STATE_FRESH_CONCURRENT_UPDATES
= 3,
51 void UmaRecordIndexFileState(IndexFileState state
, net::CacheType cache_type
) {
52 SIMPLE_CACHE_UMA(ENUMERATION
,
53 "IndexFileStateOnLoad", cache_type
, state
, INDEX_STATE_MAX
);
56 // Used in histograms. Please only add new values at the end.
57 enum IndexInitMethod
{
58 INITIALIZE_METHOD_RECOVERED
= 0,
59 INITIALIZE_METHOD_LOADED
= 1,
60 INITIALIZE_METHOD_NEWCACHE
= 2,
61 INITIALIZE_METHOD_MAX
= 3,
64 void UmaRecordIndexInitMethod(IndexInitMethod method
,
65 net::CacheType cache_type
) {
66 SIMPLE_CACHE_UMA(ENUMERATION
,
67 "IndexInitializeMethod", cache_type
,
68 method
, INITIALIZE_METHOD_MAX
);
71 bool WritePickleFile(Pickle
* pickle
, const base::FilePath
& file_name
) {
74 File::FLAG_CREATE
| File::FLAG_WRITE
| File::FLAG_SHARE_DELETE
);
79 file
.Write(0, static_cast<const char*>(pickle
->data()), pickle
->size());
80 if (bytes_written
!= implicit_cast
<int>(pickle
->size())) {
81 simple_util::SimpleCacheDeleteFile(file_name
);
87 // Called for each cache directory traversal iteration.
88 void ProcessEntryFile(SimpleIndex::EntrySet
* entries
,
89 const base::FilePath
& file_path
) {
90 static const size_t kEntryFilesLength
=
91 kEntryFilesHashLength
+ kEntryFilesSuffixLength
;
92 // Converting to std::string is OK since we never use UTF8 wide chars in our
94 const base::FilePath::StringType base_name
= file_path
.BaseName().value();
95 const std::string
file_name(base_name
.begin(), base_name
.end());
96 if (file_name
.size() != kEntryFilesLength
)
98 const base::StringPiece
hash_string(
99 file_name
.begin(), file_name
.begin() + kEntryFilesHashLength
);
101 if (!simple_util::GetEntryHashKeyFromHexString(hash_string
, &hash_key
)) {
102 LOG(WARNING
) << "Invalid entry hash key filename while restoring index from"
103 << " disk: " << file_name
;
107 File::Info file_info
;
108 if (!base::GetFileInfo(file_path
, &file_info
)) {
109 LOG(ERROR
) << "Could not get file info for " << file_path
.value();
112 base::Time last_used_time
;
113 #if defined(OS_POSIX)
114 // For POSIX systems, a last access time is available. However, it's not
115 // guaranteed to be more accurate than mtime. It is no worse though.
116 last_used_time
= file_info
.last_accessed
;
118 if (last_used_time
.is_null())
119 last_used_time
= file_info
.last_modified
;
121 int64 file_size
= file_info
.size
;
122 SimpleIndex::EntrySet::iterator it
= entries
->find(hash_key
);
123 if (it
== entries
->end()) {
124 SimpleIndex::InsertInEntrySet(
126 EntryMetadata(last_used_time
, file_size
),
129 // Summing up the total size of the entry through all the *_[0-1] files
130 it
->second
.SetEntrySize(it
->second
.GetEntrySize() + file_size
);
136 SimpleIndexLoadResult::SimpleIndexLoadResult() : did_load(false),
137 flush_required(false) {
140 SimpleIndexLoadResult::~SimpleIndexLoadResult() {
143 void SimpleIndexLoadResult::Reset() {
145 flush_required
= false;
150 const char SimpleIndexFile::kIndexFileName
[] = "the-real-index";
152 const char SimpleIndexFile::kIndexDirectory
[] = "index-dir";
154 const char SimpleIndexFile::kTempIndexFileName
[] = "temp-index";
156 SimpleIndexFile::IndexMetadata::IndexMetadata()
157 : magic_number_(kSimpleIndexMagicNumber
),
158 version_(kSimpleVersion
),
159 number_of_entries_(0),
162 SimpleIndexFile::IndexMetadata::IndexMetadata(
163 uint64 number_of_entries
, uint64 cache_size
)
164 : magic_number_(kSimpleIndexMagicNumber
),
165 version_(kSimpleVersion
),
166 number_of_entries_(number_of_entries
),
167 cache_size_(cache_size
) {}
169 void SimpleIndexFile::IndexMetadata::Serialize(Pickle
* pickle
) const {
171 pickle
->WriteUInt64(magic_number_
);
172 pickle
->WriteUInt32(version_
);
173 pickle
->WriteUInt64(number_of_entries_
);
174 pickle
->WriteUInt64(cache_size_
);
178 bool SimpleIndexFile::SerializeFinalData(base::Time cache_modified
,
180 if (!pickle
->WriteInt64(cache_modified
.ToInternalValue()))
182 SimpleIndexFile::PickleHeader
* header_p
= pickle
->headerT
<PickleHeader
>();
183 header_p
->crc
= CalculatePickleCRC(*pickle
);
187 bool SimpleIndexFile::IndexMetadata::Deserialize(PickleIterator
* it
) {
189 return it
->ReadUInt64(&magic_number_
) &&
190 it
->ReadUInt32(&version_
) &&
191 it
->ReadUInt64(&number_of_entries_
)&&
192 it
->ReadUInt64(&cache_size_
);
195 void SimpleIndexFile::SyncWriteToDisk(net::CacheType cache_type
,
196 const base::FilePath
& cache_directory
,
197 const base::FilePath
& index_filename
,
198 const base::FilePath
& temp_index_filename
,
199 scoped_ptr
<Pickle
> pickle
,
200 const base::TimeTicks
& start_time
,
201 bool app_on_background
) {
202 // There is a chance that the index containing all the necessary data about
203 // newly created entries will appear to be stale. This can happen if on-disk
204 // part of a Create operation does not fit into the time budget for the index
205 // flush delay. This simple approach will be reconsidered if it does not allow
206 // for maintaining freshness.
207 base::Time cache_dir_mtime
;
208 if (!simple_util::GetMTime(cache_directory
, &cache_dir_mtime
)) {
209 LOG(ERROR
) << "Could obtain information about cache age";
212 SerializeFinalData(cache_dir_mtime
, pickle
.get());
213 if (!WritePickleFile(pickle
.get(), temp_index_filename
)) {
214 if (!base::CreateDirectory(temp_index_filename
.DirName())) {
215 LOG(ERROR
) << "Could not create a directory to hold the index file";
218 if (!WritePickleFile(pickle
.get(), temp_index_filename
)) {
219 LOG(ERROR
) << "Failed to write the temporary index file";
224 // Atomically rename the temporary index file to become the real one.
225 // TODO(gavinp): DCHECK when not shutting down, since that is very strange.
226 // The rename failing during shutdown is legal because it's legal to begin
227 // erasing a cache as soon as the destructor has been called.
228 if (!base::ReplaceFile(temp_index_filename
, index_filename
, NULL
))
231 if (app_on_background
) {
232 SIMPLE_CACHE_UMA(TIMES
,
233 "IndexWriteToDiskTime.Background", cache_type
,
234 (base::TimeTicks::Now() - start_time
));
236 SIMPLE_CACHE_UMA(TIMES
,
237 "IndexWriteToDiskTime.Foreground", cache_type
,
238 (base::TimeTicks::Now() - start_time
));
242 bool SimpleIndexFile::IndexMetadata::CheckIndexMetadata() {
243 return number_of_entries_
<= kMaxEntiresInIndex
&&
244 magic_number_
== kSimpleIndexMagicNumber
&&
245 version_
== kSimpleVersion
;
248 SimpleIndexFile::SimpleIndexFile(
249 const scoped_refptr
<base::SingleThreadTaskRunner
>& cache_thread
,
250 const scoped_refptr
<base::TaskRunner
>& worker_pool
,
251 net::CacheType cache_type
,
252 const base::FilePath
& cache_directory
)
253 : cache_thread_(cache_thread
),
254 worker_pool_(worker_pool
),
255 cache_type_(cache_type
),
256 cache_directory_(cache_directory
),
257 index_file_(cache_directory_
.AppendASCII(kIndexDirectory
)
258 .AppendASCII(kIndexFileName
)),
259 temp_index_file_(cache_directory_
.AppendASCII(kIndexDirectory
)
260 .AppendASCII(kTempIndexFileName
)) {
263 SimpleIndexFile::~SimpleIndexFile() {}
265 void SimpleIndexFile::LoadIndexEntries(base::Time cache_last_modified
,
266 const base::Closure
& callback
,
267 SimpleIndexLoadResult
* out_result
) {
268 base::Closure task
= base::Bind(&SimpleIndexFile::SyncLoadIndexEntries
,
270 cache_last_modified
, cache_directory_
,
271 index_file_
, out_result
);
272 worker_pool_
->PostTaskAndReply(FROM_HERE
, task
, callback
);
275 void SimpleIndexFile::WriteToDisk(const SimpleIndex::EntrySet
& entry_set
,
277 const base::TimeTicks
& start
,
278 bool app_on_background
) {
279 IndexMetadata
index_metadata(entry_set
.size(), cache_size
);
280 scoped_ptr
<Pickle
> pickle
= Serialize(index_metadata
, entry_set
);
281 cache_thread_
->PostTask(FROM_HERE
,
282 base::Bind(&SimpleIndexFile::SyncWriteToDisk
,
287 base::Passed(&pickle
),
288 base::TimeTicks::Now(),
293 void SimpleIndexFile::SyncLoadIndexEntries(
294 net::CacheType cache_type
,
295 base::Time cache_last_modified
,
296 const base::FilePath
& cache_directory
,
297 const base::FilePath
& index_file_path
,
298 SimpleIndexLoadResult
* out_result
) {
299 // Load the index and find its age.
300 base::Time last_cache_seen_by_index
;
301 SyncLoadFromDisk(index_file_path
, &last_cache_seen_by_index
, out_result
);
303 // Consider the index loaded if it is fresh.
304 const bool index_file_existed
= base::PathExists(index_file_path
);
305 if (!out_result
->did_load
) {
306 if (index_file_existed
)
307 UmaRecordIndexFileState(INDEX_STATE_CORRUPT
, cache_type
);
309 if (cache_last_modified
<= last_cache_seen_by_index
) {
310 base::Time latest_dir_mtime
;
311 simple_util::GetMTime(cache_directory
, &latest_dir_mtime
);
312 if (LegacyIsIndexFileStale(latest_dir_mtime
, index_file_path
)) {
313 UmaRecordIndexFileState(INDEX_STATE_FRESH_CONCURRENT_UPDATES
,
316 UmaRecordIndexFileState(INDEX_STATE_FRESH
, cache_type
);
318 UmaRecordIndexInitMethod(INITIALIZE_METHOD_LOADED
, cache_type
);
321 UmaRecordIndexFileState(INDEX_STATE_STALE
, cache_type
);
324 // Reconstruct the index by scanning the disk for entries.
325 const base::TimeTicks start
= base::TimeTicks::Now();
326 SyncRestoreFromDisk(cache_directory
, index_file_path
, out_result
);
327 SIMPLE_CACHE_UMA(MEDIUM_TIMES
, "IndexRestoreTime", cache_type
,
328 base::TimeTicks::Now() - start
);
329 SIMPLE_CACHE_UMA(COUNTS
, "IndexEntriesRestored", cache_type
,
330 out_result
->entries
.size());
331 if (index_file_existed
) {
332 UmaRecordIndexInitMethod(INITIALIZE_METHOD_RECOVERED
, cache_type
);
334 UmaRecordIndexInitMethod(INITIALIZE_METHOD_NEWCACHE
, cache_type
);
335 SIMPLE_CACHE_UMA(COUNTS
,
336 "IndexCreatedEntryCount", cache_type
,
337 out_result
->entries
.size());
342 void SimpleIndexFile::SyncLoadFromDisk(const base::FilePath
& index_filename
,
343 base::Time
* out_last_cache_seen_by_index
,
344 SimpleIndexLoadResult
* out_result
) {
347 File
file(index_filename
,
348 File::FLAG_OPEN
| File::FLAG_READ
| File::FLAG_SHARE_DELETE
);
352 base::MemoryMappedFile index_file_map
;
353 if (!index_file_map
.Initialize(file
.Pass())) {
354 simple_util::SimpleCacheDeleteFile(index_filename
);
358 SimpleIndexFile::Deserialize(
359 reinterpret_cast<const char*>(index_file_map
.data()),
360 index_file_map
.length(),
361 out_last_cache_seen_by_index
,
364 if (!out_result
->did_load
)
365 simple_util::SimpleCacheDeleteFile(index_filename
);
369 scoped_ptr
<Pickle
> SimpleIndexFile::Serialize(
370 const SimpleIndexFile::IndexMetadata
& index_metadata
,
371 const SimpleIndex::EntrySet
& entries
) {
372 scoped_ptr
<Pickle
> pickle(new Pickle(sizeof(SimpleIndexFile::PickleHeader
)));
374 index_metadata
.Serialize(pickle
.get());
375 for (SimpleIndex::EntrySet::const_iterator it
= entries
.begin();
376 it
!= entries
.end(); ++it
) {
377 pickle
->WriteUInt64(it
->first
);
378 it
->second
.Serialize(pickle
.get());
380 return pickle
.Pass();
384 void SimpleIndexFile::Deserialize(const char* data
, int data_len
,
385 base::Time
* out_cache_last_modified
,
386 SimpleIndexLoadResult
* out_result
) {
390 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
392 Pickle
pickle(data
, data_len
);
393 if (!pickle
.data()) {
394 LOG(WARNING
) << "Corrupt Simple Index File.";
398 PickleIterator
pickle_it(pickle
);
399 SimpleIndexFile::PickleHeader
* header_p
=
400 pickle
.headerT
<SimpleIndexFile::PickleHeader
>();
401 const uint32 crc_read
= header_p
->crc
;
402 const uint32 crc_calculated
= CalculatePickleCRC(pickle
);
404 if (crc_read
!= crc_calculated
) {
405 LOG(WARNING
) << "Invalid CRC in Simple Index file.";
409 SimpleIndexFile::IndexMetadata index_metadata
;
410 if (!index_metadata
.Deserialize(&pickle_it
)) {
411 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
415 if (!index_metadata
.CheckIndexMetadata()) {
416 LOG(ERROR
) << "Invalid index_metadata on Simple Cache Index.";
421 // TODO(gavinp): Consider using std::unordered_map.
422 entries
->resize(index_metadata
.GetNumberOfEntries() + kExtraSizeForMerge
);
424 while (entries
->size() < index_metadata
.GetNumberOfEntries()) {
426 EntryMetadata entry_metadata
;
427 if (!pickle_it
.ReadUInt64(&hash_key
) ||
428 !entry_metadata
.Deserialize(&pickle_it
)) {
429 LOG(WARNING
) << "Invalid EntryMetadata in Simple Index file.";
433 SimpleIndex::InsertInEntrySet(hash_key
, entry_metadata
, entries
);
436 int64 cache_last_modified
;
437 if (!pickle_it
.ReadInt64(&cache_last_modified
)) {
441 DCHECK(out_cache_last_modified
);
442 *out_cache_last_modified
= base::Time::FromInternalValue(cache_last_modified
);
444 out_result
->did_load
= true;
448 void SimpleIndexFile::SyncRestoreFromDisk(
449 const base::FilePath
& cache_directory
,
450 const base::FilePath
& index_file_path
,
451 SimpleIndexLoadResult
* out_result
) {
452 VLOG(1) << "Simple Cache Index is being restored from disk.";
453 simple_util::SimpleCacheDeleteFile(index_file_path
);
455 SimpleIndex::EntrySet
* entries
= &out_result
->entries
;
457 const bool did_succeed
= TraverseCacheDirectory(
458 cache_directory
, base::Bind(&ProcessEntryFile
, entries
));
460 LOG(ERROR
) << "Could not reconstruct index from disk";
463 out_result
->did_load
= true;
464 // When we restore from disk we write the merged index file to disk right
465 // away, this might save us from having to restore again next time.
466 out_result
->flush_required
= true;
470 bool SimpleIndexFile::LegacyIsIndexFileStale(
471 base::Time cache_last_modified
,
472 const base::FilePath
& index_file_path
) {
473 base::Time index_mtime
;
474 if (!simple_util::GetMTime(index_file_path
, &index_mtime
))
476 return index_mtime
< cache_last_modified
;
479 } // namespace disk_cache