Fixing build: GetViewContainer changed name from under me. :)
[chromium-blink-merge.git] / chrome / browser / visitedlink_master.cc
blobae307ab53c45dfd1d24632b255bb87884ddf2719
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/visitedlink_master.h"
7 #include <windows.h>
8 #include <shlobj.h>
9 #include <algorithm>
11 #include "base/logging.h"
12 #include "base/message_loop.h"
13 #include "base/path_service.h"
14 #include "base/stack_container.h"
15 #include "base/string_util.h"
16 #include "chrome/browser/browser_process.h"
17 #include "chrome/browser/history/history.h"
18 #include "chrome/browser/profile.h"
19 #include "chrome/common/win_util.h"
21 #ifdef _WIN32
22 #pragma comment(lib, "rpcrt4.lib") // for UuidCreate().
23 #endif
25 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
26 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
27 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
28 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
29 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
31 const int32 VisitedLinkMaster::kFileCurrentVersion = 2;
33 // the signature at the beginning of the URL table = "VLnk" (visited links)
34 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
35 const size_t VisitedLinkMaster::kFileHeaderSize =
36 kFileHeaderSaltOffset + LINK_SALT_LENGTH;
38 // This value should also be the same as the smallest size in the lookup
39 // table in NewTableSizeForCount (prime number).
40 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
42 const int32 VisitedLinkMaster::kBigDeleteThreshold = 64;
44 namespace {
46 // Fills the given salt structure with some quasi-random values
47 // Fills some salt values into the given buffer, we ask the system to generate
48 // a UUID for us, and we use some of the bytes out of that. It is not necessary
49 // to generate a cryptographically strong random string, only that it be
50 // reasonably different for different users. Here, we use every-other byte of
51 // the 16-byte UUID.
52 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
53 UUID uuid;
54 UuidCreate(&uuid);
56 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
57 salt[0] = static_cast<uint8>(uuid.Data1 & 0xFF);
58 salt[1] = static_cast<uint8>((uuid.Data1 >> 8) & 0xFF);
59 salt[2] = static_cast<uint8>(uuid.Data2 & 0xFF);
60 salt[3] = static_cast<uint8>(uuid.Data3 & 0xFF);
61 salt[4] = uuid.Data4[0];
62 salt[5] = uuid.Data4[2];
63 salt[6] = uuid.Data4[4];
64 salt[7] = uuid.Data4[6];
66 // AsyncWriter ----------------------------------------------------------------
68 // This task executes on a background thread and executes a write. This
69 // prevents us from blocking the UI thread doing I/O.
70 class AsyncWriter : public Task {
71 public:
72 AsyncWriter(HANDLE hfile, int32 offset, const void* data, int32 data_len)
73 : hfile_(hfile),
74 offset_(offset) {
75 data_->resize(data_len);
76 memcpy(&*data_->begin(), data, data_len);
79 virtual void Run() {
80 WriteToFile(hfile_, offset_,
81 &*data_->begin(), static_cast<int32>(data_->size()));
84 // Exposed as a static so it can be called directly from the Master to
85 // reduce the number of platform-specific I/O sites we have. Returns true if
86 // the write was complete.
87 static bool WriteToFile(HANDLE hfile,
88 int32 offset,
89 const void* data,
90 int32 data_len) {
91 if (SetFilePointer(hfile, offset, NULL, FILE_BEGIN) ==
92 INVALID_SET_FILE_POINTER)
93 return false; // Don't write to an invalid part of the file.
95 DWORD num_written;
96 return WriteFile(hfile, data, data_len, &num_written, NULL) &&
97 (num_written == data_len);
100 private:
101 // The data to write and where to write it.
102 HANDLE hfile_;
103 int32 offset_; // Offset from the beginning of the file.
105 // Most writes are just a single fingerprint, so we reserve that much in this
106 // object to avoid mallocs in that case.
107 StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_;
109 DISALLOW_EVIL_CONSTRUCTORS(AsyncWriter);
112 // Used to asynchronously set the end of the file. This must be done on the
113 // same thread as the writing to keep things synchronized.
114 class AsyncSetEndOfFile : public Task {
115 public:
116 explicit AsyncSetEndOfFile(HANDLE hfile) : hfile_(hfile) {}
118 virtual void Run() {
119 SetEndOfFile(hfile_);
122 private:
123 HANDLE hfile_;
124 DISALLOW_EVIL_CONSTRUCTORS(AsyncSetEndOfFile);
127 // Used to asynchronously close a file. This must be done on the same thread as
128 // the writing to keep things synchronized.
129 class AsyncCloseHandle : public Task {
130 public:
131 explicit AsyncCloseHandle(HANDLE hfile) : hfile_(hfile) {}
133 virtual void Run() {
134 CloseHandle(hfile_);
137 private:
138 HANDLE hfile_;
139 DISALLOW_EVIL_CONSTRUCTORS(AsyncCloseHandle);
142 } // namespace
144 // TableBuilder ---------------------------------------------------------------
146 // How rebuilding from history works
147 // ---------------------------------
149 // We mark that we're rebuilding from history by setting the table_builder_
150 // member in VisitedLinkMaster to the TableBuilder we create. This builder
151 // will be called on the history thread by the history system for every URL
152 // in the database.
154 // The builder will store the fingerprints for those URLs, and then marshalls
155 // back to the main thread where the VisitedLinkMaster will be notified. The
156 // master then replaces its table with a new table containing the computed
157 // fingerprints.
159 // The builder must remain active while the history system is using it.
160 // Sometimes, the master will be deleted before the rebuild is complete, in
161 // which case it notifies the builder via DisownMaster(). The builder will
162 // delete itself once rebuilding is complete, and not execute any callback.
163 class VisitedLinkMaster::TableBuilder : public HistoryService::URLEnumerator,
164 public base::RefCounted<TableBuilder> {
165 public:
166 TableBuilder(VisitedLinkMaster* master,
167 const uint8 salt[LINK_SALT_LENGTH]);
169 // Called on the main thread when the master is being destroyed. This will
170 // prevent a crash when the query completes and the master is no longer
171 // around. We can not actually do anything but mark this fact, since the
172 // table will be being rebuilt simultaneously on the other thread.
173 void DisownMaster();
175 // HistoryService::URLEnumerator
176 virtual void OnURL(const GURL& url);
177 virtual void OnComplete(bool succeed);
179 private:
180 // OnComplete mashals to this function on the main thread to do the
181 // notification.
182 void OnCompleteMainThread();
184 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
185 VisitedLinkMaster* master_;
187 // The thread the visited link master is on where we will notify it.
188 MessageLoop* main_message_loop_;
190 // Indicates whether the operation has failed or not.
191 bool success_;
193 // Salt for this new table.
194 uint8 salt_[LINK_SALT_LENGTH];
196 // Stores the fingerprints we computed on the background thread.
197 std::vector<VisitedLinkMaster::Fingerprint> fingerprints_;
200 // VisitedLinkMaster ----------------------------------------------------------
202 VisitedLinkMaster::VisitedLinkMaster(base::Thread* file_thread,
203 PostNewTableEvent* poster,
204 Profile* profile) {
205 InitMembers(file_thread, poster, profile);
208 VisitedLinkMaster::VisitedLinkMaster(base::Thread* file_thread,
209 PostNewTableEvent* poster,
210 HistoryService* history_service,
211 bool suppress_rebuild,
212 const std::wstring& filename,
213 int32 default_table_size) {
214 InitMembers(file_thread, poster, NULL);
216 database_name_override_.assign(filename);
217 table_size_override_ = default_table_size;
218 history_service_override_ = history_service;
219 suppress_rebuild_ = suppress_rebuild;
222 VisitedLinkMaster::~VisitedLinkMaster() {
223 if (table_builder_.get()) {
224 // Prevent the table builder from calling us back now that we're being
225 // destroyed. Note that we DON'T delete the object, since the history
226 // system is still writing into it. When that is complete, the table
227 // builder will destroy itself when it finds we are gone.
228 table_builder_->DisownMaster();
230 FreeURLTable();
233 void VisitedLinkMaster::InitMembers(base::Thread* file_thread,
234 PostNewTableEvent* poster,
235 Profile* profile) {
236 if (file_thread)
237 file_thread_ = file_thread->message_loop();
238 else
239 file_thread_ = NULL;
241 post_new_table_event_ = poster;
242 file_ = NULL;
243 shared_memory_ = NULL;
244 shared_memory_serial_ = 0;
245 used_items_ = 0;
246 table_size_override_ = 0;
247 history_service_override_ = NULL;
248 suppress_rebuild_ = false;
249 profile_ = profile;
251 #ifndef NDEBUG
252 posted_asynchronous_operation_ = false;
253 #endif
256 // The shared memory name should be unique on the system and also needs to
257 // change when we create a new table. The scheme we use includes the process
258 // ID, an increasing serial number, and the profile ID.
259 std::wstring VisitedLinkMaster::GetSharedMemoryName() const {
260 // When unit testing, there's no profile, so use an empty ID string.
261 std::wstring profile_id;
262 if (profile_)
263 profile_id = profile_->GetID().c_str();
265 return StringPrintf(L"GVisitedLinks_%lu_%lu_%ls",
266 GetCurrentProcessId(), shared_memory_serial_,
267 profile_id.c_str());
270 bool VisitedLinkMaster::Init() {
271 if (!InitFromFile())
272 return InitFromScratch(suppress_rebuild_);
273 return true;
276 bool VisitedLinkMaster::ShareToProcess(ProcessHandle process,
277 SharedMemoryHandle *new_handle) {
278 if (shared_memory_)
279 return shared_memory_->ShareToProcess(process, new_handle);
281 NOTREACHED();
282 return false;
285 SharedMemoryHandle VisitedLinkMaster::GetSharedMemoryHandle() {
286 return shared_memory_->handle();
289 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
290 // Extra check that we are not off the record. This should not happen.
291 if (profile_ && profile_->IsOffTheRecord()) {
292 NOTREACHED();
293 return null_hash_;
296 if (!url.is_valid())
297 return null_hash_; // Don't add invalid URLs.
299 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
300 url.spec().size(),
301 salt_);
302 if (table_builder_) {
303 // If we have a pending delete for this fingerprint, cancel it.
304 std::set<Fingerprint>::iterator found =
305 deleted_since_rebuild_.find(fingerprint);
306 if (found != deleted_since_rebuild_.end())
307 deleted_since_rebuild_.erase(found);
309 // A rebuild is in progress, save this addition in the temporary list so
310 // it can be added once rebuild is complete.
311 added_since_rebuild_.insert(fingerprint);
314 // If the table is "full", we don't add URLs and just drop them on the floor.
315 // This can happen if we get thousands of new URLs and something causes
316 // the table resizing to fail. This check prevents a hang in that case. Note
317 // that this is *not* the resize limit, this is just a sanity check.
318 if (used_items_ / 8 > table_length_ / 10)
319 return null_hash_; // Table is more than 80% full.
321 return AddFingerprint(fingerprint);
324 void VisitedLinkMaster::AddURL(const GURL& url) {
325 Hash index = TryToAddURL(url);
326 if (!table_builder_ && index != null_hash_) {
327 // Not rebuilding, so we want to keep the file on disk up-to-date.
328 WriteUsedItemCountToFile();
329 WriteHashRangeToFile(index, index);
330 ResizeTableIfNecessary();
334 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
335 for (std::vector<GURL>::const_iterator i = url.begin();
336 i != url.end(); ++i) {
337 Hash index = TryToAddURL(*i);
338 if (!table_builder_ && index != null_hash_)
339 ResizeTableIfNecessary();
342 // Keeps the file on disk up-to-date.
343 if (!table_builder_)
344 WriteFullTable();
347 void VisitedLinkMaster::DeleteAllURLs() {
348 // Any pending modifications are invalid.
349 added_since_rebuild_.clear();
350 deleted_since_rebuild_.clear();
352 // Clear the hash table.
353 used_items_ = 0;
354 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
356 // Resize it if it is now too empty. Resize may write the new table out for
357 // us, otherwise, schedule writing the new table to disk ourselves.
358 if (!ResizeTableIfNecessary())
359 WriteFullTable();
362 void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) {
363 typedef std::set<GURL>::const_iterator SetIterator;
365 if (urls.empty())
366 return;
368 if (table_builder_) {
369 // A rebuild is in progress, save this deletion in the temporary list so
370 // it can be added once rebuild is complete.
371 for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
372 if (!i->is_valid())
373 continue;
375 Fingerprint fingerprint =
376 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_);
377 deleted_since_rebuild_.insert(fingerprint);
379 // If the URL was just added and now we're deleting it, it may be in the
380 // list of things added since the last rebuild. Delete it from that list.
381 std::set<Fingerprint>::iterator found =
382 added_since_rebuild_.find(fingerprint);
383 if (found != added_since_rebuild_.end())
384 added_since_rebuild_.erase(found);
386 // Delete the URLs from the in-memory table, but don't bother writing
387 // to disk since it will be replaced soon.
388 DeleteFingerprint(fingerprint, false);
390 return;
393 // Compute the deleted URLs' fingerprints and delete them
394 std::set<Fingerprint> deleted_fingerprints;
395 for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
396 if (!i->is_valid())
397 continue;
398 deleted_fingerprints.insert(
399 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_));
401 DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
404 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
405 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
406 Fingerprint fingerprint) {
407 if (!hash_table_ || table_length_ == 0) {
408 NOTREACHED(); // Not initialized.
409 return null_hash_;
412 Hash cur_hash = HashFingerprint(fingerprint);
413 Hash first_hash = cur_hash;
414 while (true) {
415 Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
416 if (cur_fingerprint == fingerprint)
417 return null_hash_; // This fingerprint is already in there, do nothing.
419 if (cur_fingerprint == null_fingerprint_) {
420 // End of probe sequence found, insert here.
421 hash_table_[cur_hash] = fingerprint;
422 used_items_++;
423 return cur_hash;
426 // Advance in the probe sequence.
427 cur_hash = IncrementHash(cur_hash);
428 if (cur_hash == first_hash) {
429 // This means that we've wrapped around and are about to go into an
430 // infinite loop. Something was wrong with the hashtable resizing
431 // logic, so stop here.
432 NOTREACHED();
433 return null_hash_;
438 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
439 const std::set<Fingerprint>& fingerprints) {
440 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
442 // Delete the URLs from the table.
443 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
444 i != fingerprints.end(); ++i)
445 DeleteFingerprint(*i, !bulk_write);
447 // These deleted fingerprints may make us shrink the table.
448 if (ResizeTableIfNecessary())
449 return; // The resize function wrote the new table to disk for us.
451 // Nobody wrote this out for us, write the full file to disk.
452 if (bulk_write)
453 WriteFullTable();
456 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
457 bool update_file) {
458 if (!hash_table_ || table_length_ == 0) {
459 NOTREACHED(); // Not initialized.
460 return false;
462 if (!IsVisited(fingerprint))
463 return false; // Not in the database to delete.
465 // First update the header used count.
466 used_items_--;
467 if (update_file)
468 WriteUsedItemCountToFile();
470 Hash deleted_hash = HashFingerprint(fingerprint);
472 // Find the range of "stuff" in the hash table that is adjacent to this
473 // fingerprint. These are things that could be affected by the change in
474 // the hash table. Since we use linear probing, anything after the deleted
475 // item up until an empty item could be affected.
476 Hash end_range = deleted_hash;
477 while (true) {
478 Hash next_hash = IncrementHash(end_range);
479 if (next_hash == deleted_hash)
480 break; // We wrapped around and the whole table is full.
481 if (!hash_table_[next_hash])
482 break; // Found the last spot.
483 end_range = next_hash;
486 // We could get all fancy and move the affected fingerprints around, but
487 // instead we just remove them all and re-add them (minus our deleted one).
488 // This will mean there's a small window of time where the affected links
489 // won't be marked visited.
490 StackVector<Fingerprint, 32> shuffled_fingerprints;
491 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive.
492 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
493 if (hash_table_[i] != fingerprint) {
494 // Don't save the one we're deleting!
495 shuffled_fingerprints->push_back(hash_table_[i]);
497 // This will balance the increment of this value in AddFingerprint below
498 // so there is no net change.
499 used_items_--;
501 hash_table_[i] = null_fingerprint_;
504 if (!shuffled_fingerprints->empty()) {
505 // Need to add the new items back.
506 for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
507 AddFingerprint(shuffled_fingerprints[i]);
510 // Write the affected range to disk [deleted_hash, end_range].
511 if (update_file)
512 WriteHashRangeToFile(deleted_hash, end_range);
514 return true;
517 bool VisitedLinkMaster::WriteFullTable() {
518 // This function can get called when the file is open, for example, when we
519 // resize the table. We must handle this case and not try to reopen the file,
520 // since there may be write operations pending on the file I/O thread.
522 // Note that once we start writing, we do not delete on error. This means
523 // there can be a partial file, but the short file will be detected next time
524 // we start, and will be replaced.
526 // This might possibly get corrupted if we crash in the middle of writing.
527 // We should pick up the most common types of these failures when we notice
528 // that the file size is different when we load it back in, and then we will
529 // regenerate the table.
530 win_util::ScopedHandle hfile_closer; // Valid only when not open already.
531 HANDLE hfile; // Always valid.
532 if (file_) {
533 hfile = file_;
534 } else {
535 std::wstring filename;
536 GetDatabaseFileName(&filename);
537 hfile_closer.Set(
538 CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, NULL,
539 CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL));
540 if (!hfile_closer.IsValid())
541 return false;
542 hfile = hfile_closer;
545 // Write the new header.
546 int32 header[4];
547 header[0] = kFileSignature;
548 header[1] = kFileCurrentVersion;
549 header[2] = table_length_;
550 header[3] = used_items_;
551 WriteToFile(hfile, 0, header, sizeof(header));
552 WriteToFile(hfile, sizeof(header), salt_, LINK_SALT_LENGTH);
554 // Write the hash data.
555 WriteToFile(hfile, kFileHeaderSize,
556 hash_table_, table_length_ * sizeof(Fingerprint));
558 // The hash table may have shrunk, so make sure this is the end.
559 if (file_thread_) {
560 AsyncSetEndOfFile* setter = new AsyncSetEndOfFile(hfile);
561 file_thread_->PostTask(FROM_HERE, setter);
562 } else {
563 SetEndOfFile(hfile);
566 // Keep the file open so we can dynamically write changes to it. When the
567 // file was alrady open, the hfile_closer is NULL, and file_ is already good.
568 if (hfile_closer.IsValid())
569 file_ = hfile_closer.Take();
570 return true;
573 bool VisitedLinkMaster::InitFromFile() {
574 DCHECK(file_ == NULL);
576 std::wstring filename;
577 GetDatabaseFileName(&filename);
578 win_util::ScopedHandle hfile(
579 CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE, 0, NULL,
580 OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL));
581 if (!hfile.IsValid())
582 return false;
584 int32 num_entries, used_count;
585 if (!ReadFileHeader(hfile, &num_entries, &used_count, salt_))
586 return false; // Header isn't valid.
588 // Allocate and read the table.
589 if (!CreateURLTable(num_entries, false))
590 return false;
591 if (!ReadFromFile(hfile, kFileHeaderSize,
592 hash_table_, num_entries * sizeof(Fingerprint))) {
593 FreeURLTable();
594 return false;
596 used_items_ = used_count;
598 file_ = hfile.Take();
599 return true;
602 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
603 int32 table_size = kDefaultTableSize;
604 if (table_size_override_)
605 table_size = table_size_override_;
607 // The salt must be generated before the table so that it can be copied to
608 // the shared memory.
609 GenerateSalt(salt_);
610 if (!CreateURLTable(table_size, true))
611 return false;
613 if (suppress_rebuild) {
614 // When we disallow rebuilds (normally just unit tests), just use the
615 // current empty table.
616 return WriteFullTable();
619 // This will build the table from history. On the first run, history will
620 // be empty, so this will be correct. This will also write the new table
621 // to disk. We don't want to save explicitly here, since the rebuild may
622 // not complete, leaving us with an empty but valid visited link database.
623 // In the future, we won't know we need to try rebuilding again.
624 return RebuildTableFromHistory();
627 bool VisitedLinkMaster::ReadFileHeader(HANDLE hfile,
628 int32* num_entries,
629 int32* used_count,
630 uint8 salt[LINK_SALT_LENGTH]) {
631 int32 file_size = GetFileSize(hfile, NULL);
632 if (file_size <= kFileHeaderSize)
633 return false;
635 uint8 header[kFileHeaderSize];
636 if (!ReadFromFile(hfile, 0, &header, kFileHeaderSize))
637 return false;
639 // Verify the signature.
640 uint32 signature;
641 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
642 if (signature != kFileSignature)
643 return false;
645 // Verify the version is up-to-date. As with other read errors, a version
646 // mistmatch will trigger a rebuild of the database from history, which will
647 // have the effect of migrating the database.
648 int32 version;
649 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
650 if (version != kFileCurrentVersion)
651 return false; // Bad version.
653 // Read the table size and make sure it matches the file size.
654 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
655 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
656 return false; // Bad size.
658 // Read the used item count.
659 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
660 if (*used_count > *num_entries)
661 return false; // Bad used item count;
663 // Read the salt.
664 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
666 // This file looks OK from the header's perspective.
667 return true;
670 bool VisitedLinkMaster::GetDatabaseFileName(std::wstring* filename) {
671 if (!database_name_override_.empty()) {
672 // use this filename, the directory must exist
673 *filename = database_name_override_;
674 return true;
677 if (!profile_ || profile_->GetPath().empty())
678 return false;
680 *filename = profile_->GetPath();
681 filename->append(L"\\Visited Links");
682 return true;
685 // Initializes the shared memory structure. The salt should already be filled
686 // in so that it can be written to the shared memory
687 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
688 // The table is the size of the table followed by the entries.
689 int32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
691 // Create the shared memory object.
692 shared_memory_ = new SharedMemory();
693 if (!shared_memory_)
694 return false;
696 if (!shared_memory_->Create(GetSharedMemoryName().c_str(),
697 false, false, alloc_size))
698 return false;
700 // Map into our process.
701 if (!shared_memory_->Map(alloc_size)) {
702 delete shared_memory_;
703 shared_memory_ = NULL;
704 return false;
707 if (init_to_empty) {
708 memset(shared_memory_->memory(), 0, alloc_size);
709 used_items_ = 0;
711 table_length_ = num_entries;
713 // Save the header for other processes to read.
714 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
715 header->length = table_length_;
716 memcpy(header->salt, salt_, LINK_SALT_LENGTH);
718 // Our table pointer is just the data immediately following the size.
719 hash_table_ = reinterpret_cast<Fingerprint*>(
720 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
722 #ifndef NDEBUG
723 DebugValidate();
724 #endif
726 return true;
729 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
730 SharedMemory *old_shared_memory = shared_memory_;
731 Fingerprint* old_hash_table = hash_table_;
732 int32 old_table_length = table_length_;
733 if (!CreateURLTable(num_entries, true)) {
734 // Try to put back the old state.
735 shared_memory_ = old_shared_memory;
736 hash_table_ = old_hash_table;
737 table_length_ = old_table_length;
738 return false;
740 return true;
743 void VisitedLinkMaster::FreeURLTable() {
744 if (shared_memory_) {
745 delete shared_memory_;
746 shared_memory_ = NULL;
748 if (file_) {
749 if (file_thread_) {
750 AsyncCloseHandle* closer = new AsyncCloseHandle(file_);
751 file_thread_->PostTask(FROM_HERE, closer);
752 } else {
753 CloseHandle(file_);
758 bool VisitedLinkMaster::ResizeTableIfNecessary() {
759 DCHECK(table_length_ > 0) << "Must have a table";
761 // Load limits for good performance/space. We are pretty conservative about
762 // keeping the table not very full. This is because we use linear probing
763 // which increases the likelihood of clumps of entries which will reduce
764 // performance.
765 const float max_table_load = 0.5f; // Grow when we're > this full.
766 const float min_table_load = 0.2f; // Shrink when we're < this full.
768 float load = ComputeTableLoad();
769 if (load < max_table_load &&
770 (table_length_ <= kDefaultTableSize || load > min_table_load))
771 return false;
773 // Table needs to grow or shrink.
774 int new_size = NewTableSizeForCount(used_items_);
775 DCHECK(new_size > used_items_);
776 DCHECK(load <= min_table_load || new_size > table_length_);
777 ResizeTable(new_size);
778 return true;
781 void VisitedLinkMaster::ResizeTable(int32 new_size) {
782 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
783 shared_memory_serial_++;
785 #ifndef NDEBUG
786 DebugValidate();
787 #endif
789 SharedMemory* old_shared_memory = shared_memory_;
790 Fingerprint* old_hash_table = hash_table_;
791 int32 old_table_length = table_length_;
792 if (!BeginReplaceURLTable(new_size))
793 return;
795 // Now we have two tables, our local copy which is the old one, and the new
796 // one loaded into this object where we need to copy the data.
797 for (int32 i = 0; i < old_table_length; i++) {
798 Fingerprint cur = old_hash_table[i];
799 if (cur)
800 AddFingerprint(cur);
803 // On error unmapping, just forget about it since we can't do anything
804 // else to release it.
805 delete old_shared_memory;
807 // Send an update notification to all child processes so they read the new
808 // table.
809 post_new_table_event_(shared_memory_);
811 #ifndef NDEBUG
812 DebugValidate();
813 #endif
815 // The new table needs to be written to disk.
816 WriteFullTable();
819 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
820 // These table sizes are selected to be the maximum prime number less than
821 // a "convenient" multiple of 1K.
822 static const int table_sizes[] = {
823 16381, // 16K = 16384 <- don't shrink below this table size
824 // (should be == default_table_size)
825 32767, // 32K = 32768
826 65521, // 64K = 65536
827 130051, // 128K = 131072
828 262127, // 256K = 262144
829 524269, // 512K = 524288
830 1048549, // 1M = 1048576
831 2097143, // 2M = 2097152
832 4194301, // 4M = 4194304
833 8388571, // 8M = 8388608
834 16777199, // 16M = 16777216
835 33554347}; // 32M = 33554432
837 // Try to leave the table 33% full.
838 int desired = item_count * 3;
840 // Find the closest prime.
841 for (int i = 0; i < arraysize(table_sizes); i ++) {
842 if (table_sizes[i] > desired)
843 return table_sizes[i];
846 // Growing very big, just approximate a "good" number, not growing as much
847 // as normal.
848 return item_count * 2 - 1;
851 // See the TableBuilder definition in the header file for how this works.
852 bool VisitedLinkMaster::RebuildTableFromHistory() {
853 DCHECK(!table_builder_);
854 if (table_builder_)
855 return false;
857 HistoryService* history_service = history_service_override_;
858 if (!history_service && profile_) {
859 history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
862 if (!history_service) {
863 DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't "
864 "obtain a HistoryService.";
865 return false;
868 // TODO(brettw) make sure we have reasonable salt!
869 table_builder_ = new TableBuilder(this, salt_);
871 // Make sure the table builder stays live during the call, even if the
872 // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread.
873 table_builder_->AddRef();
874 history_service->IterateURLs(table_builder_);
875 return true;
878 // See the TableBuilder definition in the header file for how this works.
879 void VisitedLinkMaster::OnTableRebuildComplete(
880 bool success,
881 const std::vector<Fingerprint>& fingerprints) {
882 if (success) {
883 // Replace the old table with a new blank one.
884 shared_memory_serial_++;
886 // We are responsible for freeing it AFTER it has been replaced if
887 // replacement succeeds.
888 SharedMemory* old_shared_memory = shared_memory_;
890 int new_table_size = NewTableSizeForCount(
891 static_cast<int>(fingerprints.size()));
892 if (BeginReplaceURLTable(new_table_size)) {
893 // Free the old table.
894 delete old_shared_memory;
896 // Add the stored fingerprints to the hash table.
897 for (size_t i = 0; i < fingerprints.size(); i++)
898 AddFingerprint(fingerprints[i]);
900 // Also add anything that was added while we were asynchronously
901 // generating the new table.
902 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
903 i != added_since_rebuild_.end(); ++i)
904 AddFingerprint(*i);
905 added_since_rebuild_.clear();
907 // Now handle deletions.
908 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
909 deleted_since_rebuild_.clear();
911 // Send an update notification to all child processes.
912 post_new_table_event_(shared_memory_);
914 WriteFullTable();
917 table_builder_ = NULL; // Will release our reference to the builder.
919 // Notify the unit test that the rebuild is complete (will be NULL in prod.)
920 if (rebuild_complete_task_.get()) {
921 rebuild_complete_task_->Run();
922 rebuild_complete_task_.reset(NULL);
926 void VisitedLinkMaster::WriteToFile(HANDLE hfile,
927 int32 offset,
928 void* data,
929 int32 data_size) {
930 #ifndef NDEBUG
931 posted_asynchronous_operation_ = true;
932 #endif
934 if (file_thread_) {
935 // Send the write to the other thread for execution to avoid blocking.
936 AsyncWriter* writer = new AsyncWriter(hfile, offset, data, data_size);
937 file_thread_->PostTask(FROM_HERE, writer);
938 } else {
939 // When there is no I/O thread, we are probably running in unit test mode,
940 // just do the write synchronously.
941 AsyncWriter::WriteToFile(hfile, offset, data, data_size);
945 void VisitedLinkMaster::WriteUsedItemCountToFile() {
946 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
949 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
950 if (last_hash < first_hash) {
951 // Handle wraparound at 0. This first write is first_hash->EOF
952 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
953 &hash_table_[first_hash],
954 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
956 // Now do 0->last_lash.
957 WriteToFile(file_, kFileHeaderSize, hash_table_,
958 (last_hash + 1) * sizeof(Fingerprint));
959 } else {
960 // Normal case, just write the range.
961 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
962 &hash_table_[first_hash],
963 (last_hash - first_hash + 1) * sizeof(Fingerprint));
967 bool VisitedLinkMaster::ReadFromFile(HANDLE hfile,
968 int32 offset,
969 void* data,
970 int32 data_size) {
971 #ifndef NDEBUG
972 // Since this function is synchronous, we require that no asynchronous
973 // operations could possibly be pending.
974 DCHECK(!posted_asynchronous_operation_);
975 #endif
977 SetFilePointer(hfile, offset, NULL, FILE_BEGIN);
979 DWORD num_read;
980 return ReadFile(hfile, data, data_size, &num_read, NULL) &&
981 num_read == data_size;
984 // VisitedLinkTableBuilder ----------------------------------------------------
986 VisitedLinkMaster::TableBuilder::TableBuilder(
987 VisitedLinkMaster* master,
988 const uint8 salt[LINK_SALT_LENGTH])
989 : master_(master),
990 success_(true),
991 main_message_loop_(MessageLoop::current()) {
992 fingerprints_.reserve(4096);
993 memcpy(salt_, salt, sizeof(salt));
996 // TODO(brettw): Do we want to try to cancel the request if this happens? It
997 // could delay shutdown if there are a lot of URLs.
998 void VisitedLinkMaster::TableBuilder::DisownMaster() {
999 master_ = NULL;
1002 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
1003 if (!url.is_empty()) {
1004 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
1005 url.spec().data(), url.spec().length(), salt_));
1009 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
1010 success_ = success;
1011 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
1013 // Marshal to the main thread to notify the VisitedLinkMaster that the
1014 // rebuild is complete.
1015 main_message_loop_->PostTask(FROM_HERE, NewRunnableMethod(this,
1016 &TableBuilder::OnCompleteMainThread));
1019 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
1020 if (master_)
1021 master_->OnTableRebuildComplete(success_, fingerprints_);
1023 // WILL (generally) DELETE THIS! This balances the AddRef in
1024 // VisitedLinkMaster::RebuildTableFromHistory.
1025 Release();