Remove use of TextView.getMaxLines to restore ICS compatibility
[chromium-blink-merge.git] / courgette / difference_estimator.cc
blobf2f09e15cc9cd92b6f0adcbdc88ef76ed09dd14f
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // We want to measure the similarity of two sequences of bytes as a surrogate
6 // for measuring how well a second sequence will compress differentially to the
7 // first sequence.
9 #include "courgette/difference_estimator.h"
11 #include "base/containers/hash_tables.h"
13 namespace courgette {
15 // Our difference measure is the number of k-tuples that occur in Subject that
16 // don't occur in Base.
17 const int kTupleSize = 4;
19 namespace {
21 static_assert(kTupleSize >= 4 && kTupleSize <= 8,
22 "kTupleSize should be between 4 and 8");
24 size_t HashTuple(const uint8* source) {
25 size_t hash1 = *reinterpret_cast<const uint32*>(source);
26 size_t hash2 = *reinterpret_cast<const uint32*>(source + kTupleSize - 4);
27 size_t hash = ((hash1 * 17 + hash2 * 37) + (hash1 >> 17)) ^ (hash2 >> 23);
28 return hash;
31 bool RegionsEqual(const Region& a, const Region& b) {
32 if (a.length() != b.length())
33 return false;
34 return memcmp(a.start(), b.start(), a.length()) == 0;
37 } // anonymous namepace
39 class DifferenceEstimator::Base {
40 public:
41 explicit Base(const Region& region) : region_(region) { }
43 void Init() {
44 const uint8* start = region_.start();
45 const uint8* end = region_.end() - (kTupleSize - 1);
46 for (const uint8* p = start; p < end; ++p) {
47 size_t hash = HashTuple(p);
48 hashes_.insert(hash);
52 const Region& region() const { return region_; }
54 private:
55 Region region_;
56 base::hash_set<size_t> hashes_;
58 friend class DifferenceEstimator;
59 DISALLOW_COPY_AND_ASSIGN(Base);
62 class DifferenceEstimator::Subject {
63 public:
64 explicit Subject(const Region& region) : region_(region) {}
66 const Region& region() const { return region_; }
68 private:
69 Region region_;
71 DISALLOW_COPY_AND_ASSIGN(Subject);
74 DifferenceEstimator::DifferenceEstimator() {}
76 DifferenceEstimator::~DifferenceEstimator() {
77 for (size_t i = 0; i < owned_bases_.size(); ++i)
78 delete owned_bases_[i];
79 for (size_t i = 0; i < owned_subjects_.size(); ++i)
80 delete owned_subjects_[i];
83 DifferenceEstimator::Base* DifferenceEstimator::MakeBase(const Region& region) {
84 Base* base = new Base(region);
85 base->Init();
86 owned_bases_.push_back(base);
87 return base;
90 DifferenceEstimator::Subject* DifferenceEstimator::MakeSubject(
91 const Region& region) {
92 Subject* subject = new Subject(region);
93 owned_subjects_.push_back(subject);
94 return subject;
97 size_t DifferenceEstimator::Measure(Base* base, Subject* subject) {
98 size_t mismatches = 0;
99 const uint8* start = subject->region().start();
100 const uint8* end = subject->region().end() - (kTupleSize - 1);
102 const uint8* p = start;
103 while (p < end) {
104 size_t hash = HashTuple(p);
105 if (base->hashes_.find(hash) == base->hashes_.end()) {
106 ++mismatches;
108 p += 1;
111 if (mismatches == 0) {
112 if (RegionsEqual(base->region(), subject->region()))
113 return 0;
115 ++mismatches; // Guarantee not zero.
116 return mismatches;
119 } // namespace