Evict resources from resource pool after timeout
[chromium-blink-merge.git] / net / dns / dns_session.cc
blob4d8f61e99c1c54db4f3eb4e6488bfee228db00b1
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/dns/dns_session.h"
7 #include "base/basictypes.h"
8 #include "base/bind.h"
9 #include "base/lazy_instance.h"
10 #include "base/metrics/histogram_macros.h"
11 #include "base/metrics/sample_vector.h"
12 #include "base/rand_util.h"
13 #include "base/stl_util.h"
14 #include "base/time/time.h"
15 #include "net/base/ip_endpoint.h"
16 #include "net/base/net_errors.h"
17 #include "net/dns/dns_config_service.h"
18 #include "net/dns/dns_socket_pool.h"
19 #include "net/socket/stream_socket.h"
20 #include "net/udp/datagram_client_socket.h"
22 namespace net {
namespace {

// Ceiling applied to every computed retransmission timeout, in milliseconds.
const unsigned int kMaxTimeoutMs = 5000u;

// Floor applied to every computed timeout, in milliseconds. Keeps the timeout
// sane when the resolver is a local DNS proxy with a near-zero RTT.
const unsigned int kMinTimeoutMs = 10u;

// How many buckets the histogram of observed RTTs is divided into.
const size_t kRTTBucketCount = 100u;

// Percentile of the RTT histogram that is used as the retransmission timeout.
const unsigned int kRTOPercentile = 99u;

}  // namespace
36 // Runtime statistics of DNS server.
37 struct DnsSession::ServerStats {
38 ServerStats(base::TimeDelta rtt_estimate_param, RttBuckets* buckets)
39 : last_failure_count(0), rtt_estimate(rtt_estimate_param) {
40 rtt_histogram.reset(new base::SampleVector(buckets));
41 // Seed histogram with 2 samples at |rtt_estimate| timeout.
42 rtt_histogram->Accumulate(
43 static_cast<base::HistogramBase::Sample>(rtt_estimate.InMilliseconds()),
44 2);
47 // Count of consecutive failures after last success.
48 int last_failure_count;
50 // Last time when server returned failure or timeout.
51 base::Time last_failure;
52 // Last time when server returned success.
53 base::Time last_success;
55 // Estimated RTT using moving average.
56 base::TimeDelta rtt_estimate;
57 // Estimated error in the above.
58 base::TimeDelta rtt_deviation;
60 // A histogram of observed RTT .
61 scoped_ptr<base::SampleVector> rtt_histogram;
63 DISALLOW_COPY_AND_ASSIGN(ServerStats);
66 // static
67 base::LazyInstance<DnsSession::RttBuckets>::Leaky DnsSession::rtt_buckets_ =
68 LAZY_INSTANCE_INITIALIZER;
70 DnsSession::RttBuckets::RttBuckets() : base::BucketRanges(kRTTBucketCount + 1) {
71 base::Histogram::InitializeBucketRanges(1, 5000, this);
74 DnsSession::SocketLease::SocketLease(scoped_refptr<DnsSession> session,
75 unsigned server_index,
76 scoped_ptr<DatagramClientSocket> socket)
77 : session_(session), server_index_(server_index), socket_(socket.Pass()) {}
79 DnsSession::SocketLease::~SocketLease() {
80 session_->FreeSocket(server_index_, socket_.Pass());
83 DnsSession::DnsSession(const DnsConfig& config,
84 scoped_ptr<DnsSocketPool> socket_pool,
85 const RandIntCallback& rand_int_callback,
86 NetLog* net_log)
87 : config_(config),
88 socket_pool_(socket_pool.Pass()),
89 rand_callback_(base::Bind(rand_int_callback, 0, kuint16max)),
90 net_log_(net_log),
91 server_index_(0) {
92 socket_pool_->Initialize(&config_.nameservers, net_log);
93 UMA_HISTOGRAM_CUSTOM_COUNTS(
94 "AsyncDNS.ServerCount", config_.nameservers.size(), 0, 10, 11);
95 for (size_t i = 0; i < config_.nameservers.size(); ++i) {
96 server_stats_.push_back(new ServerStats(config_.timeout,
97 rtt_buckets_.Pointer()));
101 DnsSession::~DnsSession() {
102 RecordServerStats();
105 uint16 DnsSession::NextQueryId() const {
106 return static_cast<uint16>(rand_callback_.Run());
109 unsigned DnsSession::NextFirstServerIndex() {
110 unsigned index = NextGoodServerIndex(server_index_);
111 if (config_.rotate)
112 server_index_ = (server_index_ + 1) % config_.nameservers.size();
113 return index;
116 unsigned DnsSession::NextGoodServerIndex(unsigned server_index) {
117 unsigned index = server_index;
118 base::Time oldest_server_failure(base::Time::Now());
119 unsigned oldest_server_failure_index = 0;
121 UMA_HISTOGRAM_BOOLEAN("AsyncDNS.ServerIsGood",
122 server_stats_[server_index]->last_failure.is_null());
124 do {
125 base::Time cur_server_failure = server_stats_[index]->last_failure;
126 // If number of failures on this server doesn't exceed number of allowed
127 // attempts, return its index.
128 if (server_stats_[server_index]->last_failure_count < config_.attempts) {
129 return index;
131 // Track oldest failed server.
132 if (cur_server_failure < oldest_server_failure) {
133 oldest_server_failure = cur_server_failure;
134 oldest_server_failure_index = index;
136 index = (index + 1) % config_.nameservers.size();
137 } while (index != server_index);
139 // If we are here it means that there are no successful servers, so we have
140 // to use one that has failed oldest.
141 return oldest_server_failure_index;
144 void DnsSession::RecordServerFailure(unsigned server_index) {
145 UMA_HISTOGRAM_CUSTOM_COUNTS(
146 "AsyncDNS.ServerFailureIndex", server_index, 0, 10, 11);
147 ++(server_stats_[server_index]->last_failure_count);
148 server_stats_[server_index]->last_failure = base::Time::Now();
151 void DnsSession::RecordServerSuccess(unsigned server_index) {
152 if (server_stats_[server_index]->last_success.is_null()) {
153 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresAfterNetworkChange",
154 server_stats_[server_index]->last_failure_count);
155 } else {
156 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresBeforeSuccess",
157 server_stats_[server_index]->last_failure_count);
159 server_stats_[server_index]->last_failure_count = 0;
160 server_stats_[server_index]->last_failure = base::Time();
161 server_stats_[server_index]->last_success = base::Time::Now();
164 void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) {
165 DCHECK_LT(server_index, server_stats_.size());
167 // For measurement, assume it is the first attempt (no backoff).
168 base::TimeDelta timeout_jacobson = NextTimeoutFromJacobson(server_index, 0);
169 base::TimeDelta timeout_histogram = NextTimeoutFromHistogram(server_index, 0);
170 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobson", rtt - timeout_jacobson);
171 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogram",
172 rtt - timeout_histogram);
173 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobsonUnder",
174 timeout_jacobson - rtt);
175 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogramUnder",
176 timeout_histogram - rtt);
178 // Jacobson/Karels algorithm for TCP.
179 // Using parameters: alpha = 1/8, delta = 1/4, beta = 4
180 base::TimeDelta& estimate = server_stats_[server_index]->rtt_estimate;
181 base::TimeDelta& deviation = server_stats_[server_index]->rtt_deviation;
182 base::TimeDelta current_error = rtt - estimate;
183 estimate += current_error / 8; // * alpha
184 base::TimeDelta abs_error = base::TimeDelta::FromInternalValue(
185 std::abs(current_error.ToInternalValue()));
186 deviation += (abs_error - deviation) / 4; // * delta
188 // Histogram-based method.
189 server_stats_[server_index]->rtt_histogram->Accumulate(
190 static_cast<base::HistogramBase::Sample>(rtt.InMilliseconds()), 1);
193 void DnsSession::RecordLostPacket(unsigned server_index, int attempt) {
194 base::TimeDelta timeout_jacobson =
195 NextTimeoutFromJacobson(server_index, attempt);
196 base::TimeDelta timeout_histogram =
197 NextTimeoutFromHistogram(server_index, attempt);
198 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentJacobson", timeout_jacobson);
199 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentHistogram", timeout_histogram);
202 void DnsSession::RecordServerStats() {
203 for (size_t index = 0; index < server_stats_.size(); ++index) {
204 if (server_stats_[index]->last_failure_count) {
205 if (server_stats_[index]->last_success.is_null()) {
206 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresWithoutSuccess",
207 server_stats_[index]->last_failure_count);
208 } else {
209 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresAfterSuccess",
210 server_stats_[index]->last_failure_count);
217 base::TimeDelta DnsSession::NextTimeout(unsigned server_index, int attempt) {
218 // Respect config timeout if it exceeds |kMaxTimeoutMs|.
219 if (config_.timeout.InMilliseconds() >= kMaxTimeoutMs)
220 return config_.timeout;
221 return NextTimeoutFromHistogram(server_index, attempt);
224 // Allocate a socket, already connected to the server address.
225 scoped_ptr<DnsSession::SocketLease> DnsSession::AllocateSocket(
226 unsigned server_index, const NetLog::Source& source) {
227 scoped_ptr<DatagramClientSocket> socket;
229 socket = socket_pool_->AllocateSocket(server_index);
230 if (!socket.get())
231 return scoped_ptr<SocketLease>();
233 socket->NetLog().BeginEvent(NetLog::TYPE_SOCKET_IN_USE,
234 source.ToEventParametersCallback());
236 SocketLease* lease = new SocketLease(this, server_index, socket.Pass());
237 return scoped_ptr<SocketLease>(lease);
240 scoped_ptr<StreamSocket> DnsSession::CreateTCPSocket(
241 unsigned server_index, const NetLog::Source& source) {
242 return socket_pool_->CreateTCPSocket(server_index, source);
245 // Release a socket.
246 void DnsSession::FreeSocket(unsigned server_index,
247 scoped_ptr<DatagramClientSocket> socket) {
248 DCHECK(socket.get());
250 socket->NetLog().EndEvent(NetLog::TYPE_SOCKET_IN_USE);
252 socket_pool_->FreeSocket(server_index, socket.Pass());
255 base::TimeDelta DnsSession::NextTimeoutFromJacobson(unsigned server_index,
256 int attempt) {
257 DCHECK_LT(server_index, server_stats_.size());
259 base::TimeDelta timeout = server_stats_[server_index]->rtt_estimate +
260 4 * server_stats_[server_index]->rtt_deviation;
262 timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
264 // The timeout doubles every full round.
265 unsigned num_backoffs = attempt / config_.nameservers.size();
267 return std::min(timeout * (1 << num_backoffs),
268 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs));
271 base::TimeDelta DnsSession::NextTimeoutFromHistogram(unsigned server_index,
272 int attempt) {
273 DCHECK_LT(server_index, server_stats_.size());
275 static_assert(std::numeric_limits<base::HistogramBase::Count>::is_signed,
276 "histogram base count assumed to be signed");
278 // Use fixed percentile of observed samples.
279 const base::SampleVector& samples =
280 *server_stats_[server_index]->rtt_histogram;
282 base::HistogramBase::Count total = samples.TotalCount();
283 base::HistogramBase::Count remaining_count = kRTOPercentile * total / 100;
284 size_t index = 0;
285 while (remaining_count > 0 && index < rtt_buckets_.Get().size()) {
286 remaining_count -= samples.GetCountAtIndex(index);
287 ++index;
290 base::TimeDelta timeout =
291 base::TimeDelta::FromMilliseconds(rtt_buckets_.Get().range(index));
293 timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs));
295 // The timeout still doubles every full round.
296 unsigned num_backoffs = attempt / config_.nameservers.size();
298 return std::min(timeout * (1 << num_backoffs),
299 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs));
302 } // namespace net