Added User-Agent header to GET requests (in addition to just POSTs).
[dotnetoauth.git] / src / DotNetOpenAuth / Messaging / UntrustedWebRequestHandler.cs
blobead3f40e0ea47c384b5c22eecd07dca4dd061eba
1 //-----------------------------------------------------------------------
2 // <copyright file="UntrustedWebRequestHandler.cs" company="Andrew Arnott">
3 // Copyright (c) Andrew Arnott. All rights reserved.
4 // </copyright>
5 //-----------------------------------------------------------------------
7 #if DEBUG
8 #define LONGTIMEOUT
9 #endif
10 namespace DotNetOpenAuth.Messaging {
11 using System;
12 using System.Collections.Generic;
13 using System.Diagnostics;
14 using System.Diagnostics.CodeAnalysis;
15 using System.Globalization;
16 using System.IO;
17 using System.Net;
18 using System.Net.Cache;
19 using System.Text.RegularExpressions;
20 using DotNetOpenAuth.Configuration;
21 using DotNetOpenAuth.Messaging;
23 /// <summary>
24 /// A paranoid HTTP get/post request engine. It helps to protect against attacks from remote
25 /// server leaving dangling connections, sending too much data, causing requests against
26 /// internal servers, etc.
27 /// </summary>
28 /// <remarks>
29 /// Protections include:
30 /// * Conservative maximum time to receive the complete response.
31 /// * Only HTTP and HTTPS schemes are permitted.
32 /// * Internal IP address ranges are not permitted (e.g. 127.*.*.*, 10.*.*.*, ::1)
33 /// * Internal host names are not permitted (periods must be found in the host name)
34 /// If a particular host would be permitted but is in the blacklist, it is not allowed.
35 /// If a particular host would not be permitted but is in the whitelist, it is allowed.
36 /// </remarks>
37 public class UntrustedWebRequestHandler : IDirectSslWebRequestHandler {
38 /// <summary>
39 /// The default cache policy to use for HTTP requests.
40 /// </summary>
41 internal static readonly RequestCachePolicy DefaultCachePolicy = HttpWebRequest.DefaultCachePolicy;
43 /// <summary>
44 /// The set of URI schemes allowed in untrusted web requests.
45 /// </summary>
46 private ICollection<string> allowableSchemes = new List<string> { "http", "https" };
48 /// <summary>
49 /// The collection of blacklisted hosts.
50 /// </summary>
51 private ICollection<string> blacklistHosts = new List<string>(Configuration.BlacklistHosts.KeysAsStrings);
53 /// <summary>
54 /// The collection of regular expressions used to identify additional blacklisted hosts.
55 /// </summary>
56 private ICollection<Regex> blacklistHostsRegex = new List<Regex>(Configuration.BlacklistHostsRegex.KeysAsRegexs);
58 /// <summary>
59 /// The collection of whitelisted hosts.
60 /// </summary>
61 private ICollection<string> whitelistHosts = new List<string>(Configuration.WhitelistHosts.KeysAsStrings);
63 /// <summary>
64 /// The collection of regular expressions used to identify additional whitelisted hosts.
65 /// </summary>
66 private ICollection<Regex> whitelistHostsRegex = new List<Regex>(Configuration.WhitelistHostsRegex.KeysAsRegexs);
68 /// <summary>
69 /// The maximum redirections to follow in the course of a single request.
70 /// </summary>
71 [DebuggerBrowsable(DebuggerBrowsableState.Never)]
72 private int maximumRedirections = Configuration.MaximumRedirections;
74 /// <summary>
75 /// The maximum number of bytes to read from the response of an untrusted server.
76 /// </summary>
77 [DebuggerBrowsable(DebuggerBrowsableState.Never)]
78 private int maximumBytesToRead = Configuration.MaximumBytesToRead;
80 /// <summary>
81 /// The handler that will actually send the HTTP request and collect
82 /// the response once the untrusted server gates have been satisfied.
83 /// </summary>
84 private IDirectWebRequestHandler chainedWebRequestHandler;
/// <summary>
/// Initializes a new instance of the <see cref="UntrustedWebRequestHandler"/> class
/// that hands vetted requests to a new <see cref="StandardWebRequestHandler"/>.
/// </summary>
public UntrustedWebRequestHandler()
	: this(new StandardWebRequestHandler()) {
}

/// <summary>
/// Initializes a new instance of the <see cref="UntrustedWebRequestHandler"/> class
/// that hands vetted requests to the given handler.
/// </summary>
/// <param name="chainedWebRequestHandler">The chained web request handler. Must not be null.</param>
public UntrustedWebRequestHandler(IDirectWebRequestHandler chainedWebRequestHandler) {
	ErrorUtilities.VerifyArgumentNotNull(chainedWebRequestHandler, "chainedWebRequestHandler");

	this.chainedWebRequestHandler = chainedWebRequestHandler;

	// Start from the timeouts given in the configuration section.
	this.ReadWriteTimeout = Configuration.ReadWriteTimeout;
	this.Timeout = Configuration.Timeout;
#if LONGTIMEOUT
	// LONGTIMEOUT builds replace both timeouts with one hour.
	TimeSpan oneHour = TimeSpan.FromHours(1);
	this.ReadWriteTimeout = oneHour;
	this.Timeout = oneHour;
#endif
}
/// <summary>
/// Gets or sets the upper bound on the number of bytes read from the response
/// to any given HTTP request.
/// </summary>
/// <value>Default is 1MB. Cannot be less than 2KB (2048 bytes).</value>
public int MaximumBytesToRead {
	get { return this.maximumBytesToRead; }

	set {
		// Values below the 2KB floor are rejected as out of range.
		bool meetsMinimum = value >= 2048;
		ErrorUtilities.VerifyArgumentInRange(meetsMinimum, "value");
		this.maximumBytesToRead = value;
	}
}
/// <summary>
/// Gets or sets the total number of redirections to allow on any one request.
/// </summary>
/// <value>Default is 10. Cannot be negative.</value>
public int MaximumRedirections {
	get { return this.maximumRedirections; }

	set {
		// Zero is accepted; only negative values are out of range.
		bool isNonNegative = value >= 0;
		ErrorUtilities.VerifyArgumentInRange(isNonNegative, "value");
		this.maximumRedirections = value;
	}
}
/// <summary>
/// Gets or sets how long a single read or write operation may take to complete.
/// </summary>
/// <value>
/// Set by the constructor from the configuration section (documented default:
/// 500 milliseconds); builds that define LONGTIMEOUT use one hour instead.
/// </value>
public TimeSpan ReadWriteTimeout { get; set; }

/// <summary>
/// Gets or sets how long an entire HTTP request may take.
/// </summary>
/// <value>
/// Set by the constructor from the configuration section (documented default:
/// 5 seconds); builds that define LONGTIMEOUT use one hour instead.
/// </value>
public TimeSpan Timeout { get; set; }
/// <summary>
/// Gets the host name literals that are allowed even when they fail the
/// standard security checks.
/// </summary>
public ICollection<string> WhitelistHosts {
	get { return this.whitelistHosts; }
}

/// <summary>
/// Gets the regular expressions matching host names that are allowed even when
/// they fail the standard security checks.
/// </summary>
public ICollection<Regex> WhitelistHostsRegex {
	get { return this.whitelistHostsRegex; }
}

/// <summary>
/// Gets the host name literals that are rejected even when they pass the
/// standard security checks.
/// </summary>
public ICollection<string> BlacklistHosts {
	get { return this.blacklistHosts; }
}

/// <summary>
/// Gets the regular expressions matching host names that are rejected even when
/// they pass the standard security checks.
/// </summary>
public ICollection<Regex> BlacklistHostsRegex {
	get { return this.blacklistHostsRegex; }
}
/// <summary>
/// Gets the settings for this class as specified in the host's .config file.
/// </summary>
private static DotNetOpenAuth.Configuration.UntrustedWebRequestSection Configuration {
	get {
		return UntrustedWebRequestSection.Configuration;
	}
}
182 #region IDirectSslWebRequestHandler Members
/// <summary>
/// Prepares an <see cref="HttpWebRequest"/> that contains a POST entity for sending the entity.
/// </summary>
/// <param name="request">The <see cref="HttpWebRequest"/> that should contain the entity.</param>
/// <param name="requireSsl">if set to <c>true</c> all requests made with this instance must be completed using SSL.</param>
/// <returns>
/// The stream the caller should write the entity data to.
/// </returns>
public Stream GetRequestStream(HttpWebRequest request, bool requireSsl) {
	ErrorUtilities.VerifyArgumentNotNull(request, "request");

	// Vet the destination before handing the request to the chained handler.
	Uri destination = request.RequestUri;
	this.EnsureAllowableRequestUri(destination, requireSsl);

	// This is the POST preparation path, so the untrusted-request policies are applied now.
	const bool PreparingPost = true;
	this.PrepareRequest(request, PreparingPost);

	// Submit the request and hand the request stream back to the caller.
	Stream entityStream = this.chainedWebRequestHandler.GetRequestStream(request);
	return entityStream;
}
/// <summary>
/// Processes an <see cref="HttpWebRequest"/>, following redirects manually, and converts the
/// final <see cref="HttpWebResponse"/> to a <see cref="DirectWebResponse"/> instance.
/// </summary>
/// <param name="request">The <see cref="HttpWebRequest"/> to handle.</param>
/// <param name="requireSsl">if set to <c>true</c> all requests made with this instance must be completed using SSL.</param>
/// <returns>
/// An instance of <see cref="CachedDirectWebResponse"/> describing the response.
/// </returns>
/// <remarks>
/// The initial request is always attempted and does not count against
/// <see cref="MaximumRedirections"/>; only redirects that are actually followed do.
/// Every URI visited, including each redirect target, is checked against the
/// untrusted-request policies before it is requested. A redirect received in
/// response to a POST is reported as a protocol error, as is exceeding the
/// redirect limit.
/// </remarks>
public DirectWebResponse GetResponse(HttpWebRequest request, bool requireSsl) {
	ErrorUtilities.VerifyArgumentNotNull(request, "request");

	// This request MAY have already been prepared by GetRequestStream, but
	// we have no guarantee, so do it just to be safe.
	this.PrepareRequest(request, false);

	// Since we may require SSL for every redirect, we handle each redirect manually
	// in order to detect and fail if any redirect sends us to an HTTP url.
	// We COULD allow automatic redirect in the cases where HTTPS is not required,
	// but our mock request infrastructure can't do redirects on its own either.
	Uri originalRequestUri = request.RequestUri;

	// Count redirects rather than requests, so that a limit of zero still sends
	// the original request and a limit of N really follows N redirects.
	int redirectsRemaining = this.MaximumRedirections;
	while (true) {
		this.EnsureAllowableRequestUri(request.RequestUri, requireSsl);
		CachedDirectWebResponse response = this.chainedWebRequestHandler.GetResponse(request).GetSnapshot(this.MaximumBytesToRead);

		bool isRedirect =
			response.Status == HttpStatusCode.MovedPermanently ||
			response.Status == HttpStatusCode.Redirect ||
			response.Status == HttpStatusCode.RedirectMethod ||
			response.Status == HttpStatusCode.RedirectKeepVerb;
		if (!isRedirect) {
			return response;
		}

		// We have no copy of the post entity stream to repeat on our manually
		// cloned HttpWebRequest, so we have to bail.
		ErrorUtilities.VerifyProtocol(request.Method != "POST", MessagingStrings.UntrustedRedirectsOnPOSTNotSupported);

		if (redirectsRemaining <= 0) {
			break;
		}

		redirectsRemaining--;

		// The Location header may be relative, so resolve it against the URI that answered.
		Uri redirectUri = new Uri(response.FinalUri, response.Headers[HttpResponseHeader.Location]);
		request = request.Clone(redirectUri);
	}

	throw ErrorUtilities.ThrowProtocol(MessagingStrings.TooManyRedirects, originalRequestUri);
}
244 #endregion
246 #region IDirectWebRequestHandler Members
/// <summary>
/// Prepares an <see cref="HttpWebRequest"/> that contains a POST entity for sending the entity,
/// without requiring SSL.
/// </summary>
/// <param name="request">The <see cref="HttpWebRequest"/> that should contain the entity.</param>
/// <returns>
/// The stream the caller should write the entity data to.
/// </returns>
Stream IDirectWebRequestHandler.GetRequestStream(HttpWebRequest request) {
	// This interface method carries no SSL flag, so SSL is not demanded.
	const bool RequireSsl = false;
	return this.GetRequestStream(request, RequireSsl);
}
/// <summary>
/// Processes an <see cref="HttpWebRequest"/> and converts the
/// <see cref="HttpWebResponse"/> to a <see cref="DirectWebResponse"/> instance,
/// without requiring SSL.
/// </summary>
/// <param name="request">The <see cref="HttpWebRequest"/> to handle.</param>
/// <returns>An instance of <see cref="DirectWebResponse"/> describing the response.</returns>
DirectWebResponse IDirectWebRequestHandler.GetResponse(HttpWebRequest request) {
	// This interface method carries no SSL flag, so SSL is not demanded.
	const bool RequireSsl = false;
	return this.GetResponse(request, RequireSsl);
}
269 #endregion
/// <summary>
/// Checks a host name against the whitelist literals and whitelist regular expressions.
/// </summary>
/// <param name="host">The host name to test.</param>
/// <returns>
/// <c>true</c> if the host is whitelisted; otherwise, <c>false</c>.
/// </returns>
private bool IsHostWhitelisted(string host) {
	ICollection<string> literals = this.WhitelistHosts;
	ICollection<Regex> patterns = this.WhitelistHostsRegex;
	return this.IsHostInList(host, literals, patterns);
}
/// <summary>
/// Checks a host name against the blacklist literals and blacklist regular expressions.
/// </summary>
/// <param name="host">The host name to test.</param>
/// <returns>
/// <c>true</c> if the host is blacklisted; otherwise, <c>false</c>.
/// </returns>
private bool IsHostBlacklisted(string host) {
	ICollection<string> literals = this.BlacklistHosts;
	ICollection<Regex> patterns = this.BlacklistHostsRegex;
	return this.IsHostInList(host, literals, patterns);
}
/// <summary>
/// Tests a host name against a list of literal host names and a list of host name patterns.
/// </summary>
/// <param name="host">The host name. Must be non-empty.</param>
/// <param name="stringList">The list of host names, compared ordinally and case-insensitively.</param>
/// <param name="regexList">The list of regex patterns of host names.</param>
/// <returns>
/// <c>true</c> if the specified host falls within at least one of the given lists; otherwise, <c>false</c>.
/// </returns>
private bool IsHostInList(string host, ICollection<string> stringList, ICollection<Regex> regexList) {
	ErrorUtilities.VerifyNonZeroLength(host, "host");
	ErrorUtilities.VerifyArgumentNotNull(stringList, "stringList");
	ErrorUtilities.VerifyArgumentNotNull(regexList, "regexList");

	bool found = false;

	// Literal names are tried first.
	foreach (string literal in stringList) {
		if (string.Equals(host, literal, StringComparison.OrdinalIgnoreCase)) {
			found = true;
			break;
		}
	}

	// The patterns are consulted only when no literal matched.
	if (!found) {
		foreach (Regex pattern in regexList) {
			if (pattern.IsMatch(host)) {
				found = true;
				break;
			}
		}
	}

	return found;
}
/// <summary>
/// Verifies that a request URI qualifies under our security policies, reporting
/// an error through <c>ErrorUtilities</c> when it does not.
/// </summary>
/// <param name="requestUri">The request URI.</param>
/// <param name="requireSsl">If set to <c>true</c>, only web requests that can be made entirely over SSL will succeed.</param>
private void EnsureAllowableRequestUri(Uri requestUri, bool requireSsl) {
	// Scheme and host policy first.
	bool isAllowable = this.IsUriAllowable(requestUri);
	ErrorUtilities.VerifyArgument(isAllowable, MessagingStrings.UnsafeWebRequestDetected, requestUri);

	// Then the transport requirement: when SSL is demanded the scheme must be https.
	bool sslSatisfied = !requireSsl || String.Equals(requestUri.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
	ErrorUtilities.VerifyProtocol(sslSatisfied, MessagingStrings.InsecureWebRequestWithSslRequired, requestUri);
}
/// <summary>
/// Determines whether a URI is allowed based on scheme and host name.
/// No requireSSL check is done here.
/// </summary>
/// <param name="uri">The URI to test for whether it should be allowed.</param>
/// <returns>
/// <c>true</c> if the URI may be requested; otherwise, <c>false</c>.
/// </returns>
/// <remarks>
/// A host that fails one of the built-in checks is still allowed when it is
/// whitelisted. A host that passes every built-in check is rejected when it is
/// blacklisted. Every rejection is logged as a warning.
/// </remarks>
private bool IsUriAllowable(Uri uri) {
	ErrorUtilities.VerifyArgumentNotNull(uri, "uri");
	if (!this.allowableSchemes.Contains(uri.Scheme)) {
		Logger.WarnFormat("Rejecting URL {0} because it uses a disallowed scheme.", uri);
		return false;
	}

	// Allow for whitelist or blacklist to override our detection.
	// The reason is spliced into a sentence that supplies its own period.
	Func<string, bool> failsUnlessWhitelisted = (string reason) => {
		if (this.IsHostWhitelisted(uri.DnsSafeHost)) {
			return true;
		}

		Logger.WarnFormat("Rejecting URL {0} because {1}.", uri, reason);
		return false;
	};

	// Try to interpret the hostname as an IP address so we can test for internal
	// IP address ranges. Note that IP addresses can appear in many forms
	// (e.g. http://127.0.0.1, http://2130706433, http://0x0100007f, http://::1)
	// so we convert them to a canonical IPAddress instance before testing.
	// Note that Uri.IsLoopback is very unreliable, not catching many of these variants.
	IPAddress hostIPAddress;
	if (IPAddress.TryParse(uri.DnsSafeHost, out hostIPAddress)) {
		byte[] addressBytes = hostIPAddress.GetAddressBytes();

		// The host is actually an IP address.
		switch (hostIPAddress.AddressFamily) {
			case System.Net.Sockets.AddressFamily.InterNetwork:
				if (IsInternalIPv4Address(addressBytes)) {
					return failsUnlessWhitelisted("it is a loopback, private or link-local IPv4 address");
				}

				break;
			case System.Net.Sockets.AddressFamily.InterNetworkV6:
				if (this.IsIPv6Loopback(hostIPAddress)) {
					return failsUnlessWhitelisted("it is a loopback address");
				}

				// An IPv4-mapped address (::ffff:a.b.c.d) names the IPv4 host a.b.c.d,
				// so it must not be a way around the IPv4 checks.
				if (IsInternalIPv4MappedAddress(addressBytes)) {
					return failsUnlessWhitelisted("it is an IPv4-mapped loopback, private or link-local address");
				}

				break;
			default:
				return failsUnlessWhitelisted("it does not use an IPv4 or IPv6 address");
		}
	} else {
		// The host is given by name. We require names to contain periods to
		// help make sure it's not an internal address.
		if (!uri.Host.Contains(".")) {
			return failsUnlessWhitelisted("it does not contain a period in the host name");
		}
	}

	if (this.IsHostBlacklisted(uri.DnsSafeHost)) {
		Logger.WarnFormat("Rejected URL {0} because it is blacklisted.", uri);
		return false;
	}

	return true;
}

/// <summary>
/// Determines whether the bytes of an IPv4 address fall in a range that is not
/// reachable over the public Internet.
/// </summary>
/// <param name="addressBytes">The four bytes of the IPv4 address, most significant first.</param>
/// <returns>
/// <c>true</c> for 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8, 169.254.0.0/16,
/// 172.16.0.0/12 and 192.168.0.0/16; otherwise, <c>false</c>.
/// </returns>
private static bool IsInternalIPv4Address(byte[] addressBytes) {
	switch (addressBytes[0]) {
		case 0:   // "this network"
		case 10:  // RFC 1918 private range
		case 127: // loopback
			return true;
		case 169: // RFC 3927 link-local, 169.254.0.0/16
			return addressBytes[1] == 254;
		case 172: // RFC 1918 private range, 172.16.0.0/12
			return addressBytes[1] >= 16 && addressBytes[1] <= 31;
		case 192: // RFC 1918 private range, 192.168.0.0/16
			return addressBytes[1] == 168;
		default:
			return false;
	}
}

/// <summary>
/// Determines whether the bytes of an IPv6 address form an IPv4-mapped address
/// (::ffff:a.b.c.d) whose embedded IPv4 address is internal.
/// </summary>
/// <param name="addressBytes">The sixteen bytes of the IPv6 address.</param>
/// <returns>
/// <c>true</c> if the address is IPv4-mapped and the embedded IPv4 address is
/// internal; otherwise, <c>false</c>.
/// </returns>
private static bool IsInternalIPv4MappedAddress(byte[] addressBytes) {
	if (addressBytes.Length != 16 || addressBytes[10] != 0xFF || addressBytes[11] != 0xFF) {
		return false;
	}

	// The first 80 bits of an IPv4-mapped address are zero.
	for (int i = 0; i < 10; i++) {
		if (addressBytes[i] != 0) {
			return false;
		}
	}

	byte[] embeddedIPv4 = new byte[4];
	Array.Copy(addressBytes, 12, embeddedIPv4, 0, 4);
	return IsInternalIPv4Address(embeddedIPv4);
}
/// <summary>
/// Determines whether an IP address is the IPv6 equivalent of "localhost/127.0.0.1",
/// i.e. every address byte is zero except the last, which is one.
/// </summary>
/// <param name="ip">The ip address to check.</param>
/// <returns>
/// <c>true</c> if this is a loopback IP address; <c>false</c> otherwise.
/// </returns>
private bool IsIPv6Loopback(IPAddress ip) {
	ErrorUtilities.VerifyArgumentNotNull(ip, "ip");

	byte[] octets = ip.GetAddressBytes();
	int lastIndex = octets.Length - 1;

	// Every byte ahead of the final one has to be zero.
	int position = 0;
	while (position < lastIndex) {
		if (octets[position] != 0) {
			return false;
		}

		position++;
	}

	// The final byte has to be exactly one.
	return octets[lastIndex] == 1;
}
413 /// <summary>
414 /// Prepares the request by setting timeout and redirect policies.
415 /// </summary>
416 /// <param name="request">The request to prepare.</param>
417 /// <param name="preparingPost"><c>true</c> if this is a POST request whose headers have not yet been sent out; <c>false</c> otherwise.</param>
418 private void PrepareRequest(HttpWebRequest request, bool preparingPost) {
419 ErrorUtilities.VerifyArgumentNotNull(request, "request");
421 // Be careful to not try to change the HTTP headers that have already gone out.
422 if (preparingPost || request.Method == "GET") {
423 // Set/override a few properties of the request to apply our policies for untrusted requests.
424 request.ReadWriteTimeout = (int)this.ReadWriteTimeout.TotalMilliseconds;
425 request.Timeout = (int)this.Timeout.TotalMilliseconds;
426 request.KeepAlive = false;
428 // If SSL is required throughout, we cannot allow auto redirects because
429 // it may include a pass through an unprotected HTTP request.
430 // We have to follow redirects manually.
431 request.AllowAutoRedirect = false;