Fix for -mminimal-toc detection, enable BTLS on FreeBSD PowerPC (#18578)
[mono-project.git] / docs / HtmlAgilityPack / HtmlWeb.cs
blob39dd4267569f98064d1b0641163453ad508f0fc2
1 // HtmlAgilityPack V1.0 - Simon Mourier <simon underscore mourier at hotmail dot com>
2 using System;
3 using System.IO;
4 using System.Net;
5 using System.Text;
6 using System.Xml;
7 using System.Xml.Serialization;
8 using System.Xml.Xsl;
9 using Microsoft.Win32;
11 namespace HtmlAgilityPack
13 /// <summary>
14 /// A utility class to get HTML document from HTTP.
15 /// </summary>
16 public class HtmlWeb
18 #region Delegates
/// <summary>
/// Signature of the callback invoked once an HTTP response has been obtained.
/// </summary>
/// <param name="request">The request that was executed.</param>
/// <param name="response">The response received for that request.</param>
public delegate void PostResponseHandler(HttpWebRequest request, HttpWebResponse response);

/// <summary>
/// Signature of the callback invoked with a loaded document before it is returned to the caller.
/// </summary>
/// <param name="document">The document about to be returned.</param>
public delegate void PreHandleDocumentHandler(HtmlDocument document);

/// <summary>
/// Signature of the callback invoked just before an HTTP request is executed.
/// </summary>
/// <param name="request">The request about to be executed; it may be modified.</param>
/// <returns>true to let the request proceed; false to cancel it.</returns>
public delegate bool PreRequestHandler(HttpWebRequest request);
35 #endregion
37 #region Fields
// --- configuration -------------------------------------------------------
private bool _autoDetectEncoding = true;
private bool _cacheOnly;
private string _cachePath;
private int _streamBufferSize = 1024;
private bool _useCookies;
private bool _usingCache;

// --- state describing the most recent request ----------------------------
private bool _fromCache;
private int _requestDuration;
private Uri _responseUri;
private HttpStatusCode _statusCode = HttpStatusCode.OK;

/// <summary>
/// Callback invoked after an HTTP request has been executed.
/// </summary>
public PostResponseHandler PostResponse;

/// <summary>
/// Callback invoked before a loaded HTML document is returned.
/// </summary>
public PreHandleDocumentHandler PreHandleDocument;

/// <summary>
/// Callback invoked before an HTTP request is executed.
/// </summary>
public PreRequestHandler PreRequest;
66 #endregion
68 #region Properties
/// <summary>
/// Gets or sets whether the encoding of a loaded document is detected automatically.
/// The backing field is initialized to true.
/// </summary>
public bool AutoDetectEncoding
{
    get
    {
        return _autoDetectEncoding;
    }
    set
    {
        _autoDetectEncoding = value;
    }
}
/// <summary>
/// Gets or sets whether documents are served exclusively from the cache.
/// When true and the document is absent from the cache, nothing is loaded.
/// </summary>
/// <exception cref="HtmlWebException">Set to true while <see cref="UsingCache"/> is false.</exception>
public bool CacheOnly
{
    get
    {
        return _cacheOnly;
    }
    set
    {
        // Turning cache-only mode on is meaningless unless caching is active.
        if (value)
        {
            if (!UsingCache)
            {
                throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
            }
        }
        _cacheOnly = value;
    }
}
96 /// <summary>
97 /// Gets or Sets the cache path. If null, no caching mechanism will be used.
98 /// </summary>
99 public string CachePath
101 get { return _cachePath; }
102 set { _cachePath = value; }
105 /// <summary>
106 /// Gets a value indicating if the last document was retrieved from the cache.
107 /// </summary>
108 public bool FromCache
110 get { return _fromCache; }
113 /// <summary>
114 /// Gets the last request duration in milliseconds.
115 /// </summary>
116 public int RequestDuration
118 get { return _requestDuration; }
121 /// <summary>
122 /// Gets the URI of the Internet resource that actually responded to the request.
123 /// </summary>
124 public Uri ResponseUri
126 get { return _responseUri; }
129 /// <summary>
130 /// Gets the last request status.
131 /// </summary>
132 public HttpStatusCode StatusCode
134 get { return _statusCode; }
137 /// <summary>
138 /// Gets or Sets the size of the buffer used for memory operations.
139 /// </summary>
140 public int StreamBufferSize
142 get { return _streamBufferSize; }
145 if (_streamBufferSize <= 0)
147 throw new ArgumentException("Size must be greater than zero.");
149 _streamBufferSize = value;
153 /// <summary>
154 /// Gets or Sets a value indicating if cookies will be stored.
155 /// </summary>
156 public bool UseCookies
158 get { return _useCookies; }
159 set { _useCookies = value; }
162 /// <summary>
163 /// Gets or Sets a value indicating whether the caching mechanisms should be used or not.
164 /// </summary>
165 public bool UsingCache
169 if (_cachePath == null)
171 return false;
173 return _usingCache;
177 if ((value) && (_cachePath == null))
179 throw new HtmlWebException("You need to define a CachePath first.");
181 _usingCache = value;
185 #endregion
187 #region Public Methods
189 /// <summary>
190 /// Gets the MIME content type for a given path extension.
191 /// </summary>
192 /// <param name="extension">The input path extension.</param>
193 /// <param name="def">The default content type to return if any error occurs.</param>
194 /// <returns>The path extension's MIME content type.</returns>
195 public static string GetContentTypeForExtension(string extension, string def)
197 if (string.IsNullOrEmpty(extension))
199 return def;
201 string contentType = "";
204 RegistryKey reg = Registry.ClassesRoot;
205 reg = reg.OpenSubKey(extension, false);
206 if (reg != null) contentType = (string)reg.GetValue("", def);
208 catch (Exception)
210 contentType = def;
212 return contentType;
215 /// <summary>
216 /// Gets the path extension for a given MIME content type.
217 /// </summary>
218 /// <param name="contentType">The input MIME content type.</param>
219 /// <param name="def">The default path extension to return if any error occurs.</param>
220 /// <returns>The MIME content type's path extension.</returns>
221 public static string GetExtensionForContentType(string contentType, string def)
223 if (string.IsNullOrEmpty(contentType))
225 return def;
227 string ext = "";
230 RegistryKey reg = Registry.ClassesRoot;
231 reg = reg.OpenSubKey(@"MIME\Database\Content Type\" + contentType, false);
232 if (reg != null) ext = (string)reg.GetValue("Extension", def);
234 catch (Exception)
236 ext = def;
238 return ext;
241 /// <summary>
242 /// Creates an instance of the given type from the specified Internet resource.
243 /// </summary>
244 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
245 /// <param name="type">The requested type.</param>
246 /// <returns>An newly created instance.</returns>
247 public object CreateInstance(string url, Type type)
249 return CreateInstance(url, null, null, type);
252 /// <summary>
253 /// Creates an instance of the given type from the specified Internet resource.
254 /// </summary>
255 /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
256 /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
257 /// <param name="xsltArgs">An <see cref="XsltArgumentList"/> containing the namespace-qualified arguments used as input to the transform.</param>
258 /// <param name="type">The requested type.</param>
259 /// <returns>An newly created instance.</returns>
260 public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type)
262 return CreateInstance(htmlUrl, xsltUrl, xsltArgs, type, null);
265 /// <summary>
266 /// Creates an instance of the given type from the specified Internet resource.
267 /// </summary>
268 /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
269 /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
270 /// <param name="xsltArgs">An <see cref="XsltArgumentList"/> containing the namespace-qualified arguments used as input to the transform.</param>
271 /// <param name="type">The requested type.</param>
272 /// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.</param>
273 /// <returns>An newly created instance.</returns>
274 public object CreateInstance(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, Type type,
275 string xmlPath)
277 StringWriter sw = new StringWriter();
278 XmlTextWriter writer = new XmlTextWriter(sw);
279 if (xsltUrl == null)
281 LoadHtmlAsXml(htmlUrl, writer);
283 else
285 if (xmlPath == null)
287 LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer);
289 else
291 LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, xmlPath);
294 writer.Flush();
295 StringReader sr = new StringReader(sw.ToString());
296 XmlTextReader reader = new XmlTextReader(sr);
297 XmlSerializer serializer = new XmlSerializer(type);
298 object o;
301 o = serializer.Deserialize(reader);
303 catch (InvalidOperationException ex)
305 throw new Exception(ex + ", --- xml:" + sw);
307 return o;
310 /// <summary>
311 /// Gets an HTML document from an Internet resource and saves it to the specified file.
312 /// </summary>
313 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
314 /// <param name="path">The location of the file where you want to save the document.</param>
315 public void Get(string url, string path)
317 Get(url, path, "GET");
320 /// <summary>
321 /// Gets an HTML document from an Internet resource and saves it to the specified file. - Proxy aware
322 /// </summary>
323 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
324 /// <param name="path">The location of the file where you want to save the document.</param>
325 /// <param name="proxy"></param>
326 /// <param name="credentials"></param>
327 public void Get(string url, string path, WebProxy proxy, NetworkCredential credentials)
329 Get(url, path, proxy, credentials, "GET");
332 /// <summary>
333 /// Gets an HTML document from an Internet resource and saves it to the specified file.
334 /// </summary>
335 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
336 /// <param name="path">The location of the file where you want to save the document.</param>
337 /// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
338 public void Get(string url, string path, string method)
340 Uri uri = new Uri(url);
341 if ((uri.Scheme == Uri.UriSchemeHttps) ||
342 (uri.Scheme == Uri.UriSchemeHttp))
344 Get(uri, method, path, null, null, null);
346 else
348 throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
352 /// <summary>
353 /// Gets an HTML document from an Internet resource and saves it to the specified file. Understands Proxies
354 /// </summary>
355 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
356 /// <param name="path">The location of the file where you want to save the document.</param>
357 /// <param name="credentials"></param>
358 /// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
359 /// <param name="proxy"></param>
360 public void Get(string url, string path, WebProxy proxy, NetworkCredential credentials, string method)
362 Uri uri = new Uri(url);
363 if ((uri.Scheme == Uri.UriSchemeHttps) ||
364 (uri.Scheme == Uri.UriSchemeHttp))
366 Get(uri, method, path, null, proxy, credentials);
368 else
370 throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
374 /// <summary>
375 /// Gets the cache file path for a specified url.
376 /// </summary>
377 /// <param name="uri">The url fo which to retrieve the cache path. May not be null.</param>
378 /// <returns>The cache file path.</returns>
379 public string GetCachePath(Uri uri)
381 if (uri == null)
383 throw new ArgumentNullException("uri");
385 if (!UsingCache)
387 throw new HtmlWebException("Cache is not enabled. Set UsingCache to true first.");
389 string cachePath;
390 if (uri.AbsolutePath == "/")
392 cachePath = Path.Combine(_cachePath, ".htm");
394 else
396 cachePath = Path.Combine(_cachePath, (uri.Host + uri.AbsolutePath).Replace('/', '\\'));
398 return cachePath;
401 /// <summary>
402 /// Gets an HTML document from an Internet resource.
403 /// </summary>
404 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
405 /// <returns>A new HTML document.</returns>
406 public HtmlDocument Load(string url)
408 return Load(url, "GET");
411 /// <summary>
412 /// Gets an HTML document from an Internet resource.
413 /// </summary>
414 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
415 /// <param name="proxyHost">Host to use for Proxy</param>
416 /// <param name="proxyPort">Port the Proxy is on</param>
417 /// <param name="userId">User Id for Authentication</param>
418 /// <param name="password">Password for Authentication</param>
419 /// <returns>A new HTML document.</returns>
420 public HtmlDocument Load(string url, string proxyHost, int proxyPort, string userId, string password)
422 //Create my proxy
423 WebProxy myProxy = new WebProxy(proxyHost, proxyPort);
424 myProxy.BypassProxyOnLocal = true;
426 //Create my credentials
427 NetworkCredential myCreds = null;
428 if ((userId != null) && (password != null))
430 myCreds = new NetworkCredential(userId, password);
431 CredentialCache credCache = new CredentialCache();
432 //Add the creds
433 credCache.Add(myProxy.Address, "Basic", myCreds);
434 credCache.Add(myProxy.Address, "Digest", myCreds);
437 return Load(url, "GET", myProxy, myCreds);
440 /// <summary>
441 /// Loads an HTML document from an Internet resource.
442 /// </summary>
443 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
444 /// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
445 /// <returns>A new HTML document.</returns>
446 public HtmlDocument Load(string url, string method)
448 Uri uri = new Uri(url);
449 HtmlDocument doc;
450 if ((uri.Scheme == Uri.UriSchemeHttps) ||
451 (uri.Scheme == Uri.UriSchemeHttp))
453 doc = LoadUrl(uri, method, null, null);
455 else
457 if (uri.Scheme == Uri.UriSchemeFile)
459 doc = new HtmlDocument();
460 doc.OptionAutoCloseOnEnd = false;
461 doc.OptionAutoCloseOnEnd = true;
462 doc.DetectEncodingAndLoad(url, _autoDetectEncoding);
464 else
466 throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
469 if (PreHandleDocument != null)
471 PreHandleDocument(doc);
473 return doc;
476 /// <summary>
477 /// Loads an HTML document from an Internet resource.
478 /// </summary>
479 /// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
480 /// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND.</param>
481 /// <param name="proxy">Proxy to use with this request</param>
482 /// <param name="credentials">Credentials to use when authenticating</param>
483 /// <returns>A new HTML document.</returns>
484 public HtmlDocument Load(string url, string method, WebProxy proxy, NetworkCredential credentials)
486 Uri uri = new Uri(url);
487 HtmlDocument doc;
488 if ((uri.Scheme == Uri.UriSchemeHttps) ||
489 (uri.Scheme == Uri.UriSchemeHttp))
491 doc = LoadUrl(uri, method, proxy, credentials);
493 else
495 if (uri.Scheme == Uri.UriSchemeFile)
497 doc = new HtmlDocument();
498 doc.OptionAutoCloseOnEnd = false;
499 doc.OptionAutoCloseOnEnd = true;
500 doc.DetectEncodingAndLoad(url, _autoDetectEncoding);
502 else
504 throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
507 if (PreHandleDocument != null)
509 PreHandleDocument(doc);
511 return doc;
514 /// <summary>
515 /// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter.
516 /// </summary>
517 /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
518 /// <param name="writer">The XmlTextWriter to which you want to save.</param>
519 public void LoadHtmlAsXml(string htmlUrl, XmlTextWriter writer)
521 HtmlDocument doc = Load(htmlUrl);
522 doc.Save(writer);
525 /// <summary>
526 /// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter, after an XSLT transformation.
527 /// </summary>
528 /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp".</param>
529 /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
530 /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
531 /// <param name="writer">The XmlTextWriter to which you want to save.</param>
532 public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer)
534 LoadHtmlAsXml(htmlUrl, xsltUrl, xsltArgs, writer, null);
537 /// <summary>
538 /// Loads an HTML document from an Internet resource and saves it to the specified XmlTextWriter, after an XSLT transformation.
539 /// </summary>
540 /// <param name="htmlUrl">The requested URL, such as "http://Myserver/Mypath/Myfile.asp". May not be null.</param>
541 /// <param name="xsltUrl">The URL that specifies the XSLT stylesheet to load.</param>
542 /// <param name="xsltArgs">An XsltArgumentList containing the namespace-qualified arguments used as input to the transform.</param>
543 /// <param name="writer">The XmlTextWriter to which you want to save.</param>
544 /// <param name="xmlPath">A file path where the temporary XML before transformation will be saved. Mostly used for debugging purposes.</param>
545 public void LoadHtmlAsXml(string htmlUrl, string xsltUrl, XsltArgumentList xsltArgs, XmlTextWriter writer,
546 string xmlPath)
548 if (htmlUrl == null)
550 throw new ArgumentNullException("htmlUrl");
553 HtmlDocument doc = Load(htmlUrl);
555 if (xmlPath != null)
557 XmlTextWriter w = new XmlTextWriter(xmlPath, doc.Encoding);
558 doc.Save(w);
559 w.Close();
561 if (xsltArgs == null)
563 xsltArgs = new XsltArgumentList();
566 // add some useful variables to the xslt doc
567 xsltArgs.AddParam("url", "", htmlUrl);
568 xsltArgs.AddParam("requestDuration", "", RequestDuration);
569 xsltArgs.AddParam("fromCache", "", FromCache);
571 XslCompiledTransform xslt = new XslCompiledTransform();
572 xslt.Load(xsltUrl);
573 xslt.Transform(doc, xsltArgs, writer);
576 #endregion
578 #region Private Methods
/// <summary>
/// Makes <paramref name="target"/> writable: clears the read-only attribute when the file
/// already exists, otherwise creates its parent directory when that is missing.
/// </summary>
/// <param name="target">The path of the file about to be written.</param>
private static void FilePreparePath(string target)
{
    if (File.Exists(target))
    {
        FileAttributes atts = File.GetAttributes(target);
        File.SetAttributes(target, atts & ~FileAttributes.ReadOnly);
        return;
    }

    string dir = Path.GetDirectoryName(target);

    // GetDirectoryName yields null or "" for a root or a bare file name; there is
    // nothing to create then, and Directory.CreateDirectory would reject the value.
    if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
    {
        Directory.CreateDirectory(dir);
    }
}
/// <summary>
/// Truncates a timestamp to whole seconds.
/// </summary>
/// <param name="t">The timestamp to truncate.</param>
/// <returns>The same instant with the sub-second part removed and the original <see cref="DateTime.Kind"/> kept.</returns>
private static DateTime RemoveMilliseconds(DateTime t)
{
    // Rebuilding from year/month/.../second produced an Unspecified-kind value;
    // truncating the ticks keeps the Kind the caller passed in.
    return new DateTime(t.Ticks - (t.Ticks % TimeSpan.TicksPerSecond), t.Kind);
}
/// <summary>
/// Copies <paramref name="stream"/> into the file at <paramref name="path"/> in chunks of
/// <paramref name="streamBufferSize"/> bytes, closes the stream, and stamps the file's
/// last-write time with <paramref name="touchDate"/>.
/// </summary>
/// <returns>The number of bytes copied.</returns>
// ReSharper disable UnusedMethodReturnValue.Local
private static long SaveStream(Stream stream, string path, DateTime touchDate, int streamBufferSize)
// ReSharper restore UnusedMethodReturnValue.Local
{
    FilePreparePath(path);

    long copied = 0;
    FileStream target = new FileStream(path, FileMode.Create, FileAccess.Write);
    BinaryReader source = null;
    BinaryWriter sink = null;
    try
    {
        source = new BinaryReader(stream);
        sink = new BinaryWriter(target);

        while (true)
        {
            byte[] chunk = source.ReadBytes(streamBufferSize);
            if (chunk.Length == 0)
            {
                // An empty chunk marks the end of the input.
                break;
            }
            copied += chunk.Length;
            sink.Write(chunk);
        }
    }
    finally
    {
        if (source != null)
        {
            source.Close();
        }
        if (sink != null)
        {
            sink.Flush();
            sink.Close();
        }
        if (target != null)
        {
            target.Close();
        }
    }

    File.SetLastWriteTime(path, touchDate);
    return copied;
}
/// <summary>
/// Copies the cached file to <paramref name="path"/> and gives the copy the cached file's
/// last-write time. Does nothing when <paramref name="path"/> is null.
/// </summary>
private static void CopyCachedFile(string cachePath, string path)
{
    if (path == null)
    {
        return;
    }
    IOLibrary.CopyAlways(cachePath, path);
    // touch the file
    File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
}

/// <summary>
/// Executes the HTTP request for <paramref name="uri"/> and routes the result to the cache,
/// to <paramref name="path"/>, and/or into <paramref name="doc"/>.
/// </summary>
/// <remarks>
/// With caching enabled the body is saved to the cache file (and copied to <paramref name="path"/>
/// when given); <paramref name="doc"/> is not loaded here in that case. Without caching the body is
/// loaded into <paramref name="doc"/> when it is non-null and the content type is HTML.
/// Updates the request duration, response URI and from-cache state of this instance.
/// </remarks>
/// <param name="uri">The resource to request.</param>
/// <param name="method">The HTTP method.</param>
/// <param name="path">Optional file that receives a copy of the cached document.</param>
/// <param name="doc">Optional document to load in memory when caching is off.</param>
/// <param name="proxy">Optional proxy; when set, credentials are applied to both proxy and request.</param>
/// <param name="creds">Optional credentials; default credentials are used with a proxy when null.</param>
/// <returns>
/// The response status; NotModified when the cache was used; ResetContent when the
/// <see cref="PreRequest"/> callback cancelled the request.
/// </returns>
private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc, IWebProxy proxy,
                           ICredentials creds)
{
    string cachePath = null;
    bool oldFile = false;

    HttpWebRequest req = WebRequest.Create(uri) as HttpWebRequest;
    if (req == null)
    {
        // Only HTTP(S) requests are handled; fail clearly instead of with a null dereference.
        throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
    }
    req.Method = method;

    if (proxy != null)
    {
        if (creds != null)
        {
            proxy.Credentials = creds;
            req.Credentials = creds;
        }
        else
        {
            proxy.Credentials = CredentialCache.DefaultCredentials;
            req.Credentials = CredentialCache.DefaultCredentials;
        }
        req.Proxy = proxy;
    }

    _fromCache = false;
    _requestDuration = 0;
    int tc = Environment.TickCount;
    if (UsingCache)
    {
        cachePath = GetCachePath(req.RequestUri);
        if (File.Exists(cachePath))
        {
            // NOTE(review): SaveStream stamps the cache file's last-WRITE time with the server's
            // Last-Modified value, yet the last-ACCESS time is sent here; confirm which is intended.
            req.IfModifiedSince = File.GetLastAccessTime(cachePath);
            oldFile = true;
        }
    }

    if (_cacheOnly)
    {
        if (!File.Exists(cachePath))
        {
            throw new HtmlWebException("File was not found at cache path: '" + cachePath + "'");
        }

        CopyCachedFile(cachePath, path);
        _fromCache = true;
        return HttpStatusCode.NotModified;
    }

    if (_useCookies)
    {
        req.CookieContainer = new CookieContainer();
    }

    if (PreRequest != null)
    {
        // allow our user to change the request at will
        if (!PreRequest(req))
        {
            return HttpStatusCode.ResetContent;
        }
    }

    HttpWebResponse resp;
    try
    {
        resp = req.GetResponse() as HttpWebResponse;
    }
    catch (WebException we)
    {
        _requestDuration = Environment.TickCount - tc;
        resp = (HttpWebResponse)we.Response;
        if (resp == null)
        {
            // No response at all (e.g. no connectivity): fall back to the cached copy when there is one.
            if (oldFile)
            {
                CopyCachedFile(cachePath, path);
                _fromCache = true;
                return HttpStatusCode.NotModified;
            }
            throw;
        }
        // An error response with a body is processed like any other response below.
    }
    catch (Exception)
    {
        _requestDuration = Environment.TickCount - tc;
        throw;
    }

    try
    {
        // allow our user to get some info from the response
        if (PostResponse != null)
        {
            PostResponse(req, resp);
        }

        _requestDuration = Environment.TickCount - tc;
        _responseUri = resp.ResponseUri;

        bool html = IsHtmlContent(resp.ContentType);

        // NOTE(review): ContentEncoding is documented as the Content-Encoding header (e.g. "gzip"),
        // not the charset; confirm this is the value meant to be turned into a text encoding.
        Encoding respenc = null;
        if ((resp.ContentEncoding != null) && (resp.ContentEncoding.Length > 0))
        {
            respenc = Encoding.GetEncoding(resp.ContentEncoding);
        }

        // Captured up front so the status is returned without touching the response after Close.
        HttpStatusCode status = resp.StatusCode;

        if (status == HttpStatusCode.NotModified)
        {
            if (!UsingCache)
            {
                // this should *never* happen...
                throw new HtmlWebException("Server has send a NotModifed code, without cache enabled.");
            }

            _fromCache = true;
            CopyCachedFile(cachePath, path);
            return status;
        }

        Stream s = resp.GetResponseStream();
        if (s != null)
        {
            if (UsingCache)
            {
                // NOTE: LastModified does not contain milliseconds, so we remove them to the file
                SaveStream(s, cachePath, RemoveMilliseconds(resp.LastModified), _streamBufferSize);

                // save headers
                SaveCacheHeaders(req.RequestUri, resp);

                // copy and touch the file
                CopyCachedFile(cachePath, path);
            }
            else if ((doc != null) && html)
            {
                // try to work in-memory
                if (respenc != null)
                {
                    doc.Load(s, respenc);
                }
                else
                {
                    doc.Load(s, true);
                }
            }
        }
        return status;
    }
    finally
    {
        // Release the connection on every exit path, including NotModified and failures.
        resp.Close();
    }
}
/// <summary>
/// Reads one response header from the headers file cached next to a document.
/// The header name is matched case-insensitively (ASCII letters).
/// </summary>
/// <param name="requestUri">The URL whose cached headers are read.</param>
/// <param name="name">The header name.</param>
/// <param name="def">The value returned when the header is not present.</param>
/// <returns>The cached header value, or <paramref name="def"/>.</returns>
private string GetCacheHeader(Uri requestUri, string name, string def)
{
    // note: some headers are collection (ex: www-authenticate)
    // we don't handle that here
    XmlDocument doc = new XmlDocument();
    doc.Load(GetCacheHeadersPath(requestUri));

    // translate() only upper-cases ASCII letters, so use the invariant culture on our side too.
    XmlNode node =
        doc.SelectSingleNode("//h[translate(@n, 'abcdefghijklmnopqrstuvwxyz','ABCDEFGHIJKLMNOPQRSTUVWXYZ')='" +
                             name.ToUpperInvariant() + "']");
    if (node == null)
    {
        return def;
    }

    // Entries are written as <h n="name" v="value"/>: the value lives in attribute "v",
    // not in an attribute named after the header.
    XmlAttribute value = node.Attributes["v"];
    if (value == null)
    {
        return def;
    }
    return value.Value;
}
/// <summary>
/// Gets the path of the headers file stored beside a cached document:
/// the document's cache path with ".h.xml" appended.
/// </summary>
private string GetCacheHeadersPath(Uri uri)
{
    string documentPath = GetCachePath(uri);
    return documentPath + ".h.xml";
}
/// <summary>
/// Tells whether the file at <paramref name="path"/> is HTML, judging by the content type
/// looked up for its extension.
/// </summary>
private bool IsCacheHtmlContent(string path)
{
    string extension = Path.GetExtension(path);
    return IsHtmlContent(GetContentTypeForExtension(extension, null));
}
/// <summary>
/// Tells whether a MIME content type denotes HTML, i.e. starts with "text/html" (case-insensitive).
/// </summary>
/// <param name="contentType">The content type to test; null is treated as "not HTML".</param>
private bool IsHtmlContent(string contentType)
{
    // Ordinal, case-insensitive comparison: MIME types are not culture-sensitive text.
    return (contentType != null) &&
           contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Requests <paramref name="uri"/> and returns the resulting document, recording the
/// status code of the request on this instance.
/// </summary>
/// <param name="uri">The http/https resource to load.</param>
/// <param name="method">The HTTP method.</param>
/// <param name="proxy">Optional proxy.</param>
/// <param name="creds">Optional credentials.</param>
/// <returns>A new HTML document.</returns>
private HtmlDocument LoadUrl(Uri uri, string method, WebProxy proxy, NetworkCredential creds)
{
    HtmlDocument doc = new HtmlDocument();
    doc.OptionAutoCloseOnEnd = false;
    doc.OptionFixNestedTags = true;
    _statusCode = Get(uri, method, null, doc, proxy, creds);

    if (_statusCode == HttpStatusCode.NotModified)
    {
        // read cached encoding
        doc.DetectEncodingAndLoad(GetCachePath(uri));
    }
    else if (UsingCache && (_statusCode != HttpStatusCode.ResetContent))
    {
        // With caching on, Get writes a fresh download to the cache file only and never fills
        // the document, so read it back from there. ResetContent is skipped because it is what
        // Get returns when the PreRequest callback cancels the request.
        string cachePath = GetCachePath(uri);
        if (File.Exists(cachePath))
        {
            doc.DetectEncodingAndLoad(cachePath);
        }
    }
    return doc;
}
/// <summary>
/// Writes the response headers to the headers file kept beside the cached document,
/// as one &lt;h n="name" v="value"/&gt; element per header under a &lt;c&gt; root.
/// </summary>
private void SaveCacheHeaders(Uri requestUri, HttpWebResponse resp)
{
    string headersFile = GetCacheHeadersPath(requestUri);

    XmlDocument xml = new XmlDocument();
    xml.LoadXml("<c></c>");
    XmlNode root = xml.FirstChild;

    foreach (string headerName in resp.Headers)
    {
        XmlElement item = xml.CreateElement("h");
        item.SetAttribute("n", headerName);
        item.SetAttribute("v", resp.Headers[headerName]);
        root.AppendChild(item);
    }

    xml.Save(headersFile);
}
905 #endregion