net/base/net_util.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "net/base/net_util.h"
   6
   7 #include <algorithm>
   8 #include <iterator>
   9 #include <map>
  10
  11 #include "build/build_config.h"
  12
  13 #if defined(OS_WIN)
  14 #include <windows.h>
  15 #include <iphlpapi.h>
  16 #include <winsock2.h>
  17 #pragma comment(lib, "iphlpapi.lib")
  18 #elif defined(OS_POSIX)
  19 #include <fcntl.h>
  20 #if !defined(OS_ANDROID)
  21 #include <ifaddrs.h>
  22 #endif
  23 #include <net/if.h>
  24 #include <netdb.h>
  25 #include <netinet/in.h>
  26 #endif
  27
  28 #include "base/basictypes.h"
  29 #include "base/file_util.h"
  30 #include "base/files/file_path.h"
  31 #include "base/i18n/file_util_icu.h"
  32 #include "base/i18n/icu_string_conversions.h"
  33 #include "base/i18n/time_formatting.h"
  34 #include "base/json/string_escape.h"
  35 #include "base/lazy_instance.h"
  36 #include "base/logging.h"
  37 #include "base/memory/singleton.h"
  38 #include "base/message_loop.h"
  39 #include "base/metrics/histogram.h"
  40 #include "base/path_service.h"
  41 #include "base/stl_util.h"
  42 #include "base/strings/string_number_conversions.h"
  43 #include "base/strings/string_piece.h"
  44 #include "base/strings/string_split.h"
  45 #include "base/strings/string_tokenizer.h"
  46 #include "base/strings/string_util.h"
  47 #include "base/strings/stringprintf.h"
  48 #include "base/strings/sys_string_conversions.h"
  49 #include "base/strings/utf_offset_string_conversions.h"
  50 #include "base/strings/utf_string_conversions.h"
  51 #include "base/synchronization/lock.h"
  52 #include "base/sys_byteorder.h"
  53 #include "base/time.h"
  54 #include "base/values.h"
  55 #include "googleurl/src/gurl.h"
  56 #include "googleurl/src/url_canon.h"
  57 #include "googleurl/src/url_canon_ip.h"
  58 #include "googleurl/src/url_parse.h"
  59 #include "grit/net_resources.h"
  60 #if defined(OS_ANDROID)
  61 #include "net/android/network_library.h"
  62 #endif
  63 #include "net/base/dns_util.h"
  64 #include "net/base/escape.h"
  65 #include "net/base/mime_util.h"
  66 #include "net/base/net_module.h"
  67 #if defined(OS_WIN)
  68 #include "net/base/winsock_init.h"
  69 #endif
  70 #include "net/http/http_content_disposition.h"
  71 #include "third_party/icu/public/common/unicode/uidna.h"
  72 #include "third_party/icu/public/common/unicode/uniset.h"
  73 #include "third_party/icu/public/common/unicode/uscript.h"
  74 #include "third_party/icu/public/common/unicode/uset.h"
  75 #include "third_party/icu/public/i18n/unicode/datefmt.h"
  76 #include "third_party/icu/public/i18n/unicode/regex.h"
  77 #include "third_party/icu/public/i18n/unicode/ulocdata.h"
  78
  79 using base::Time;
  80
  81 namespace net {
  82
  83 namespace {
  84
// Prefix that is prepended to a file path to form a file URL.
static const base::FilePath::CharType kFileURLPrefix[] =
    FILE_PATH_LITERAL("file:///");
  88
// The general list of blocked ports. A port listed here is blocked unless a
// specific protocol overrides it (e.g. FTP may use the ports listed in
// kAllowedFtpPorts).
static const int kRestrictedPorts[] = {
  1,    // tcpmux
  7,    // echo
  9,    // discard
  11,   // systat
  13,   // daytime
  15,   // netstat
  17,   // qotd
  19,   // chargen
  20,   // ftp data
  21,   // ftp access
  22,   // ssh
  23,   // telnet
  25,   // smtp
  37,   // time
  42,   // name
  43,   // nicname
  53,   // domain
  77,   // priv-rjs
  79,   // finger
  87,   // ttylink
  95,   // supdup
  101,  // hostname
  102,  // iso-tsap
  103,  // gppitnp
  104,  // acr-nema
  109,  // pop2
  110,  // pop3
  111,  // sunrpc
  113,  // auth
  115,  // sftp
  117,  // uucp-path
  119,  // nntp
  123,  // NTP
  135,  // loc-srv /epmap
  139,  // netbios
  143,  // imap2
  179,  // BGP
  389,  // ldap
  465,  // smtp+ssl
  512,  // print / exec
  513,  // login
  514,  // shell
  515,  // printer
  526,  // tempo
  530,  // courier
  531,  // chat
  532,  // netnews
  540,  // uucp
  556,  // remotefs
  563,  // nntp+ssl
  587,  // smtp (mail submission)
  601,  // ?? (IANA lists this port as syslog-conn)
  636,  // ldap+ssl
  993,  // imap+ssl
  995,  // pop3+ssl
  2049, // nfs
  3659, // apple-sasl / PasswordServer
  4045, // lockd
  6000, // X11
  6665, // Alternate IRC [Apple addition]
  6666, // Alternate IRC [Apple addition]
  6667, // Standard IRC [Apple addition]
  6668, // Alternate IRC [Apple addition]
  6669, // Alternate IRC [Apple addition]
  0xFFFF, // Used to block all invalid port numbers (see
          // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())
};
 159
// FTP overrides the following restricted ports, i.e. these entries of
// kRestrictedPorts are nevertheless permitted for FTP.
static const int kAllowedFtpPorts[] = {
  21,   // ftp access (labelled the same way in kRestrictedPorts)
  22,   // ssh
};
 165
 166 // Does some simple normalization of scripts so we can allow certain scripts
 167 // to exist together.
 168 // TODO(brettw) bug 880223: we should allow some other languages to be
 169 // oombined such as Chinese and Latin. We will probably need a more
 170 // complicated system of language pairs to have more fine-grained control.
 171 UScriptCode NormalizeScript(UScriptCode code) {
 172   switch (code) {
 173     case USCRIPT_KATAKANA:
 174     case USCRIPT_HIRAGANA:
 175     case USCRIPT_KATAKANA_OR_HIRAGANA:
 176     case USCRIPT_HANGUL:  // This one is arguable.
 177       return USCRIPT_HAN;
 178     default:
 179       return code;
 180   }
 181 }
 182
 183 bool IsIDNComponentInSingleScript(const base::char16* str, int str_len) {
 184   UScriptCode first_script = USCRIPT_INVALID_CODE;
 185   bool is_first = true;
 186
 187   int i = 0;
 188   while (i < str_len) {
 189     unsigned code_point;
 190     U16_NEXT(str, i, str_len, code_point);
 191
 192     UErrorCode err = U_ZERO_ERROR;
 193     UScriptCode cur_script = uscript_getScript(code_point, &err);
 194     if (err != U_ZERO_ERROR)
 195       return false;  // Report mixed on error.
 196     cur_script = NormalizeScript(cur_script);
 197
 198     // TODO(brettw) We may have to check for USCRIPT_INHERENT as well.
 199     if (is_first && cur_script != USCRIPT_COMMON) {
 200       first_script = cur_script;
 201       is_first = false;
 202     } else {
 203       if (cur_script != USCRIPT_COMMON && cur_script != first_script)
 204         return false;
 205     }
 206   }
 207   return true;
 208 }
 209
 210 // Check if the script of a language can be 'safely' mixed with
 211 // Latin letters in the ASCII range.
 212 bool IsCompatibleWithASCIILetters(const std::string& lang) {
 213   // For now, just list Chinese, Japanese and Korean (positive list).
 214   // An alternative is negative-listing (languages using Greek and
 215   // Cyrillic letters), but it can be more dangerous.
 216   return !lang.substr(0, 2).compare("zh") ||
 217          !lang.substr(0, 2).compare("ja") ||
 218          !lang.substr(0, 2).compare("ko");
 219 }
 220
// Maps a language string to a pointer to the icu::UnicodeSet stored for it.
typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap;

// Singleton holding the LangToExemplarSetMap. The map is private and is only
// reached through the friend functions GetExemplarSetForLang() and
// SetExemplarSetForLang(). The destructor deletes every UnicodeSet pointer
// stored in the map, so the map owns the sets handed to it.
class LangToExemplarSet {
 public:
  // Returns the process-wide instance (created through Singleton<>).
  static LangToExemplarSet* GetInstance() {
    return Singleton<LangToExemplarSet>::get();
  }

 private:
  LangToExemplarSetMap map;
  LangToExemplarSet() { }
  ~LangToExemplarSet() {
    // Frees the UnicodeSet objects the map points to.
    STLDeleteContainerPairSecondPointers(map.begin(), map.end());
  }

  friend class Singleton<LangToExemplarSet>;
  friend struct DefaultSingletonTraits<LangToExemplarSet>;
  friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**);
  friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*);

  DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet);
};
 243
 244 bool GetExemplarSetForLang(const std::string& lang,
 245                            icu::UnicodeSet** lang_set) {
 246   const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;
 247   LangToExemplarSetMap::const_iterator pos = map.find(lang);
 248   if (pos != map.end()) {
 249     *lang_set = pos->second;
 250     return true;
 251   }
 252   return false;
 253 }
 254
 255 void SetExemplarSetForLang(const std::string& lang,
 256                            icu::UnicodeSet* lang_set) {
 257   LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;
 258   map.insert(std::make_pair(lang, lang_set));
 259 }
 260
// Lock that IsComponentCoveredByLang() holds while it reads from and adds to
// the LangToExemplarSet map.
static base::LazyInstance<base::Lock>::Leaky
    g_lang_set_lock = LAZY_INSTANCE_INITIALIZER;
 263
 264 // Returns true if all the characters in component_characters are used by
 265 // the language |lang|.
 266 bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters,
 267                               const std::string& lang) {
 268   CR_DEFINE_STATIC_LOCAL(
 269       const icu::UnicodeSet, kASCIILetters, ('a', 'z'));
 270   icu::UnicodeSet* lang_set = NULL;
 271   // We're called from both the UI thread and the history thread.
 272   {
 273     base::AutoLock lock(g_lang_set_lock.Get());
 274     if (!GetExemplarSetForLang(lang, &lang_set)) {
 275       UErrorCode status = U_ZERO_ERROR;
 276       ULocaleData* uld = ulocdata_open(lang.c_str(), &status);
 277       // TODO(jungshik) Turn this check on when the ICU data file is
 278       // rebuilt with the minimal subset of locale data for languages
 279       // to which Chrome is not localized but which we offer in the list
 280       // of languages selectable for Accept-Languages. With the rebuilt ICU
 281       // data, ulocdata_open never should fall back to the default locale.
 282       // (issue 2078)
 283       // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING);
 284       if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) {
 285         lang_set = reinterpret_cast<icu::UnicodeSet *>(
 286             ulocdata_getExemplarSet(uld, NULL, 0,
 287                                     ULOCDATA_ES_STANDARD, &status));
 288         // If |lang| is compatible with ASCII Latin letters, add them.
 289         if (IsCompatibleWithASCIILetters(lang))
 290           lang_set->addAll(kASCIILetters);
 291       } else {
 292         lang_set = new icu::UnicodeSet(1, 0);
 293       }
 294       lang_set->freeze();
 295       SetExemplarSetForLang(lang, lang_set);
 296       ulocdata_close(uld);
 297     }
 298   }
 299   return !lang_set->isEmpty() && lang_set->containsAll(component_characters);
 300 }
 301
 302 // Returns true if the given Unicode host component is safe to display to the
 303 // user.
 304 bool IsIDNComponentSafe(const base::char16* str,
 305                         int str_len,
 306                         const std::string& languages) {
 307   // Most common cases (non-IDN) do not reach here so that we don't
 308   // need a fast return path.
 309   // TODO(jungshik) : Check if there's any character inappropriate
 310   // (although allowed) for domain names.
 311   // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and
 312   // http://www.unicode.org/reports/tr39/data/xidmodifications.txt
 313   // For now, we borrow the list from Mozilla and tweaked it slightly.
 314   // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because
 315   //  they're gonna be canonicalized to U+0020 and full stop before
 316   //  reaching here.)
 317   // The original list is available at
 318   // http://kb.mozillazine.org/Network.IDN.blacklist_chars and
 319   // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703
 320
 321   UErrorCode status = U_ZERO_ERROR;
 322 #ifdef U_WCHAR_IS_UTF16
 323   icu::UnicodeSet dangerous_characters(icu::UnicodeString(
 324       L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338"
 325       L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
 326       L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
 327       L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
 328       L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"
 329       L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"
 330       L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"
 331       L"[\ufffa-\ufffd]]"), status);
 332   DCHECK(U_SUCCESS(status));
 333   icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
 334       // Lone katakana no, so, or n
 335       L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"
 336       // Repeating Japanese accent characters
 337       L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),
 338       0, status);
 339 #else
 340   icu::UnicodeSet dangerous_characters(icu::UnicodeString(
 341       "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
 342       "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
 343       "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
 344       "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
 345       "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"
 346       "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"
 347       "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"
 348       "[\\ufffa-\\ufffd]]", -1, US_INV), status);
 349   DCHECK(U_SUCCESS(status));
 350   icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
 351       // Lone katakana no, so, or n
 352       "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]"
 353       // Repeating Japanese accent characters
 354       "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"),
 355       0, status);
 356 #endif
 357   DCHECK(U_SUCCESS(status));
 358   icu::UnicodeSet component_characters;
 359   icu::UnicodeString component_string(str, str_len);
 360   component_characters.addAll(component_string);
 361   if (dangerous_characters.containsSome(component_characters))
 362     return false;
 363
 364   DCHECK(U_SUCCESS(status));
 365   dangerous_patterns.reset(component_string);
 366   if (dangerous_patterns.find())
 367     return false;
 368
 369   // If the language list is empty, the result is completely determined
 370   // by whether a component is a single script or not. This will block
 371   // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are
 372   // allowed with |languages| (while it blocks Chinese + Latin letters with
 373   // an accent as should be the case), but we want to err on the safe side
 374   // when |languages| is empty.
 375   if (languages.empty())
 376     return IsIDNComponentInSingleScript(str, str_len);
 377
 378   // |common_characters| is made up of  ASCII numbers, hyphen, plus and
 379   // underscore that are used across scripts and allowed in domain names.
 380   // (sync'd with characters allowed in url_canon_host with square
 381   // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.
 382   icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),
 383                                     status);
 384   DCHECK(U_SUCCESS(status));
 385   // Subtract common characters because they're always allowed so that
 386   // we just have to check if a language-specific set contains
 387   // the remainder.
 388   component_characters.removeAll(common_characters);
 389
 390   base::StringTokenizer t(languages, ",");
 391   while (t.GetNext()) {
 392     if (IsComponentCoveredByLang(component_characters, t.token()))
 393       return true;
 394   }
 395   return false;
 396 }
 397
 398 // Converts one component of a host (between dots) to IDN if safe. The result
 399 // will be APPENDED to the given output string and will be the same as the input
 400 // if it is not IDN or the IDN is unsafe to display.  Returns whether any
 401 // conversion was performed.
 402 bool IDNToUnicodeOneComponent(const base::char16* comp,
 403                               size_t comp_len,
 404                               const std::string& languages,
 405                               base::string16* out) {
 406   DCHECK(out);
 407   if (comp_len == 0)
 408     return false;
 409
 410   // Only transform if the input can be an IDN component.
 411   static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
 412   if ((comp_len > arraysize(kIdnPrefix)) &&
 413       !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {
 414     // Repeatedly expand the output string until it's big enough.  It looks like
 415     // ICU will return the required size of the buffer, but that's not
 416     // documented, so we'll just grow by 2x. This should be rare and is not on a
 417     // critical path.
 418     size_t original_length = out->length();
 419     for (int extra_space = 64; ; extra_space *= 2) {
 420       UErrorCode status = U_ZERO_ERROR;
 421       out->resize(out->length() + extra_space);
 422       int output_chars = uidna_IDNToUnicode(comp,
 423           static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,
 424           UIDNA_DEFAULT, NULL, &status);
 425       if (status == U_ZERO_ERROR) {
 426         // Converted successfully.
 427         out->resize(original_length + output_chars);
 428         if (IsIDNComponentSafe(out->data() + original_length, output_chars,
 429                                languages))
 430           return true;
 431       }
 432
 433       if (status != U_BUFFER_OVERFLOW_ERROR)
 434         break;
 435     }
 436     // Failed, revert back to original string.
 437     out->resize(original_length);
 438   }
 439
 440   // We get here with no IDN or on error, in which case we just append the
 441   // literal input.
 442   out->append(comp, comp_len);
 443   return false;
 444 }
 445
 446 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|.
 447 void LimitOffsets(const base::string16& str,
 448                   std::vector<size_t>* offsets_for_adjustment) {
 449   if (offsets_for_adjustment) {
 450     std::for_each(offsets_for_adjustment->begin(),
 451                   offsets_for_adjustment->end(),
 452                   base::LimitOffset<base::string16>(str.length()));
 453   }
 454 }
 455
 456 // TODO(brettw) bug 734373: check the scripts for each host component and
 457 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for
 458 // scripts that the user has installed. For now, just put the entire
 459 // path through IDN. Maybe this feature can be implemented in ICU itself?
 460 //
 461 // We may want to skip this step in the case of file URLs to allow unicode
 462 // UNC hostnames regardless of encodings.
 463 base::string16 IDNToUnicodeWithOffsets(
 464     const std::string& host,
 465     const std::string& languages,
 466     std::vector<size_t>* offsets_for_adjustment) {
 467   // Convert the ASCII input to a base::string16 for ICU.
 468   base::string16 input16;
 469   input16.reserve(host.length());
 470   input16.insert(input16.end(), host.begin(), host.end());
 471
 472   // Do each component of the host separately, since we enforce script matching
 473   // on a per-component basis.
 474   base::string16 out16;
 475   {
 476     base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
 477     for (size_t component_start = 0, component_end;
 478          component_start < input16.length();
 479          component_start = component_end + 1) {
 480       // Find the end of the component.
 481       component_end = input16.find('.', component_start);
 482       if (component_end == base::string16::npos)
 483         component_end = input16.length();  // For getting the last component.
 484       size_t component_length = component_end - component_start;
 485       size_t new_component_start = out16.length();
 486       bool converted_idn = false;
 487       if (component_end > component_start) {
 488         // Add the substring that we just found.
 489         converted_idn = IDNToUnicodeOneComponent(
 490             input16.data() + component_start, component_length, languages,
 491             &out16);
 492       }
 493       size_t new_component_length = out16.length() - new_component_start;
 494
 495       if (converted_idn && offsets_for_adjustment) {
 496         offset_adjuster.Add(base::OffsetAdjuster::Adjustment(component_start,
 497             component_length, new_component_length));
 498       }
 499
 500       // Need to add the dot we just found (if we found one).
 501       if (component_end < input16.length())
 502         out16.push_back('.');
 503     }
 504   }
 505
 506   LimitOffsets(out16, offsets_for_adjustment);
 507   return out16;
 508 }
 509
 510 // Transforms |original_offsets| by subtracting |component_begin| from all
 511 // offsets.  Any offset which was not at least this large to begin with is set
 512 // to std::string::npos.
 513 std::vector<size_t> OffsetsIntoComponent(
 514     const std::vector<size_t>& original_offsets,
 515     size_t component_begin) {
 516   DCHECK_NE(std::string::npos, component_begin);
 517   std::vector<size_t> offsets_into_component(original_offsets);
 518   for (std::vector<size_t>::iterator i(offsets_into_component.begin());
 519        i != offsets_into_component.end(); ++i) {
 520     if (*i != std::string::npos)
 521       *i = (*i < component_begin) ? std::string::npos : (*i - component_begin);
 522   }
 523   return offsets_into_component;
 524 }
 525
 526 // Called after we transform a component and append it to an output string.
 527 // Maps |transformed_offsets|, which represent offsets into the transformed
 528 // component itself, into appropriate offsets for the output string, by adding
 529 // |output_component_begin| to each.  Determines which offsets need mapping by
 530 // checking to see which of the |original_offsets| were within the designated
 531 // original component, using its provided endpoints.
 532 void AdjustForComponentTransform(
 533     const std::vector<size_t>& original_offsets,
 534     size_t original_component_begin,
 535     size_t original_component_end,
 536     const std::vector<size_t>& transformed_offsets,
 537     size_t output_component_begin,
 538     std::vector<size_t>* offsets_for_adjustment) {
 539   if (!offsets_for_adjustment)
 540     return;
 541
 542   DCHECK_NE(std::string::npos, original_component_begin);
 543   DCHECK_NE(std::string::npos, original_component_end);
 544   DCHECK_NE(base::string16::npos, output_component_begin);
 545   size_t offsets_size = offsets_for_adjustment->size();
 546   DCHECK_EQ(offsets_size, original_offsets.size());
 547   DCHECK_EQ(offsets_size, transformed_offsets.size());
 548   for (size_t i = 0; i < offsets_size; ++i) {
 549     size_t original_offset = original_offsets[i];
 550     if ((original_offset >= original_component_begin) &&
 551         (original_offset < original_component_end)) {
 552       size_t transformed_offset = transformed_offsets[i];
 553       (*offsets_for_adjustment)[i] =
 554           (transformed_offset == base::string16::npos) ?
 555           base::string16::npos : (output_component_begin + transformed_offset);
 556     }
 557   }
 558 }
 559
 560 // If |component| is valid, its begin is incremented by |delta|.
 561 void AdjustComponent(int delta, url_parse::Component* component) {
 562   if (!component->is_valid())
 563     return;
 564
 565   DCHECK(delta >= 0 || component->begin >= -delta);
 566   component->begin += delta;
 567 }
 568
 569 // Adjusts all the components of |parsed| by |delta|, except for the scheme.
 570 void AdjustComponents(int delta, url_parse::Parsed* parsed) {
 571   AdjustComponent(delta, &(parsed->username));
 572   AdjustComponent(delta, &(parsed->password));
 573   AdjustComponent(delta, &(parsed->host));
 574   AdjustComponent(delta, &(parsed->port));
 575   AdjustComponent(delta, &(parsed->path));
 576   AdjustComponent(delta, &(parsed->query));
 577   AdjustComponent(delta, &(parsed->ref));
 578 }
 579
// Helper for FormatUrlWithOffsets().
//
// Formats a "view-source:" URL by formatting the URL that follows the prefix
// with FormatUrlWithOffsets() and putting "view-source:" back in front of the
// result. |new_parsed| (required), |prefix_end| (optional) and
// |offsets_for_adjustment| (optional) are then shifted so that they describe
// positions in the returned string rather than in the inner URL.
// NOTE(review): assumes |url|'s spec starts with "view-source:"; this
// function does not check it -- confirm against the caller.
base::string16 FormatViewSourceUrl(
    const GURL& url,
    const std::vector<size_t>& original_offsets,
    const std::string& languages,
    FormatUrlTypes format_types,
    UnescapeRule::Type unescape_rules,
    url_parse::Parsed* new_parsed,
    size_t* prefix_end,
    std::vector<size_t>* offsets_for_adjustment) {
  DCHECK(new_parsed);
  const char kViewSource[] = "view-source:";
  // Length of the prefix without the terminating NUL.
  const size_t kViewSourceLength = arraysize(kViewSource) - 1;
  // Offsets rebased so that 0 is the first character after the prefix.
  std::vector<size_t> offsets_into_url(
      OffsetsIntoComponent(original_offsets, kViewSourceLength));

  // Format everything after the prefix as a URL of its own.
  GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength));
  base::string16 result(ASCIIToUTF16(kViewSource) +
      FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules,
                           new_parsed, prefix_end, &offsets_into_url));

  // Adjust position values.
  if (new_parsed->scheme.is_nonempty()) {
    // Assume "view-source:real-scheme" as a scheme.
    new_parsed->scheme.len += kViewSourceLength;
  } else {
    // No inner scheme: the scheme is "view-source" alone, without the colon.
    new_parsed->scheme.begin = 0;
    new_parsed->scheme.len = kViewSourceLength - 1;
  }
  // All non-scheme components move right by the length of the prefix.
  AdjustComponents(kViewSourceLength, new_parsed);
  if (prefix_end)
    *prefix_end += kViewSourceLength;
  // Map the offsets that pointed past the prefix back into |result|.
  AdjustForComponentTransform(original_offsets, kViewSourceLength,
      url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength,
      offsets_for_adjustment);
  LimitOffsets(result, offsets_for_adjustment);
  return result;
}
 618
// Interface for the per-component text transformation that
// AppendFormattedComponent() applies before appending a URL component to its
// output.
class AppendComponentTransform {
 public:
  AppendComponentTransform() {}
  virtual ~AppendComponentTransform() {}

  // Returns the transformed form of |component_text|.
  // |offsets_into_component| holds offsets relative to the start of
  // |component_text|; implementations pass it on to the conversion routine
  // they call so that it can be updated for the transformed text.
  virtual base::string16 Execute(
      const std::string& component_text,
      std::vector<size_t>* offsets_into_component) const = 0;

  // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an
  // accessible copy constructor in order to call AppendFormattedComponent()
  // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).
};
 632
// Transform for the host component: converts the host with
// IDNToUnicodeWithOffsets() using the given language list.
class HostComponentTransform : public AppendComponentTransform {
 public:
  // |languages| is stored by reference, not copied, so the string passed in
  // must outlive this object.
  explicit HostComponentTransform(const std::string& languages)
      : languages_(languages) {
  }

 private:
  // AppendComponentTransform implementation.
  virtual base::string16 Execute(
      const std::string& component_text,
      std::vector<size_t>* offsets_into_component) const OVERRIDE {
    return IDNToUnicodeWithOffsets(component_text, languages_,
                                   offsets_into_component);
  }

  // Reference to the caller's language list (see the constructor).
  const std::string& languages_;
};
 649
 650 class NonHostComponentTransform : public AppendComponentTransform {
 651  public:
 652   explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules)
 653       : unescape_rules_(unescape_rules) {
 654   }
 655
 656  private:
 657   virtual base::string16 Execute(
 658       const std::string& component_text,
 659       std::vector<size_t>* offsets_into_component) const OVERRIDE {
 660     return (unescape_rules_ == UnescapeRule::NONE) ?
 661         base::UTF8ToUTF16AndAdjustOffsets(component_text,
 662                                           offsets_into_component) :
 663         UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text,
 664             unescape_rules_, offsets_into_component);
 665   }
 666
 667   const UnescapeRule::Type unescape_rules_;
 668 };
 669
 670 void AppendFormattedComponent(const std::string& spec,
 671                               const url_parse::Component& original_component,
 672                               const std::vector<size_t>& original_offsets,
 673                               const AppendComponentTransform& transform,
 674                               base::string16* output,
 675                               url_parse::Component* output_component,
 676                               std::vector<size_t>* offsets_for_adjustment) {
 677   DCHECK(output);
 678   if (original_component.is_nonempty()) {
 679     size_t original_component_begin =
 680         static_cast<size_t>(original_component.begin);
 681     size_t output_component_begin = output->length();
 682     if (output_component)
 683       output_component->begin = static_cast<int>(output_component_begin);
 684
 685     std::vector<size_t> offsets_into_component =
 686         OffsetsIntoComponent(original_offsets, original_component_begin);
 687     output->append(transform.Execute(std::string(spec, original_component_begin,
 688         static_cast<size_t>(original_component.len)), &offsets_into_component));
 689
 690     if (output_component) {
 691       output_component->len =
 692           static_cast<int>(output->length() - output_component_begin);
 693     }
 694     AdjustForComponentTransform(original_offsets, original_component_begin,
 695                                 static_cast<size_t>(original_component.end()),
 696                                 offsets_into_component, output_component_begin,
 697                                 offsets_for_adjustment);
 698   } else if (output_component) {
 699     output_component->reset();
 700   }
 701 }
 702
// Sanitizes |filename| in place. An empty name is left untouched.
//
// When |replace_trailing| is true, trailing spaces and dots (and trailing
// whitespace) are removed and the same number of '-' characters is appended
// in their place; if nothing is left after that removal the name stays empty.
// Afterwards the name is passed through TrimString() with ".", and every '/'
// and '\' is replaced with '-'.
void SanitizeGeneratedFileName(base::FilePath::StringType* filename,
                               bool replace_trailing) {
  const base::FilePath::CharType kReplace[] = FILE_PATH_LITERAL("-");
  if (filename->empty())
    return;
  if (replace_trailing) {
    // Handle CreateFile() stripping trailing dots and spaces on filenames
    // http://support.microsoft.com/kb/115827
    size_t length = filename->size();
    // Cut the name after the last character that is neither a space nor a dot.
    size_t pos = filename->find_last_not_of(FILE_PATH_LITERAL(" ."));
    filename->resize((pos == std::string::npos) ? 0 : (pos + 1));
    TrimWhitespace(*filename, TRIM_TRAILING, filename);
    if (filename->empty())
      return;
    // Pad with one replacement character per removed character, so the
    // length of the name is preserved.
    size_t trimmed = length - filename->size();
    if (trimmed)
      filename->insert(filename->end(), trimmed, kReplace[0]);
  }
  // NOTE(review): presumably strips dots from both ends of the name --
  // confirm against TrimString()'s definition.
  TrimString(*filename, FILE_PATH_LITERAL("."), filename);
  if (filename->empty())
    return;
  // Replace any path information by changing path separators.
  ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("/"), kReplace);
  ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("\\"), kReplace);
}
 728
 729 // Returns the filename determined from the last component of the path portion
 730 // of the URL.  Returns an empty string if the URL doesn't have a path or is
 731 // invalid. If the generated filename is not reliable,
 732 // |should_overwrite_extension| will be set to true, in which case a better
 733 // extension should be determined based on the content type.
 734 std::string GetFileNameFromURL(const GURL& url,
 735                                const std::string& referrer_charset,
 736                                bool* should_overwrite_extension) {
 737   // about: and data: URLs don't have file names, but esp. data: URLs may
 738   // contain parts that look like ones (i.e., contain a slash).  Therefore we
 739   // don't attempt to divine a file name out of them.
 740   if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data"))
 741     return std::string();
 742
 743   const std::string unescaped_url_filename = UnescapeURLComponent(
 744       url.ExtractFileName(),
 745       UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
 746
 747   // The URL's path should be escaped UTF-8, but may not be.
 748   std::string decoded_filename = unescaped_url_filename;
 749   if (!IsStringUTF8(decoded_filename)) {
 750     // TODO(jshin): this is probably not robust enough. To be sure, we need
 751     // encoding detection.
 752     base::string16 utf16_output;
 753     if (!referrer_charset.empty() &&
 754         base::CodepageToUTF16(unescaped_url_filename,
 755                               referrer_charset.c_str(),
 756                               base::OnStringConversionError::FAIL,
 757                               &utf16_output)) {
 758       decoded_filename = UTF16ToUTF8(utf16_output);
 759     } else {
 760       decoded_filename = WideToUTF8(
 761           base::SysNativeMBToWide(unescaped_url_filename));
 762     }
 763   }
 764   // If the URL contains a (possibly empty) query, assume it is a generator, and
 765   // allow the determined extension to be overwritten.
 766   *should_overwrite_extension = !decoded_filename.empty() && url.has_query();
 767
 768   return decoded_filename;
 769 }
 770
 771 // Returns whether the specified extension is automatically integrated into the
 772 // windows shell.
 773 bool IsShellIntegratedExtension(const base::FilePath::StringType& extension) {
 774   base::FilePath::StringType extension_lower = StringToLowerASCII(extension);
 775
 776   // http://msdn.microsoft.com/en-us/library/ms811694.aspx
 777   // Right-clicking on shortcuts can be magical.
 778   if ((extension_lower == FILE_PATH_LITERAL("local")) ||
 779       (extension_lower == FILE_PATH_LITERAL("lnk")))
 780     return true;
 781
 782   // http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html
 783   // Files become magical if they end in a CLSID, so block such extensions.
 784   if (!extension_lower.empty() &&
 785       (extension_lower[0] == FILE_PATH_LITERAL('{')) &&
 786       (extension_lower[extension_lower.length() - 1] == FILE_PATH_LITERAL('}')))
 787     return true;
 788   return false;
 789 }
 790
 791 // Returns whether the specified file name is a reserved name on windows.
 792 // This includes names like "com2.zip" (which correspond to devices) and
 793 // desktop.ini and thumbs.db which have special meaning to the windows shell.
 794 bool IsReservedName(const base::FilePath::StringType& filename) {
 795   // This list is taken from the MSDN article "Naming a file"
 796   // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx
 797   // I also added clock$ because GetSaveFileName seems to consider it as a
 798   // reserved name too.
 799   static const char* const known_devices[] = {
 800     "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5",
 801     "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4",
 802     "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$"
 803   };
 804 #if defined(OS_WIN)
 805   std::string filename_lower = StringToLowerASCII(WideToUTF8(filename));
 806 #elif defined(OS_POSIX)
 807   std::string filename_lower = StringToLowerASCII(filename);
 808 #endif
 809
 810   for (size_t i = 0; i < arraysize(known_devices); ++i) {
 811     // Exact match.
 812     if (filename_lower == known_devices[i])
 813       return true;
 814     // Starts with "DEVICE.".
 815     if (filename_lower.find(std::string(known_devices[i]) + ".") == 0)
 816       return true;
 817   }
 818
 819   static const char* const magic_names[] = {
 820     // These file names are used by the "Customize folder" feature of the shell.
 821     "desktop.ini",
 822     "thumbs.db",
 823   };
 824
 825   for (size_t i = 0; i < arraysize(magic_names); ++i) {
 826     if (filename_lower == magic_names[i])
 827       return true;
 828   }
 829
 830   return false;
 831 }
 832
 833 // Examines the current extension in |file_name| and modifies it if necessary in
 834 // order to ensure the filename is safe.  If |file_name| doesn't contain an
 835 // extension or if |ignore_extension| is true, then a new extension will be
 836 // constructed based on the |mime_type|.
 837 //
 838 // We're addressing two things here:
 839 //
 840 // 1) Usability.  If there is no reliable file extension, we want to guess a
 841 //    reasonable file extension based on the content type.
 842 //
 843 // 2) Shell integration.  Some file extensions automatically integrate with the
 844 //    shell.  We block these extensions to prevent a malicious web site from
 845 //    integrating with the user's shell.
 846 void EnsureSafeExtension(const std::string& mime_type,
 847                          bool ignore_extension,
 848                          base::FilePath* file_name) {
 849   // See if our file name already contains an extension.
 850   base::FilePath::StringType extension = file_name->Extension();
 851   if (!extension.empty())
 852     extension.erase(extension.begin());  // Erase preceding '.'.
 853
 854   if ((ignore_extension || extension.empty()) && !mime_type.empty()) {
 855     base::FilePath::StringType preferred_mime_extension;
 856     std::vector<base::FilePath::StringType> all_mime_extensions;
 857     // The GetPreferredExtensionForMimeType call will end up going to disk.  Do
 858     // this on another thread to avoid slowing the IO thread.
 859     // http://crbug.com/61827
 860     // TODO(asanka): Remove this ScopedAllowIO once all callers have switched
 861     // over to IO safe threads.
 862     base::ThreadRestrictions::ScopedAllowIO allow_io;
 863     net::GetPreferredExtensionForMimeType(mime_type, &preferred_mime_extension);
 864     net::GetExtensionsForMimeType(mime_type, &all_mime_extensions);
 865     // If the existing extension is in the list of valid extensions for the
 866     // given type, use it. This avoids doing things like pointlessly renaming
 867     // "foo.jpg" to "foo.jpeg".
 868     if (std::find(all_mime_extensions.begin(),
 869                   all_mime_extensions.end(),
 870                   extension) != all_mime_extensions.end()) {
 871       // leave |extension| alone
 872     } else if (!preferred_mime_extension.empty()) {
 873       extension = preferred_mime_extension;
 874     }
 875   }
 876
 877 #if defined(OS_WIN)
 878   static const base::FilePath::CharType default_extension[] =
 879       FILE_PATH_LITERAL("download");
 880
 881   // Rename shell-integrated extensions.
 882   // TODO(asanka): Consider stripping out the bad extension and replacing it
 883   // with the preferred extension for the MIME type if one is available.
 884   if (IsShellIntegratedExtension(extension))
 885     extension.assign(default_extension);
 886 #endif
 887
 888   *file_name = file_name->ReplaceExtension(extension);
 889 }
 890
 891 bool FilePathToString16(const base::FilePath& path, base::string16* converted) {
 892 #if defined(OS_WIN)
 893   return WideToUTF16(path.value().c_str(), path.value().size(), converted);
 894 #elif defined(OS_POSIX)
 895   std::string component8 = path.AsUTF8Unsafe();
 896   return !component8.empty() &&
 897          UTF8ToUTF16(component8.c_str(), component8.size(), converted);
 898 #endif
 899 }
 900
 901 }  // namespace
 902
// Flag values for URL formatting (FormatUrlWithOffsets() takes a
// FormatUrlTypes argument). kFormatUrlOmitNothing is zero; each of the three
// "omit" flags occupies its own bit so they can be OR'ed together.
// NOTE(review): what each flag actually strips is decided by the formatting
// code, not by these definitions.
const FormatUrlType kFormatUrlOmitNothing                     = 0;
const FormatUrlType kFormatUrlOmitUsernamePassword            = 1 << 0;
const FormatUrlType kFormatUrlOmitHTTP                        = 1 << 1;
const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
// Union of the three "omit" flags above.
const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword |
    kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname;
 909
// Lazily-created collection of port numbers, read by
// GetCountOfExplicitlyAllowedPorts() and IsPortAllowedByOverride(). It is a
// multiset, so the same port can be stored more than once.
// NOTE(review): "Leaky" presumably means the instance is never destroyed at
// shutdown - confirm against base/lazy_instance.h.
static base::LazyInstance<std::multiset<int> >::Leaky
    g_explicitly_allowed_ports = LAZY_INSTANCE_INITIALIZER;
 912
// Returns the number of entries currently held in the explicitly-allowed port
// collection. Because that collection is a multiset, a port that was inserted
// several times is counted once per insertion.
size_t GetCountOfExplicitlyAllowedPorts() {
  return g_explicitly_allowed_ports.Get().size();
}
 916
 917 GURL FilePathToFileURL(const base::FilePath& path) {
 918   // Produce a URL like "file:///C:/foo" for a regular file, or
 919   // "file://///server/path" for UNC. The URL canonicalizer will fix up the
 920   // latter case to be the canonical UNC form: "file://server/path"
 921   base::FilePath::StringType url_string(kFileURLPrefix);
 922   url_string.append(path.value());
 923
 924   // Now do replacement of some characters. Since we assume the input is a
 925   // literal filename, anything the URL parser might consider special should
 926   // be escaped here.
 927
 928   // must be the first substitution since others will introduce percents as the
 929   // escape character
 930   ReplaceSubstringsAfterOffset(&url_string, 0,
 931       FILE_PATH_LITERAL("%"), FILE_PATH_LITERAL("%25"));
 932
 933   // semicolon is supposed to be some kind of separator according to RFC 2396
 934   ReplaceSubstringsAfterOffset(&url_string, 0,
 935       FILE_PATH_LITERAL(";"), FILE_PATH_LITERAL("%3B"));
 936
 937   ReplaceSubstringsAfterOffset(&url_string, 0,
 938       FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23"));
 939
 940   ReplaceSubstringsAfterOffset(&url_string, 0,
 941       FILE_PATH_LITERAL("?"), FILE_PATH_LITERAL("%3F"));
 942
 943 #if defined(OS_POSIX)
 944   ReplaceSubstringsAfterOffset(&url_string, 0,
 945       FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C"));
 946 #endif
 947
 948   return GURL(url_string);
 949 }
 950
 951 std::string GetSpecificHeader(const std::string& headers,
 952                               const std::string& name) {
 953   // We want to grab the Value from the "Key: Value" pairs in the headers,
 954   // which should look like this (no leading spaces, \n-separated) (we format
 955   // them this way in url_request_inet.cc):
 956   //    HTTP/1.1 200 OK\n
 957   //    ETag: "6d0b8-947-24f35ec0"\n
 958   //    Content-Length: 2375\n
 959   //    Content-Type: text/html; charset=UTF-8\n
 960   //    Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
 961   if (headers.empty())
 962     return std::string();
 963
 964   std::string match('\n' + name + ':');
 965
 966   std::string::const_iterator begin =
 967       std::search(headers.begin(), headers.end(), match.begin(), match.end(),
 968              base::CaseInsensitiveCompareASCII<char>());
 969
 970   if (begin == headers.end())
 971     return std::string();
 972
 973   begin += match.length();
 974
 975   std::string ret;
 976   TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')),
 977                  TRIM_ALL, &ret);
 978   return ret;
 979 }
 980
// Convenience wrapper around IDNToUnicodeWithOffsets() for callers that have
// no offsets to adjust: forwards |host| and |languages| unchanged and passes
// NULL as the offsets argument.
base::string16 IDNToUnicode(const std::string& host,
                            const std::string& languages) {
  return IDNToUnicodeWithOffsets(host, languages, NULL);
}
 985
 986 std::string CanonicalizeHost(const std::string& host,
 987                              url_canon::CanonHostInfo* host_info) {
 988   // Try to canonicalize the host.
 989   const url_parse::Component raw_host_component(
 990       0, static_cast<int>(host.length()));
 991   std::string canon_host;
 992   url_canon::StdStringCanonOutput canon_host_output(&canon_host);
 993   url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,
 994                                      &canon_host_output, host_info);
 995
 996   if (host_info->out_host.is_nonempty() &&
 997       host_info->family != url_canon::CanonHostInfo::BROKEN) {
 998     // Success!  Assert that there's no extra garbage.
 999     canon_host_output.Complete();
1000     DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length()));
1001   } else {
1002     // Empty host, or canonicalization failed.  We'll return empty.
1003     canon_host.clear();
1004   }
1005
1006   return canon_host;
1007 }
1008
1009 std::string GetDirectoryListingHeader(const base::string16& title) {
1010   static const base::StringPiece header(
1011       NetModule::GetResource(IDR_DIR_HEADER_HTML));
1012   // This can be null in unit tests.
1013   DLOG_IF(WARNING, header.empty()) <<
1014       "Missing resource: directory listing header";
1015
1016   std::string result;
1017   if (!header.empty())
1018     result.assign(header.data(), header.size());
1019
1020   result.append("<script>start(");
1021   base::JsonDoubleQuote(title, true, &result);
1022   result.append(");</script>\n");
1023
1024   return result;
1025 }
1026
// Returns true if |c| is a lowercase ASCII letter. Only lowercase has to be
// checked because uppercase characters have already been normalized in a
// canonicalized host.
inline bool IsHostCharAlpha(char c) {
  return (c >= 'a') && (c <= 'z');
}

// Returns true if |c| is an ASCII decimal digit.
inline bool IsHostCharDigit(char c) {
  return (c >= '0') && (c <= '9');
}

// Checks that the canonicalized |host| consists of well-formed, dot-separated
// components:
//   * a component starts with a letter, a digit or '-';
//   * its remaining characters are letters, digits, '-' or '_';
//   * a component must not end with '_';
//   * one trailing '.' after the last component is accepted;
//   * the last component must start with a letter, unless |desired_tld| is
//     non-empty and itself starts with a letter.
// An empty |host| is never compliant.
bool IsCanonicalizedHostCompliant(const std::string& host,
                                  const std::string& desired_tld) {
  if (host.empty())
    return false;

  bool in_component = false;
  bool most_recent_component_started_alpha = false;
  bool last_char_was_underscore = false;

  for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) {
    const char c = *i;
    if (!in_component) {
      // First character of a component.
      most_recent_component_started_alpha = IsHostCharAlpha(c);
      if (!most_recent_component_started_alpha && !IsHostCharDigit(c) &&
          (c != '-'))
        return false;
      in_component = true;
    } else if (c == '.') {
      // End of a component: it must not have ended with an underscore.
      if (last_char_was_underscore)
        return false;
      in_component = false;
    } else if (IsHostCharAlpha(c) || IsHostCharDigit(c) || (c == '-')) {
      last_char_was_underscore = false;
    } else if (c == '_') {
      last_char_was_underscore = true;
    } else {
      return false;
    }
  }

  // The final component is not followed by a '.', so the check made at each
  // separator has to be repeated here. Without it "a.b_" would be accepted
  // even though "a_.b" and "a.b_." are rejected.
  if (last_char_was_underscore)
    return false;

  return most_recent_component_started_alpha ||
      (!desired_tld.empty() && IsHostCharAlpha(desired_tld[0]));
}
1072
1073 std::string GetDirectoryListingEntry(const base::string16& name,
1074                                      const std::string& raw_bytes,
1075                                      bool is_dir,
1076                                      int64 size,
1077                                      Time modified) {
1078   std::string result;
1079   result.append("<script>addRow(");
1080   base::JsonDoubleQuote(name, true, &result);
1081   result.append(",");
1082   if (raw_bytes.empty()) {
1083     base::JsonDoubleQuote(EscapePath(UTF16ToUTF8(name)),
1084                                    true, &result);
1085   } else {
1086     base::JsonDoubleQuote(EscapePath(raw_bytes), true, &result);
1087   }
1088   if (is_dir) {
1089     result.append(",1,");
1090   } else {
1091     result.append(",0,");
1092   }
1093
1094   // Negative size means unknown or not applicable (e.g. directory).
1095   base::string16 size_string;
1096   if (size >= 0)
1097     size_string = FormatBytesUnlocalized(size);
1098   base::JsonDoubleQuote(size_string, true, &result);
1099
1100   result.append(",");
1101
1102   base::string16 modified_str;
1103   // |modified| can be NULL in FTP listings.
1104   if (!modified.is_null()) {
1105     modified_str = base::TimeFormatShortDateAndTime(modified);
1106   }
1107   base::JsonDoubleQuote(modified_str, true, &result);
1108
1109   result.append(");</script>\n");
1110
1111   return result;
1112 }
1113
1114 base::string16 StripWWW(const base::string16& text) {
1115   const base::string16 www(ASCIIToUTF16("www."));
1116   return StartsWith(text, www, true) ? text.substr(www.length()) : text;
1117 }
1118
// Returns the host of |url| with a leading "www." removed (see StripWWW()).
// |url| must be valid; this is DCHECKed. The host is converted with
// ASCIIToUTF16(), i.e. it is treated as ASCII.
base::string16 StripWWWFromHost(const GURL& url) {
  DCHECK(url.is_valid());
  return StripWWW(ASCIIToUTF16(url.host()));
}
1123
1124 bool IsSafePortablePathComponent(const base::FilePath& component) {
1125   base::string16 component16;
1126   base::FilePath::StringType sanitized = component.value();
1127   SanitizeGeneratedFileName(&sanitized, true);
1128   base::FilePath::StringType extension = component.Extension();
1129   if (!extension.empty())
1130     extension.erase(extension.begin());  // Erase preceding '.'.
1131   return !component.empty() &&
1132          (component == component.BaseName()) &&
1133          (component == component.StripTrailingSeparators()) &&
1134          FilePathToString16(component, &component16) &&
1135          file_util::IsFilenameLegal(component16) &&
1136          !IsShellIntegratedExtension(extension) &&
1137          (sanitized == component.value());
1138 }
1139
// Returns true if |filename| passes IsSafePortablePathComponent() and is not
// a name rejected by IsReservedName() (Windows device names and shell-special
// files such as desktop.ini).
bool IsSafePortableBasename(const base::FilePath& filename) {
  return IsSafePortablePathComponent(filename) &&
         !IsReservedName(filename.value());
}
1144
1145 bool IsSafePortableRelativePath(const base::FilePath& path) {
1146   if (path.empty() || path.IsAbsolute() || path.EndsWithSeparator())
1147     return false;
1148   std::vector<base::FilePath::StringType> components;
1149   path.GetComponents(&components);
1150   if (components.empty())
1151     return false;
1152   for (size_t i = 0; i < components.size() - 1; ++i) {
1153     if (!IsSafePortablePathComponent(base::FilePath(components[i])))
1154       return false;
1155   }
1156   return IsSafePortableBasename(path.BaseName());
1157 }
1158
1159 void GenerateSafeFileName(const std::string& mime_type,
1160                           bool ignore_extension,
1161                           base::FilePath* file_path) {
1162   // Make sure we get the right file extension
1163   EnsureSafeExtension(mime_type, ignore_extension, file_path);
1164
1165 #if defined(OS_WIN)
1166   // Prepend "_" to the file name if it's a reserved name
1167   base::FilePath::StringType leaf_name = file_path->BaseName().value();
1168   DCHECK(!leaf_name.empty());
1169   if (IsReservedName(leaf_name)) {
1170     leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name;
1171     *file_path = file_path->DirName();
1172     if (file_path->value() == base::FilePath::kCurrentDirectory) {
1173       *file_path = base::FilePath(leaf_name);
1174     } else {
1175       *file_path = file_path->Append(leaf_name);
1176     }
1177   }
1178 #endif
1179 }
1180
1181 base::string16 GetSuggestedFilename(const GURL& url,
1182                                     const std::string& content_disposition,
1183                                     const std::string& referrer_charset,
1184                                     const std::string& suggested_name,
1185                                     const std::string& mime_type,
1186                                     const std::string& default_name) {
1187   // TODO: this function to be updated to match the httpbis recommendations.
1188   // Talk to abarth for the latest news.
1189
1190   // We don't translate this fallback string, "download". If localization is
1191   // needed, the caller should provide localized fallback in |default_name|.
1192   static const base::FilePath::CharType kFinalFallbackName[] =
1193     FILE_PATH_LITERAL("download");
1194   std::string filename;  // In UTF-8
1195   bool overwrite_extension = false;
1196
1197   // Try to extract a filename from content-disposition first.
1198   if (!content_disposition.empty()) {
1199     HttpContentDisposition header(content_disposition, referrer_charset);
1200     filename = header.filename();
1201   }
1202
1203   // Then try to use the suggested name.
1204   if (filename.empty() && !suggested_name.empty())
1205     filename = suggested_name;
1206
1207   // Now try extracting the filename from the URL.  GetFileNameFromURL() only
1208   // looks at the last component of the URL and doesn't return the hostname as a
1209   // failover.
1210   if (filename.empty())
1211     filename = GetFileNameFromURL(url, referrer_charset, &overwrite_extension);
1212
1213   // Finally try the URL hostname, but only if there's no default specified in
1214   // |default_name|.  Some schemes (e.g.: file:, about:, data:) do not have a
1215   // host name.
1216   if (filename.empty() &&
1217       default_name.empty() &&
1218       url.is_valid() &&
1219       !url.host().empty()) {
1220     // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451)
1221     filename = url.host();
1222   }
1223
1224   bool replace_trailing = false;
1225   base::FilePath::StringType result_str, default_name_str;
1226 #if defined(OS_WIN)
1227   replace_trailing = true;
1228   result_str = UTF8ToUTF16(filename);
1229   default_name_str = UTF8ToUTF16(default_name);
1230 #else
1231   result_str = filename;
1232   default_name_str = default_name;
1233 #endif
1234   SanitizeGeneratedFileName(&result_str, replace_trailing);
1235   if (result_str.find_last_not_of(FILE_PATH_LITERAL("-_")) ==
1236       base::FilePath::StringType::npos) {
1237     result_str = !default_name_str.empty() ? default_name_str :
1238       base::FilePath::StringType(kFinalFallbackName);
1239     overwrite_extension = false;
1240   }
1241   file_util::ReplaceIllegalCharactersInPath(&result_str, '-');
1242   base::FilePath result(result_str);
1243   GenerateSafeFileName(mime_type, overwrite_extension, &result);
1244
1245   base::string16 result16;
1246   if (!FilePathToString16(result, &result16)) {
1247     result = base::FilePath(default_name_str);
1248     if (!FilePathToString16(result, &result16)) {
1249       result = base::FilePath(kFinalFallbackName);
1250       FilePathToString16(result, &result16);
1251     }
1252   }
1253   return result16;
1254 }
1255
1256 base::FilePath GenerateFileName(const GURL& url,
1257                                 const std::string& content_disposition,
1258                                 const std::string& referrer_charset,
1259                                 const std::string& suggested_name,
1260                                 const std::string& mime_type,
1261                                 const std::string& default_file_name) {
1262   base::string16 file_name = GetSuggestedFilename(url,
1263                                                   content_disposition,
1264                                                   referrer_charset,
1265                                                   suggested_name,
1266                                                   mime_type,
1267                                                   default_file_name);
1268
1269 #if defined(OS_WIN)
1270   base::FilePath generated_name(file_name);
1271 #else
1272   base::FilePath generated_name(
1273       base::SysWideToNativeMB(UTF16ToWide(file_name)));
1274 #endif
1275
1276 #if defined(OS_CHROMEOS)
1277   // When doing file manager operations on ChromeOS, the file paths get
1278   // normalized in WebKit layer, so let's ensure downloaded files have
1279   // normalized names. Otherwise, we won't be able to handle files with NFD
1280   // utf8 encoded characters in name.
1281   file_util::NormalizeFileNameEncoding(&generated_name);
1282 #endif
1283
1284   DCHECK(!generated_name.empty());
1285
1286   return generated_name;
1287 }
1288
1289 bool IsPortAllowedByDefault(int port) {
1290   int array_size = arraysize(kRestrictedPorts);
1291   for (int i = 0; i < array_size; i++) {
1292     if (kRestrictedPorts[i] == port) {
1293       return false;
1294     }
1295   }
1296   return true;
1297 }
1298
1299 bool IsPortAllowedByFtp(int port) {
1300   int array_size = arraysize(kAllowedFtpPorts);
1301   for (int i = 0; i < array_size; i++) {
1302     if (kAllowedFtpPorts[i] == port) {
1303         return true;
1304     }
1305   }
1306   // Port not explicitly allowed by FTP, so return the default restrictions.
1307   return IsPortAllowedByDefault(port);
1308 }
1309
1310 bool IsPortAllowedByOverride(int port) {
1311   if (g_explicitly_allowed_ports.Get().empty())
1312     return false;
1313
1314   return g_explicitly_allowed_ports.Get().count(port) > 0;
1315 }
1316
// Switches the descriptor |fd| to non-blocking mode and returns the result of
// the underlying system call. On POSIX that is -1 if either fcntl() call
// fails.
int SetNonBlocking(int fd) {
#if defined(OS_WIN)
  unsigned long enable_non_blocking = 1;
  return ioctlsocket(fd, FIONBIO, &enable_non_blocking);
#elif defined(OS_POSIX)
  // Read the current status flags so that only O_NONBLOCK is added.
  const int current_flags = fcntl(fd, F_GETFL, 0);
  if (current_flags == -1)
    return -1;
  return fcntl(fd, F_SETFL, current_flags | O_NONBLOCK);
#endif
}
1328
1329 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin,
1330                       std::string::const_iterator host_and_port_end,
1331                       std::string* host,
1332                       int* port) {
1333   if (host_and_port_begin >= host_and_port_end)
1334     return false;
1335
1336   // When using url_parse, we use char*.
1337   const char* auth_begin = &(*host_and_port_begin);
1338   int auth_len = host_and_port_end - host_and_port_begin;
1339
1340   url_parse::Component auth_component(0, auth_len);
1341   url_parse::Component username_component;
1342   url_parse::Component password_component;
1343   url_parse::Component hostname_component;
1344   url_parse::Component port_component;
1345
1346   url_parse::ParseAuthority(auth_begin, auth_component, &username_component,
1347       &password_component, &hostname_component, &port_component);
1348
1349   // There shouldn't be a username/password.
1350   if (username_component.is_valid() || password_component.is_valid())
1351     return false;
1352
1353   if (!hostname_component.is_nonempty())
1354     return false;  // Failed parsing.
1355
1356   int parsed_port_number = -1;
1357   if (port_component.is_nonempty()) {
1358     parsed_port_number = url_parse::ParsePort(auth_begin, port_component);
1359
1360     // If parsing failed, port_number will be either PORT_INVALID or
1361     // PORT_UNSPECIFIED, both of which are negative.
1362     if (parsed_port_number < 0)
1363       return false;  // Failed parsing the port number.
1364   }
1365
1366   if (port_component.len == 0)
1367     return false;  // Reject inputs like "foo:"
1368
1369   // Pass results back to caller.
1370   host->assign(auth_begin + hostname_component.begin, hostname_component.len);
1371   *port = parsed_port_number;
1372
1373   return true;  // Success.
1374 }
1375
// Overload for a whole string: parses all of |host_and_port| by forwarding its
// begin()/end() range to the iterator-based ParseHostAndPort(). |host|, |port|
// and the return value are those of that overload.
bool ParseHostAndPort(const std::string& host_and_port,
                      std::string* host,
                      int* port) {
  return ParseHostAndPort(
      host_and_port.begin(), host_and_port.end(), host, port);
}
1382
// Returns "host:port" for |url|. The port is taken from
// GURL::EffectiveIntPort() and printed as a decimal number.
// NOTE(review): presumably EffectiveIntPort() supplies the scheme's default
// port when the URL does not name one - confirm against GURL.
std::string GetHostAndPort(const GURL& url) {
  // For IPv6 literals, GURL::host() already includes the brackets so it is
  // safe to just append a colon.
  return base::StringPrintf("%s:%d", url.host().c_str(),
                            url.EffectiveIntPort());
}
1389
1390 std::string GetHostAndOptionalPort(const GURL& url) {
1391   // For IPv6 literals, GURL::host() already includes the brackets
1392   // so it is safe to just append a colon.
1393   if (url.has_port())
1394     return base::StringPrintf("%s:%s", url.host().c_str(), url.port().c_str());
1395   return url.host();
1396 }
1397
1398 // Extracts the address and port portions of a sockaddr.
1399 bool GetIPAddressFromSockAddr(const struct sockaddr* sock_addr,
1400                               socklen_t sock_addr_len,
1401                               const uint8** address,
1402                               size_t* address_len,
1403                               uint16* port) {
1404   if (sock_addr->sa_family == AF_INET) {
1405     if (sock_addr_len < static_cast<socklen_t>(sizeof(struct sockaddr_in)))
1406       return false;
1407     const struct sockaddr_in* addr =
1408         reinterpret_cast<const struct sockaddr_in*>(sock_addr);
1409     *address = reinterpret_cast<const uint8*>(&addr->sin_addr);
1410     *address_len = kIPv4AddressSize;
1411     if (port)
1412       *port = base::NetToHost16(addr->sin_port);
1413     return true;
1414   }
1415
1416   if (sock_addr->sa_family == AF_INET6) {
1417     if (sock_addr_len < static_cast<socklen_t>(sizeof(struct sockaddr_in6)))
1418       return false;
1419     const struct sockaddr_in6* addr =
1420         reinterpret_cast<const struct sockaddr_in6*>(sock_addr);
1421     *address = reinterpret_cast<const unsigned char*>(&addr->sin6_addr);
1422     *address_len = kIPv6AddressSize;
1423     if (port)
1424       *port = base::NetToHost16(addr->sin6_port);
1425     return true;
1426   }
1427
1428   return false;  // Unrecognized |sa_family|.
1429 }
1430
1431 std::string IPAddressToString(const uint8* address,
1432                               size_t address_len) {
1433   std::string str;
1434   url_canon::StdStringCanonOutput output(&str);
1435
1436   if (address_len == kIPv4AddressSize) {
1437     url_canon::AppendIPv4Address(address, &output);
1438   } else if (address_len == kIPv6AddressSize) {
1439     url_canon::AppendIPv6Address(address, &output);
1440   } else {
1441     CHECK(false) << "Invalid IP address with length: " << address_len;
1442   }
1443
1444   output.Complete();
1445   return str;
1446 }
1447
1448 std::string IPAddressToStringWithPort(const uint8* address,
1449                                       size_t address_len,
1450                                       uint16 port) {
1451   std::string address_str = IPAddressToString(address, address_len);
1452
1453   if (address_len == kIPv6AddressSize) {
1454     // Need to bracket IPv6 addresses since they contain colons.
1455     return base::StringPrintf("[%s]:%d", address_str.c_str(), port);
1456   }
1457   return base::StringPrintf("%s:%d", address_str.c_str(), port);
1458 }
1459
1460 std::string NetAddressToString(const struct sockaddr* sa,
1461                                socklen_t sock_addr_len) {
1462   const uint8* address;
1463   size_t address_len;
1464   if (!GetIPAddressFromSockAddr(sa, sock_addr_len, &address,
1465                                 &address_len, NULL)) {
1466     NOTREACHED();
1467     return std::string();
1468   }
1469   return IPAddressToString(address, address_len);
1470 }
1471
1472 std::string NetAddressToStringWithPort(const struct sockaddr* sa,
1473                                        socklen_t sock_addr_len) {
1474   const uint8* address;
1475   size_t address_len;
1476   uint16 port;
1477   if (!GetIPAddressFromSockAddr(sa, sock_addr_len, &address,
1478                                 &address_len, &port)) {
1479     NOTREACHED();
1480     return std::string();
1481   }
1482   return IPAddressToStringWithPort(address, address_len, port);
1483 }
1484
// Overload for IPAddressNumber: forwards the stored bytes and their count to
// the pointer-based IPAddressToString().
// NOTE(review): front() requires |addr| to be non-empty; an empty |addr| is
// not handled here.
std::string IPAddressToString(const IPAddressNumber& addr) {
  return IPAddressToString(&addr.front(), addr.size());
}
1488
// Overload for IPAddressNumber: forwards the stored bytes, their count and
// |port| to the pointer-based IPAddressToStringWithPort().
// NOTE(review): front() requires |addr| to be non-empty; an empty |addr| is
// not handled here.
std::string IPAddressToStringWithPort(const IPAddressNumber& addr,
                                      uint16 port) {
  return IPAddressToStringWithPort(&addr.front(), addr.size(), port);
}
1493
1494 std::string GetHostName() {
1495 #if defined(OS_WIN)
1496   EnsureWinsockInit();
1497 #endif
1498
1499   // Host names are limited to 255 bytes.
1500   char buffer[256];
1501   int result = gethostname(buffer, sizeof(buffer));
1502   if (result != 0) {
1503     DVLOG(1) << "gethostname() failed with " << result;
1504     buffer[0] = '\0';
1505   }
1506   return std::string(buffer);
1507 }
1508
1509 void GetIdentityFromURL(const GURL& url,
1510                         base::string16* username,
1511                         base::string16* password) {
1512   UnescapeRule::Type flags =
1513       UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS;
1514   *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);
1515   *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);
1516 }
1517
1518 std::string GetHostOrSpecFromURL(const GURL& url) {
1519   return url.has_host() ? TrimEndingDot(url.host()) : url.spec();
1520 }
1521
// Appends the formatted host of |url| to |output|. The host component of the
// (possibly invalid) spec is run through AppendFormattedComponent() with a
// HostComponentTransform built from |languages|. No offsets are tracked: an
// empty offsets vector and NULL for the last two arguments are passed.
void AppendFormattedHost(const GURL& url,
                         const std::string& languages,
                         base::string16* output) {
  std::vector<size_t> offsets;
  AppendFormattedComponent(url.possibly_invalid_spec(),
      url.parsed_for_possibly_invalid_spec().host, offsets,
      HostComponentTransform(languages), output, NULL, NULL);
}
1530
1531 base::string16 FormatUrlWithOffsets(
1532     const GURL& url,
1533     const std::string& languages,
1534     FormatUrlTypes format_types,
1535     UnescapeRule::Type unescape_rules,
1536     url_parse::Parsed* new_parsed,
1537     size_t* prefix_end,
1538     std::vector<size_t>* offsets_for_adjustment) {
1539   url_parse::Parsed parsed_temp;
1540   if (!new_parsed)
1541     new_parsed = &parsed_temp;
1542   else
1543     *new_parsed = url_parse::Parsed();
1544   std::vector<size_t> original_offsets;
1545   if (offsets_for_adjustment)
1546     original_offsets = *offsets_for_adjustment;
1547
1548   // Special handling for view-source:.  Don't use content::kViewSourceScheme
1549   // because this library shouldn't depend on chrome.
1550   const char* const kViewSource = "view-source";
1551   // Reject "view-source:view-source:..." to avoid deep recursion.
1552   const char* const kViewSourceTwice = "view-source:view-source:";
1553   if (url.SchemeIs(kViewSource) &&
1554       !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
1555     return FormatViewSourceUrl(url, original_offsets, languages, format_types,
1556         unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
1557   }
1558
1559   // We handle both valid and invalid URLs (this will give us the spec
1560   // regardless of validity).
1561   const std::string& spec = url.possibly_invalid_spec();
1562   const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
1563
1564   // Scheme & separators.  These are ASCII.
1565   base::string16 url_string;
1566   url_string.insert(url_string.end(), spec.begin(),
1567       spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
1568                                                   true));
1569   const char kHTTP[] = "http://";
1570   const char kFTP[] = "ftp.";
1571   // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com.  This
1572   // means that if we trim "http://" off a URL whose host starts with "ftp." and
1573   // the user inputs this into any field subject to fixup (which is basically
1574   // all input fields), the meaning would be changed.  (In fact, often the
1575   // formatted URL is directly pre-filled into an input field.)  For this reason
1576   // we avoid stripping "http://" in this case.
1577   bool omit_http = (format_types & kFormatUrlOmitHTTP) &&
1578       EqualsASCII(url_string, kHTTP) &&
1579       !StartsWithASCII(url.host(), kFTP, true);
1580   new_parsed->scheme = parsed.scheme;
1581
1582   // Username & password.
1583   if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {
1584     // Remove the username and password fields. We don't want to display those
1585     // to the user since they can be used for attacks,
1586     // e.g. "http://google.com:search@evil.ru/"
1587     new_parsed->username.reset();
1588     new_parsed->password.reset();
1589     // Update the offsets based on removed username and/or password.
1590     if (offsets_for_adjustment && !offsets_for_adjustment->empty() &&
1591         (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
1592       base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
1593       if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
1594         // The seeming off-by-one and off-by-two in these first two lines are to
1595         // account for the ':' after the username and '@' after the password.
1596         offset_adjuster.Add(base::OffsetAdjuster::Adjustment(
1597             static_cast<size_t>(parsed.username.begin),
1598             static_cast<size_t>(parsed.username.len + parsed.password.len + 2),
1599             0));
1600       } else {
1601         const url_parse::Component* nonempty_component =
1602             parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
1603         // The seeming off-by-one in below is to account for the '@' after the
1604         // username/password.
1605         offset_adjuster.Add(base::OffsetAdjuster::Adjustment(
1606             static_cast<size_t>(nonempty_component->begin),
1607             static_cast<size_t>(nonempty_component->len + 1), 0));
1608       }
1609     }
1610   } else {
1611     AppendFormattedComponent(spec, parsed.username, original_offsets,
1612         NonHostComponentTransform(unescape_rules), &url_string,
1613         &new_parsed->username, offsets_for_adjustment);
1614     if (parsed.password.is_valid()) {
1615       size_t colon = parsed.username.end();
1616       DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon);
1617       std::vector<size_t>::const_iterator colon_iter =
1618           std::find(original_offsets.begin(), original_offsets.end(), colon);
1619       if (colon_iter != original_offsets.end()) {
1620         (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] =
1621             url_string.length();
1622       }
1623       url_string.push_back(':');
1624     }
1625     AppendFormattedComponent(spec, parsed.password, original_offsets,
1626         NonHostComponentTransform(unescape_rules), &url_string,
1627         &new_parsed->password, offsets_for_adjustment);
1628     if (parsed.username.is_valid() || parsed.password.is_valid()) {
1629       size_t at_sign = (parsed.password.is_valid() ?
1630           parsed.password : parsed.username).end();
1631       DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign);
1632       std::vector<size_t>::const_iterator at_sign_iter =
1633           std::find(original_offsets.begin(), original_offsets.end(), at_sign);
1634       if (at_sign_iter != original_offsets.end()) {
1635         (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] =
1636             url_string.length();
1637       }
1638       url_string.push_back('@');
1639     }
1640   }
1641   if (prefix_end)
1642     *prefix_end = static_cast<size_t>(url_string.length());
1643
1644   // Host.
1645   AppendFormattedComponent(spec, parsed.host, original_offsets,
1646       HostComponentTransform(languages), &url_string, &new_parsed->host,
1647       offsets_for_adjustment);
1648
1649   // Port.
1650   if (parsed.port.is_nonempty()) {
1651     url_string.push_back(':');
1652     new_parsed->port.begin = url_string.length();
1653     url_string.insert(url_string.end(),
1654                       spec.begin() + parsed.port.begin,
1655                       spec.begin() + parsed.port.end());
1656     new_parsed->port.len = url_string.length() - new_parsed->port.begin;
1657   } else {
1658     new_parsed->port.reset();
1659   }
1660
1661   // Path & query.  Both get the same general unescape & convert treatment.
1662   if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
1663       !CanStripTrailingSlash(url)) {
1664     AppendFormattedComponent(spec, parsed.path, original_offsets,
1665         NonHostComponentTransform(unescape_rules), &url_string,
1666         &new_parsed->path, offsets_for_adjustment);
1667   }
1668   if (parsed.query.is_valid())
1669     url_string.push_back('?');
1670   AppendFormattedComponent(spec, parsed.query, original_offsets,
1671       NonHostComponentTransform(unescape_rules), &url_string,
1672       &new_parsed->query, offsets_for_adjustment);
1673
1674   // Ref.  This is valid, unescaped UTF-8, so we can just convert.
1675   if (parsed.ref.is_valid()) {
1676     url_string.push_back('#');
1677     size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin);
1678     size_t output_ref_begin = url_string.length();
1679     new_parsed->ref.begin = static_cast<int>(output_ref_begin);
1680
1681     std::vector<size_t> offsets_into_ref(
1682         OffsetsIntoComponent(original_offsets, original_ref_begin));
1683     if (parsed.ref.len > 0) {
1684       url_string.append(base::UTF8ToUTF16AndAdjustOffsets(
1685           spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)),
1686           &offsets_into_ref));
1687     }
1688
1689     new_parsed->ref.len =
1690         static_cast<int>(url_string.length() - new_parsed->ref.begin);
1691     AdjustForComponentTransform(original_offsets, original_ref_begin,
1692         static_cast<size_t>(parsed.ref.end()), offsets_into_ref,
1693         output_ref_begin, offsets_for_adjustment);
1694   }
1695
1696   // If we need to strip out http do it after the fact. This way we don't need
1697   // to worry about how offset_for_adjustment is interpreted.
1698   if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) {
1699     const size_t kHTTPSize = arraysize(kHTTP) - 1;
1700     url_string = url_string.substr(kHTTPSize);
1701     if (offsets_for_adjustment && !offsets_for_adjustment->empty()) {
1702       base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
1703       offset_adjuster.Add(base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0));
1704     }
1705     if (prefix_end)
1706       *prefix_end -= kHTTPSize;
1707
1708     // Adjust new_parsed.
1709     DCHECK(new_parsed->scheme.is_valid());
1710     int delta = -(new_parsed->scheme.len + 3);  // +3 for ://.
1711     new_parsed->scheme.reset();
1712     AdjustComponents(delta, new_parsed);
1713   }
1714
1715   LimitOffsets(url_string, offsets_for_adjustment);
1716   return url_string;
1717 }
1718
1719 base::string16 FormatUrl(const GURL& url,
1720                          const std::string& languages,
1721                          FormatUrlTypes format_types,
1722                          UnescapeRule::Type unescape_rules,
1723                          url_parse::Parsed* new_parsed,
1724                          size_t* prefix_end,
1725                          size_t* offset_for_adjustment) {
1726   std::vector<size_t> offsets;
1727   if (offset_for_adjustment)
1728     offsets.push_back(*offset_for_adjustment);
1729   base::string16 result = FormatUrlWithOffsets(url, languages, format_types,
1730       unescape_rules, new_parsed, prefix_end, &offsets);
1731   if (offset_for_adjustment)
1732     *offset_for_adjustment = offsets[0];
1733   return result;
1734 }
1735
1736 bool CanStripTrailingSlash(const GURL& url) {
1737   // Omit the path only for standard, non-file URLs with nothing but "/" after
1738   // the hostname.
1739   return url.IsStandard() && !url.SchemeIsFile() &&
1740       !url.SchemeIsFileSystem() && !url.has_query() && !url.has_ref()
1741       && url.path() == "/";
1742 }
1743
1744 GURL SimplifyUrlForRequest(const GURL& url) {
1745   DCHECK(url.is_valid());
1746   GURL::Replacements replacements;
1747   replacements.ClearUsername();
1748   replacements.ClearPassword();
1749   replacements.ClearRef();
1750   return url.ReplaceComponents(replacements);
1751 }
1752
1753 // Specifies a comma separated list of port numbers that should be accepted
1754 // despite bans. If the string is invalid no allowed ports are stored.
1755 void SetExplicitlyAllowedPorts(const std::string& allowed_ports) {
1756   if (allowed_ports.empty())
1757     return;
1758
1759   std::multiset<int> ports;
1760   size_t last = 0;
1761   size_t size = allowed_ports.size();
1762   // The comma delimiter.
1763   const std::string::value_type kComma = ',';
1764
1765   // Overflow is still possible for evil user inputs.
1766   for (size_t i = 0; i <= size; ++i) {
1767     // The string should be composed of only digits and commas.
1768     if (i != size && !IsAsciiDigit(allowed_ports[i]) &&
1769         (allowed_ports[i] != kComma))
1770       return;
1771     if (i == size || allowed_ports[i] == kComma) {
1772       if (i > last) {
1773         int port;
1774         base::StringToInt(base::StringPiece(allowed_ports.begin() + last,
1775                                             allowed_ports.begin() + i),
1776                           &port);
1777         ports.insert(port);
1778       }
1779       last = i + 1;
1780     }
1781   }
1782   g_explicitly_allowed_ports.Get() = ports;
1783 }
1784
1785 ScopedPortException::ScopedPortException(int port) : port_(port) {
1786   g_explicitly_allowed_ports.Get().insert(port);
1787 }
1788
1789 ScopedPortException::~ScopedPortException() {
1790   std::multiset<int>::iterator it =
1791       g_explicitly_allowed_ports.Get().find(port_);
1792   if (it != g_explicitly_allowed_ports.Get().end())
1793     g_explicitly_allowed_ports.Get().erase(it);
1794   else
1795     NOTREACHED();
1796 }
1797
1798 namespace {
1799
1800 const char* kFinalStatusNames[] = {
1801   "Cannot create sockets",
1802   "Can create sockets",
1803   "Can't get addresses",
1804   "Global ipv6 address missing",
1805   "Global ipv6 address present",
1806   "Interface array too short",
1807   "Probing not supported",  // IPV6_SUPPORT_MAX
1808 };
1809 COMPILE_ASSERT(arraysize(kFinalStatusNames) == IPV6_SUPPORT_MAX + 1,
1810                IPv6SupportStatus_name_count_mismatch);
1811
1812 // TODO(jar): The following is a simple estimate of IPv6 support.  We may need
1813 // to do a test resolution, and a test connection, to REALLY verify support.
1814 IPv6SupportResult TestIPv6SupportInternal() {
1815 #if defined(OS_ANDROID)
1816   // TODO: We should fully implement IPv6 probe once 'getifaddrs' API available;
1817   // Another approach is implementing the similar feature by
1818   // java.net.NetworkInterface through JNI.
1819   NOTIMPLEMENTED();
1820   return IPv6SupportResult(true, IPV6_SUPPORT_MAX, 0);
1821 #elif defined(OS_POSIX)
1822   int test_socket = socket(AF_INET6, SOCK_STREAM, 0);
1823   if (test_socket == -1)
1824     return IPv6SupportResult(false, IPV6_CANNOT_CREATE_SOCKETS, errno);
1825   close(test_socket);
1826
1827   // Check to see if any interface has a IPv6 address.
1828   struct ifaddrs* interface_addr = NULL;
1829   int rv = getifaddrs(&interface_addr);
1830   if (rv != 0) {
1831     // Don't yet block IPv6.
1832     return IPv6SupportResult(true, IPV6_GETIFADDRS_FAILED, errno);
1833   }
1834
1835   bool found_ipv6 = false;
1836   for (struct ifaddrs* interface = interface_addr;
1837        interface != NULL;
1838        interface = interface->ifa_next) {
1839     if (!(IFF_UP & interface->ifa_flags))
1840       continue;
1841     if (IFF_LOOPBACK & interface->ifa_flags)
1842       continue;
1843     struct sockaddr* addr = interface->ifa_addr;
1844     if (!addr)
1845       continue;
1846     if (addr->sa_family != AF_INET6)
1847       continue;
1848     // Safe cast since this is AF_INET6.
1849     struct sockaddr_in6* addr_in6 =
1850         reinterpret_cast<struct sockaddr_in6*>(addr);
1851     struct in6_addr* sin6_addr = &addr_in6->sin6_addr;
1852     if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr))
1853       continue;
1854     found_ipv6 = true;
1855     break;
1856   }
1857   freeifaddrs(interface_addr);
1858   if (!found_ipv6)
1859     return IPv6SupportResult(false, IPV6_GLOBAL_ADDRESS_MISSING, 0);
1860
1861   return IPv6SupportResult(true, IPV6_GLOBAL_ADDRESS_PRESENT, 0);
1862 #elif defined(OS_WIN)
1863   EnsureWinsockInit();
1864   SOCKET test_socket = socket(AF_INET6, SOCK_STREAM, 0);
1865   if (test_socket == INVALID_SOCKET) {
1866     return IPv6SupportResult(false,
1867                              IPV6_CANNOT_CREATE_SOCKETS,
1868                              WSAGetLastError());
1869   }
1870   closesocket(test_socket);
1871
1872   // Check to see if any interface has a IPv6 address.
1873   // The GetAdaptersAddresses MSDN page recommends using a size of 15000 to
1874   // avoid reallocation.
1875   ULONG adapters_size = 15000;
1876   scoped_ptr_malloc<IP_ADAPTER_ADDRESSES> adapters;
1877   ULONG error;
1878   int num_tries = 0;
1879   do {
1880     adapters.reset(
1881         reinterpret_cast<PIP_ADAPTER_ADDRESSES>(malloc(adapters_size)));
1882     // Return only unicast addresses.
1883     error = GetAdaptersAddresses(AF_UNSPEC,
1884                                  GAA_FLAG_SKIP_ANYCAST |
1885                                  GAA_FLAG_SKIP_MULTICAST |
1886                                  GAA_FLAG_SKIP_DNS_SERVER |
1887                                  GAA_FLAG_SKIP_FRIENDLY_NAME,
1888                                  NULL, adapters.get(), &adapters_size);
1889     num_tries++;
1890   } while (error == ERROR_BUFFER_OVERFLOW && num_tries <= 3);
1891   if (error == ERROR_NO_DATA)
1892     return IPv6SupportResult(false, IPV6_GLOBAL_ADDRESS_MISSING, error);
1893   if (error != ERROR_SUCCESS) {
1894     // Don't yet block IPv6.
1895     return IPv6SupportResult(true, IPV6_GETIFADDRS_FAILED, error);
1896   }
1897
1898   PIP_ADAPTER_ADDRESSES adapter;
1899   for (adapter = adapters.get(); adapter; adapter = adapter->Next) {
1900     if (adapter->OperStatus != IfOperStatusUp)
1901       continue;
1902     if (adapter->IfType == IF_TYPE_SOFTWARE_LOOPBACK)
1903       continue;
1904     PIP_ADAPTER_UNICAST_ADDRESS unicast_address;
1905     for (unicast_address = adapter->FirstUnicastAddress;
1906          unicast_address;
1907          unicast_address = unicast_address->Next) {
1908       if (unicast_address->Address.lpSockaddr->sa_family != AF_INET6)
1909         continue;
1910       // Safe cast since this is AF_INET6.
1911       struct sockaddr_in6* addr_in6 = reinterpret_cast<struct sockaddr_in6*>(
1912           unicast_address->Address.lpSockaddr);
1913       struct in6_addr* sin6_addr = &addr_in6->sin6_addr;
1914       if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr))
1915         continue;
1916       const uint8 kTeredoPrefix[] = { 0x20, 0x01, 0, 0 };
1917       if (!memcmp(sin6_addr->s6_addr, kTeredoPrefix, arraysize(kTeredoPrefix)))
1918         continue;
1919       return IPv6SupportResult(true, IPV6_GLOBAL_ADDRESS_PRESENT, 0);
1920     }
1921   }
1922
1923   return IPv6SupportResult(false, IPV6_GLOBAL_ADDRESS_MISSING, 0);
1924 #else
1925   NOTIMPLEMENTED();
1926   return IPv6SupportResult(true, IPV6_SUPPORT_MAX, 0);
1927 #endif  // defined(various platforms)
1928 }
1929
1930 }  // namespace
1931
1932 IPv6SupportResult::IPv6SupportResult(bool ipv6_supported,
1933                                      IPv6SupportStatus ipv6_support_status,
1934                                      int os_error)
1935                                      : ipv6_supported(ipv6_supported),
1936                                        ipv6_support_status(ipv6_support_status),
1937                                        os_error(os_error) {
1938 }
1939
1940 base::Value* IPv6SupportResult::ToNetLogValue(
1941     NetLog::LogLevel /* log_level */) const {
1942   base::DictionaryValue* dict = new DictionaryValue();
1943   dict->SetBoolean("ipv6_supported", ipv6_supported);
1944   dict->SetString("ipv6_support_status",
1945                   kFinalStatusNames[ipv6_support_status]);
1946   if (os_error)
1947     dict->SetInteger("os_error", os_error);
1948   return dict;
1949 }
1950
1951 IPv6SupportResult TestIPv6Support() {
1952   IPv6SupportResult result = TestIPv6SupportInternal();
1953
1954   // Record UMA.
1955   if (result.ipv6_support_status != IPV6_SUPPORT_MAX) {
1956     static bool run_once = false;
1957     if (!run_once) {
1958       run_once = true;
1959       UMA_HISTOGRAM_ENUMERATION("Net.IPv6Status",
1960                                 result.ipv6_support_status,
1961                                 IPV6_SUPPORT_MAX);
1962     } else {
1963       UMA_HISTOGRAM_ENUMERATION("Net.IPv6Status_retest",
1964                                 result.ipv6_support_status,
1965                                 IPV6_SUPPORT_MAX);
1966     }
1967   }
1968   return result;
1969 }
1970
1971 bool HaveOnlyLoopbackAddresses() {
1972 #if defined(OS_ANDROID)
1973   return android::HaveOnlyLoopbackAddresses();
1974 #elif defined(OS_POSIX)
1975   struct ifaddrs* interface_addr = NULL;
1976   int rv = getifaddrs(&interface_addr);
1977   if (rv != 0) {
1978     DVLOG(1) << "getifaddrs() failed with errno = " << errno;
1979     return false;
1980   }
1981
1982   bool result = true;
1983   for (struct ifaddrs* interface = interface_addr;
1984        interface != NULL;
1985        interface = interface->ifa_next) {
1986     if (!(IFF_UP & interface->ifa_flags))
1987       continue;
1988     if (IFF_LOOPBACK & interface->ifa_flags)
1989       continue;
1990     const struct sockaddr* addr = interface->ifa_addr;
1991     if (!addr)
1992       continue;
1993     if (addr->sa_family == AF_INET6) {
1994       // Safe cast since this is AF_INET6.
1995       const struct sockaddr_in6* addr_in6 =
1996           reinterpret_cast<const struct sockaddr_in6*>(addr);
1997       const struct in6_addr* sin6_addr = &addr_in6->sin6_addr;
1998       if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr))
1999         continue;
2000     }
2001     if (addr->sa_family != AF_INET6 && addr->sa_family != AF_INET)
2002       continue;
2003
2004     result = false;
2005     break;
2006   }
2007   freeifaddrs(interface_addr);
2008   return result;
2009 #elif defined(OS_WIN)
2010   // TODO(wtc): implement with the GetAdaptersAddresses function.
2011   NOTIMPLEMENTED();
2012   return false;
2013 #else
2014   NOTIMPLEMENTED();
2015   return false;
2016 #endif  // defined(various platforms)
2017 }
2018
2019 AddressFamily GetAddressFamily(const IPAddressNumber& address) {
2020   switch (address.size()) {
2021     case kIPv4AddressSize:
2022       return ADDRESS_FAMILY_IPV4;
2023     case kIPv6AddressSize:
2024       return ADDRESS_FAMILY_IPV6;
2025     default:
2026       return ADDRESS_FAMILY_UNSPECIFIED;
2027   }
2028 }
2029
2030 bool ParseIPLiteralToNumber(const std::string& ip_literal,
2031                             IPAddressNumber* ip_number) {
2032   // |ip_literal| could be either a IPv4 or an IPv6 literal. If it contains
2033   // a colon however, it must be an IPv6 address.
2034   if (ip_literal.find(':') != std::string::npos) {
2035     // GURL expects IPv6 hostnames to be surrounded with brackets.
2036     std::string host_brackets = "[" + ip_literal + "]";
2037     url_parse::Component host_comp(0, host_brackets.size());
2038
2039     // Try parsing the hostname as an IPv6 literal.
2040     ip_number->resize(16);  // 128 bits.
2041     return url_canon::IPv6AddressToNumber(host_brackets.data(),
2042                                           host_comp,
2043                                           &(*ip_number)[0]);
2044   }
2045
2046   // Otherwise the string is an IPv4 address.
2047   ip_number->resize(4);  // 32 bits.
2048   url_parse::Component host_comp(0, ip_literal.size());
2049   int num_components;
2050   url_canon::CanonHostInfo::Family family = url_canon::IPv4AddressToNumber(
2051       ip_literal.data(), host_comp, &(*ip_number)[0], &num_components);
2052   return family == url_canon::CanonHostInfo::IPV4;
2053 }
2054
namespace {

// Leading 12 bytes of an IPv4-mapped IPv6 address: ten zero bytes followed by
// two 0xFF bytes.
const unsigned char kIPv4MappedPrefix[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF
};

}  // namespace
2060
2061 IPAddressNumber ConvertIPv4NumberToIPv6Number(
2062     const IPAddressNumber& ipv4_number) {
2063   DCHECK(ipv4_number.size() == 4);
2064
2065   // IPv4-mapped addresses are formed by:
2066   // <80 bits of zeros>  + <16 bits of ones> + <32-bit IPv4 address>.
2067   IPAddressNumber ipv6_number;
2068   ipv6_number.reserve(16);
2069   ipv6_number.insert(ipv6_number.end(),
2070                      kIPv4MappedPrefix,
2071                      kIPv4MappedPrefix + arraysize(kIPv4MappedPrefix));
2072   ipv6_number.insert(ipv6_number.end(), ipv4_number.begin(), ipv4_number.end());
2073   return ipv6_number;
2074 }
2075
2076 bool IsIPv4Mapped(const IPAddressNumber& address) {
2077   if (address.size() != kIPv6AddressSize)
2078     return false;
2079   return std::equal(address.begin(),
2080                     address.begin() + arraysize(kIPv4MappedPrefix),
2081                     kIPv4MappedPrefix);
2082 }
2083
2084 IPAddressNumber ConvertIPv4MappedToIPv4(const IPAddressNumber& address) {
2085   DCHECK(IsIPv4Mapped(address));
2086   return IPAddressNumber(address.begin() + arraysize(kIPv4MappedPrefix),
2087                          address.end());
2088 }
2089
2090 bool ParseCIDRBlock(const std::string& cidr_literal,
2091                     IPAddressNumber* ip_number,
2092                     size_t* prefix_length_in_bits) {
2093   // We expect CIDR notation to match one of these two templates:
2094   //   <IPv4-literal> "/" <number of bits>
2095   //   <IPv6-literal> "/" <number of bits>
2096
2097   std::vector<std::string> parts;
2098   base::SplitString(cidr_literal, '/', &parts);
2099   if (parts.size() != 2)
2100     return false;
2101
2102   // Parse the IP address.
2103   if (!ParseIPLiteralToNumber(parts[0], ip_number))
2104     return false;
2105
2106   // Parse the prefix length.
2107   int number_of_bits = -1;
2108   if (!base::StringToInt(parts[1], &number_of_bits))
2109     return false;
2110
2111   // Make sure the prefix length is in a valid range.
2112   if (number_of_bits < 0 ||
2113       number_of_bits > static_cast<int>(ip_number->size() * 8))
2114     return false;
2115
2116   *prefix_length_in_bits = static_cast<size_t>(number_of_bits);
2117   return true;
2118 }
2119
2120 bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number,
2121                            const IPAddressNumber& ip_prefix,
2122                            size_t prefix_length_in_bits) {
2123   // Both the input IP address and the prefix IP address should be
2124   // either IPv4 or IPv6.
2125   DCHECK(ip_number.size() == 4 || ip_number.size() == 16);
2126   DCHECK(ip_prefix.size() == 4 || ip_prefix.size() == 16);
2127
2128   DCHECK_LE(prefix_length_in_bits, ip_prefix.size() * 8);
2129
2130   // In case we have an IPv6 / IPv4 mismatch, convert the IPv4 addresses to
2131   // IPv6 addresses in order to do the comparison.
2132   if (ip_number.size() != ip_prefix.size()) {
2133     if (ip_number.size() == 4) {
2134       return IPNumberMatchesPrefix(ConvertIPv4NumberToIPv6Number(ip_number),
2135                                    ip_prefix, prefix_length_in_bits);
2136     }
2137     return IPNumberMatchesPrefix(ip_number,
2138                                  ConvertIPv4NumberToIPv6Number(ip_prefix),
2139                                  96 + prefix_length_in_bits);
2140   }
2141
2142   // Otherwise we are comparing two IPv4 addresses, or two IPv6 addresses.
2143   // Compare all the bytes that fall entirely within the prefix.
2144   int num_entire_bytes_in_prefix = prefix_length_in_bits / 8;
2145   for (int i = 0; i < num_entire_bytes_in_prefix; ++i) {
2146     if (ip_number[i] != ip_prefix[i])
2147       return false;
2148   }
2149
2150   // In case the prefix was not a multiple of 8, there will be 1 byte
2151   // which is only partially masked.
2152   int remaining_bits = prefix_length_in_bits % 8;
2153   if (remaining_bits != 0) {
2154     unsigned char mask = 0xFF << (8 - remaining_bits);
2155     int i = num_entire_bytes_in_prefix;
2156     if ((ip_number[i] & mask) != (ip_prefix[i] & mask))
2157       return false;
2158   }
2159
2160   return true;
2161 }
2162
2163 const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address,
2164                                        socklen_t address_len) {
2165   if (address->sa_family == AF_INET) {
2166     DCHECK_LE(sizeof(sockaddr_in), static_cast<size_t>(address_len));
2167     const struct sockaddr_in* sockaddr =
2168         reinterpret_cast<const struct sockaddr_in*>(address);
2169     return &sockaddr->sin_port;
2170   } else if (address->sa_family == AF_INET6) {
2171     DCHECK_LE(sizeof(sockaddr_in6), static_cast<size_t>(address_len));
2172     const struct sockaddr_in6* sockaddr =
2173         reinterpret_cast<const struct sockaddr_in6*>(address);
2174     return &sockaddr->sin6_port;
2175   } else {
2176     NOTREACHED();
2177     return NULL;
2178   }
2179 }
2180
2181 int GetPortFromSockaddr(const struct sockaddr* address, socklen_t address_len) {
2182   const uint16* port_field = GetPortFieldFromSockaddr(address, address_len);
2183   if (!port_field)
2184     return -1;
2185   return base::NetToHost16(*port_field);
2186 }
2187
2188 bool IsLocalhost(const std::string& host) {
2189   if (host == "localhost" ||
2190       host == "localhost.localdomain" ||
2191       host == "localhost6" ||
2192       host == "localhost6.localdomain6")
2193     return true;
2194
2195   IPAddressNumber ip_number;
2196   if (ParseIPLiteralToNumber(host, &ip_number)) {
2197     size_t size = ip_number.size();
2198     switch (size) {
2199       case kIPv4AddressSize: {
2200         IPAddressNumber localhost_prefix;
2201         localhost_prefix.push_back(127);
2202         for (int i = 0; i < 3; ++i) {
2203           localhost_prefix.push_back(0);
2204         }
2205         return IPNumberMatchesPrefix(ip_number, localhost_prefix, 8);
2206       }
2207
2208       case kIPv6AddressSize: {
2209         struct in6_addr sin6_addr;
2210         memcpy(&sin6_addr, &ip_number[0], kIPv6AddressSize);
2211         return !!IN6_IS_ADDR_LOOPBACK(&sin6_addr);
2212       }
2213
2214       default:
2215         NOTREACHED();
2216     }
2217   }
2218
2219   return false;
2220 }
2221
2222 NetworkInterface::NetworkInterface() {
2223 }
2224
2225 NetworkInterface::NetworkInterface(const std::string& name,
2226                                    const IPAddressNumber& address)
2227     : name(name), address(address) {
2228 }
2229
2230 NetworkInterface::~NetworkInterface() {
2231 }
2232
2233 }  // namespace net