Revert 205649 "Adding wittman who can submit this change. Either..."
[chromium-blink-merge.git] / net / base / net_util.cc
blob75c6358c2ee812f393e0bac61ce1412e33465e82
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_util.h"
7 #include <algorithm>
8 #include <iterator>
9 #include <map>
11 #include "build/build_config.h"
13 #if defined(OS_WIN)
14 #include <windows.h>
15 #include <iphlpapi.h>
16 #include <winsock2.h>
17 #pragma comment(lib, "iphlpapi.lib")
18 #elif defined(OS_POSIX)
19 #include <fcntl.h>
20 #if !defined(OS_ANDROID)
21 #include <ifaddrs.h>
22 #endif
23 #include <net/if.h>
24 #include <netdb.h>
25 #include <netinet/in.h>
26 #endif
28 #include "base/basictypes.h"
29 #include "base/file_util.h"
30 #include "base/files/file_path.h"
31 #include "base/i18n/file_util_icu.h"
32 #include "base/i18n/icu_string_conversions.h"
33 #include "base/i18n/time_formatting.h"
34 #include "base/json/string_escape.h"
35 #include "base/lazy_instance.h"
36 #include "base/logging.h"
37 #include "base/memory/singleton.h"
38 #include "base/message_loop.h"
39 #include "base/metrics/histogram.h"
40 #include "base/path_service.h"
41 #include "base/stl_util.h"
42 #include "base/strings/string_number_conversions.h"
43 #include "base/strings/string_piece.h"
44 #include "base/strings/string_split.h"
45 #include "base/strings/string_tokenizer.h"
46 #include "base/strings/string_util.h"
47 #include "base/strings/stringprintf.h"
48 #include "base/strings/sys_string_conversions.h"
49 #include "base/strings/utf_offset_string_conversions.h"
50 #include "base/strings/utf_string_conversions.h"
51 #include "base/synchronization/lock.h"
52 #include "base/sys_byteorder.h"
53 #include "base/time.h"
54 #include "base/values.h"
55 #include "googleurl/src/gurl.h"
56 #include "googleurl/src/url_canon.h"
57 #include "googleurl/src/url_canon_ip.h"
58 #include "googleurl/src/url_parse.h"
59 #include "grit/net_resources.h"
60 #if defined(OS_ANDROID)
61 #include "net/android/network_library.h"
62 #endif
63 #include "net/base/dns_util.h"
64 #include "net/base/escape.h"
65 #include "net/base/mime_util.h"
66 #include "net/base/net_module.h"
67 #if defined(OS_WIN)
68 #include "net/base/winsock_init.h"
69 #endif
70 #include "net/http/http_content_disposition.h"
71 #include "third_party/icu/public/common/unicode/uidna.h"
72 #include "third_party/icu/public/common/unicode/uniset.h"
73 #include "third_party/icu/public/common/unicode/uscript.h"
74 #include "third_party/icu/public/common/unicode/uset.h"
75 #include "third_party/icu/public/i18n/unicode/datefmt.h"
76 #include "third_party/icu/public/i18n/unicode/regex.h"
77 #include "third_party/icu/public/i18n/unicode/ulocdata.h"
79 using base::Time;
81 namespace net {
83 namespace {
85 // Prefix that is prepended to a path to form a file URL. It is declared with
// base::FilePath::CharType (via FILE_PATH_LITERAL) rather than plain char.
86 static const base::FilePath::CharType kFileURLPrefix[] =
87 FILE_PATH_LITERAL("file:///");
89 // The general list of blocked ports. A port listed here is blocked unless a
90 // specific protocol overrides it (e.g. FTP may use the ports listed in
// kAllowedFtpPorts).
91 static const int kRestrictedPorts[] = {
92 1, // tcpmux
93 7, // echo
94 9, // discard
95 11, // systat
96 13, // daytime
97 15, // netstat
98 17, // qotd
99 19, // chargen
100 20, // ftp data
101 21, // ftp access
102 22, // ssh
103 23, // telnet
104 25, // smtp
105 37, // time
106 42, // name
107 43, // nicname
108 53, // domain
109 77, // priv-rjs
110 79, // finger
111 87, // ttylink
112 95, // supdup
113 101, // hostname
114 102, // iso-tsap
115 103, // gppitnp
116 104, // acr-nema
117 109, // pop2
118 110, // pop3
119 111, // sunrpc
120 113, // auth
121 115, // sftp
122 117, // uucp-path
123 119, // nntp
124 123, // NTP
125 135, // loc-srv /epmap
126 139, // netbios
127 143, // imap2
128 179, // BGP
129 389, // ldap
130 465, // smtp+ssl
131 512, // print / exec
132 513, // login
133 514, // shell
134 515, // printer
135 526, // tempo
136 530, // courier
137 531, // chat
138 532, // netnews
139 540, // uucp
140 556, // remotefs
141 563, // nntp+ssl
142 587, // smtp (mail submission)
143 601, // ??
144 636, // ldap+ssl
145 993, // imap+ssl
146 995, // pop3+ssl
147 2049, // nfs
148 3659, // apple-sasl / PasswordServer
149 4045, // lockd
150 6000, // X11
151 6665, // Alternate IRC [Apple addition]
152 6666, // Alternate IRC [Apple addition]
153 6667, // Standard IRC [Apple addition]
154 6668, // Alternate IRC [Apple addition]
155 6669, // Alternate IRC [Apple addition]
156 0xFFFF, // Used to block all invalid port numbers (see
157 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())
160 // Ports from kRestrictedPorts that are nevertheless permitted for FTP.
161 static const int kAllowedFtpPorts[] = {
162 21, // ftp access (control connection)
163 22, // ssh
166 // Does some simple normalization of scripts so we can allow certain scripts
167 // to exist together.
168 // TODO(brettw) bug 880223: we should allow some other languages to be
169 // oombined such as Chinese and Latin. We will probably need a more
170 // complicated system of language pairs to have more fine-grained control.
171 UScriptCode NormalizeScript(UScriptCode code) {
172 switch (code) {
173 case USCRIPT_KATAKANA:
174 case USCRIPT_HIRAGANA:
175 case USCRIPT_KATAKANA_OR_HIRAGANA:
176 case USCRIPT_HANGUL: // This one is arguable.
177 return USCRIPT_HAN;
178 default:
179 return code;
183 bool IsIDNComponentInSingleScript(const base::char16* str, int str_len) {
184 UScriptCode first_script = USCRIPT_INVALID_CODE;
185 bool is_first = true;
187 int i = 0;
188 while (i < str_len) {
189 unsigned code_point;
190 U16_NEXT(str, i, str_len, code_point);
192 UErrorCode err = U_ZERO_ERROR;
193 UScriptCode cur_script = uscript_getScript(code_point, &err);
194 if (err != U_ZERO_ERROR)
195 return false; // Report mixed on error.
196 cur_script = NormalizeScript(cur_script);
198 // TODO(brettw) We may have to check for USCRIPT_INHERENT as well.
199 if (is_first && cur_script != USCRIPT_COMMON) {
200 first_script = cur_script;
201 is_first = false;
202 } else {
203 if (cur_script != USCRIPT_COMMON && cur_script != first_script)
204 return false;
207 return true;
// Checks whether the script of a language can be 'safely' mixed with Latin
// letters in the ASCII range.
//
// |lang| is a language tag; only its first two characters are examined, and
// the comparison is case-sensitive. Returns true when they are "zh", "ja" or
// "ko". Tags shorter than two characters never match.
bool IsCompatibleWithASCIILetters(const std::string& lang) {
  // For now, just list Chinese, Japanese and Korean (positive list).
  // An alternative is negative-listing (languages using Greek and
  // Cyrillic letters), but it can be more dangerous.
  //
  // compare(0, 2, ...) tests the prefix in place, so no temporary substring
  // is created per candidate.
  return lang.compare(0, 2, "zh") == 0 ||
         lang.compare(0, 2, "ja") == 0 ||
         lang.compare(0, 2, "ko") == 0;
}
221 typedef std::map<std::string, icu::UnicodeSet*> LangToExemplarSetMap;
223 class LangToExemplarSet {
224 public:
225 static LangToExemplarSet* GetInstance() {
226 return Singleton<LangToExemplarSet>::get();
229 private:
230 LangToExemplarSetMap map;
231 LangToExemplarSet() { }
232 ~LangToExemplarSet() {
233 STLDeleteContainerPairSecondPointers(map.begin(), map.end());
236 friend class Singleton<LangToExemplarSet>;
237 friend struct DefaultSingletonTraits<LangToExemplarSet>;
238 friend bool GetExemplarSetForLang(const std::string&, icu::UnicodeSet**);
239 friend void SetExemplarSetForLang(const std::string&, icu::UnicodeSet*);
241 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet);
244 bool GetExemplarSetForLang(const std::string& lang,
245 icu::UnicodeSet** lang_set) {
246 const LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;
247 LangToExemplarSetMap::const_iterator pos = map.find(lang);
248 if (pos != map.end()) {
249 *lang_set = pos->second;
250 return true;
252 return false;
255 void SetExemplarSetForLang(const std::string& lang,
256 icu::UnicodeSet* lang_set) {
257 LangToExemplarSetMap& map = LangToExemplarSet::GetInstance()->map;
258 map.insert(std::make_pair(lang, lang_set));
// Lock that IsComponentCoveredByLang() holds while it reads from and inserts
// into the per-language exemplar set cache. Created lazily; declared Leaky.
261 static base::LazyInstance<base::Lock>::Leaky
262 g_lang_set_lock = LAZY_INSTANCE_INITIALIZER;
264 // Returns true if all the characters in component_characters are used by
265 // the language |lang|.
266 bool IsComponentCoveredByLang(const icu::UnicodeSet& component_characters,
267 const std::string& lang) {
268 CR_DEFINE_STATIC_LOCAL(
269 const icu::UnicodeSet, kASCIILetters, ('a', 'z'));
270 icu::UnicodeSet* lang_set = NULL;
271 // We're called from both the UI thread and the history thread.
273 base::AutoLock lock(g_lang_set_lock.Get());
274 if (!GetExemplarSetForLang(lang, &lang_set)) {
275 UErrorCode status = U_ZERO_ERROR;
276 ULocaleData* uld = ulocdata_open(lang.c_str(), &status);
277 // TODO(jungshik) Turn this check on when the ICU data file is
278 // rebuilt with the minimal subset of locale data for languages
279 // to which Chrome is not localized but which we offer in the list
280 // of languages selectable for Accept-Languages. With the rebuilt ICU
281 // data, ulocdata_open never should fall back to the default locale.
282 // (issue 2078)
283 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING);
284 if (U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING) {
285 lang_set = reinterpret_cast<icu::UnicodeSet *>(
286 ulocdata_getExemplarSet(uld, NULL, 0,
287 ULOCDATA_ES_STANDARD, &status));
288 // If |lang| is compatible with ASCII Latin letters, add them.
289 if (IsCompatibleWithASCIILetters(lang))
290 lang_set->addAll(kASCIILetters);
291 } else {
292 lang_set = new icu::UnicodeSet(1, 0);
294 lang_set->freeze();
295 SetExemplarSetForLang(lang, lang_set);
296 ulocdata_close(uld);
299 return !lang_set->isEmpty() && lang_set->containsAll(component_characters);
302 // Returns true if the given Unicode host component is safe to display to the
303 // user.
304 bool IsIDNComponentSafe(const base::char16* str,
305 int str_len,
306 const std::string& languages) {
307 // Most common cases (non-IDN) do not reach here so that we don't
308 // need a fast return path.
309 // TODO(jungshik) : Check if there's any character inappropriate
310 // (although allowed) for domain names.
311 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and
312 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt
313 // For now, we borrow the list from Mozilla and tweaked it slightly.
314 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because
315 // they're gonna be canonicalized to U+0020 and full stop before
316 // reaching here.)
317 // The original list is available at
318 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and
319 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703
321 UErrorCode status = U_ZERO_ERROR;
322 #ifdef U_WCHAR_IS_UTF16
323 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
324 L"[[\\ \u00bc\u00bd\u01c3\u0337\u0338"
325 L"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
326 L"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
327 L"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
328 L"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"
329 L"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"
330 L"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"
331 L"[\ufffa-\ufffd]]"), status);
332 DCHECK(U_SUCCESS(status));
333 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
334 // Lone katakana no, so, or n
335 L"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"
336 // Repeating Japanese accent characters
337 L"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),
338 0, status);
339 #else
340 icu::UnicodeSet dangerous_characters(icu::UnicodeString(
341 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
342 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
343 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
344 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
345 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"
346 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"
347 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"
348 "[\\ufffa-\\ufffd]]", -1, US_INV), status);
349 DCHECK(U_SUCCESS(status));
350 icu::RegexMatcher dangerous_patterns(icu::UnicodeString(
351 // Lone katakana no, so, or n
352 "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]"
353 // Repeating Japanese accent characters
354 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"),
355 0, status);
356 #endif
357 DCHECK(U_SUCCESS(status));
358 icu::UnicodeSet component_characters;
359 icu::UnicodeString component_string(str, str_len);
360 component_characters.addAll(component_string);
361 if (dangerous_characters.containsSome(component_characters))
362 return false;
364 DCHECK(U_SUCCESS(status));
365 dangerous_patterns.reset(component_string);
366 if (dangerous_patterns.find())
367 return false;
369 // If the language list is empty, the result is completely determined
370 // by whether a component is a single script or not. This will block
371 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are
372 // allowed with |languages| (while it blocks Chinese + Latin letters with
373 // an accent as should be the case), but we want to err on the safe side
374 // when |languages| is empty.
375 if (languages.empty())
376 return IsIDNComponentInSingleScript(str, str_len);
378 // |common_characters| is made up of ASCII numbers, hyphen, plus and
379 // underscore that are used across scripts and allowed in domain names.
380 // (sync'd with characters allowed in url_canon_host with square
381 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.
382 icu::UnicodeSet common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),
383 status);
384 DCHECK(U_SUCCESS(status));
385 // Subtract common characters because they're always allowed so that
386 // we just have to check if a language-specific set contains
387 // the remainder.
388 component_characters.removeAll(common_characters);
390 base::StringTokenizer t(languages, ",");
391 while (t.GetNext()) {
392 if (IsComponentCoveredByLang(component_characters, t.token()))
393 return true;
395 return false;
398 // Converts one component of a host (between dots) to IDN if safe. The result
399 // will be APPENDED to the given output string and will be the same as the input
400 // if it is not IDN or the IDN is unsafe to display. Returns whether any
401 // conversion was performed.
402 bool IDNToUnicodeOneComponent(const base::char16* comp,
403 size_t comp_len,
404 const std::string& languages,
405 base::string16* out) {
406 DCHECK(out);
407 if (comp_len == 0)
408 return false;
410 // Only transform if the input can be an IDN component.
411 static const base::char16 kIdnPrefix[] = {'x', 'n', '-', '-'};
412 if ((comp_len > arraysize(kIdnPrefix)) &&
413 !memcmp(comp, kIdnPrefix, arraysize(kIdnPrefix) * sizeof(base::char16))) {
414 // Repeatedly expand the output string until it's big enough. It looks like
415 // ICU will return the required size of the buffer, but that's not
416 // documented, so we'll just grow by 2x. This should be rare and is not on a
417 // critical path.
418 size_t original_length = out->length();
419 for (int extra_space = 64; ; extra_space *= 2) {
420 UErrorCode status = U_ZERO_ERROR;
421 out->resize(out->length() + extra_space);
422 int output_chars = uidna_IDNToUnicode(comp,
423 static_cast<int32_t>(comp_len), &(*out)[original_length], extra_space,
424 UIDNA_DEFAULT, NULL, &status);
425 if (status == U_ZERO_ERROR) {
426 // Converted successfully.
427 out->resize(original_length + output_chars);
428 if (IsIDNComponentSafe(out->data() + original_length, output_chars,
429 languages))
430 return true;
433 if (status != U_BUFFER_OVERFLOW_ERROR)
434 break;
436 // Failed, revert back to original string.
437 out->resize(original_length);
440 // We get here with no IDN or on error, in which case we just append the
441 // literal input.
442 out->append(comp, comp_len);
443 return false;
446 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|.
447 void LimitOffsets(const base::string16& str,
448 std::vector<size_t>* offsets_for_adjustment) {
449 if (offsets_for_adjustment) {
450 std::for_each(offsets_for_adjustment->begin(),
451 offsets_for_adjustment->end(),
452 base::LimitOffset<base::string16>(str.length()));
456 // TODO(brettw) bug 734373: check the scripts for each host component and
457 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for
458 // scripts that the user has installed. For now, just put the entire
459 // path through IDN. Maybe this feature can be implemented in ICU itself?
461 // We may want to skip this step in the case of file URLs to allow unicode
462 // UNC hostnames regardless of encodings.
463 base::string16 IDNToUnicodeWithOffsets(
464 const std::string& host,
465 const std::string& languages,
466 std::vector<size_t>* offsets_for_adjustment) {
467 // Convert the ASCII input to a base::string16 for ICU.
468 base::string16 input16;
469 input16.reserve(host.length());
470 input16.insert(input16.end(), host.begin(), host.end());
472 // Do each component of the host separately, since we enforce script matching
473 // on a per-component basis.
474 base::string16 out16;
476 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
477 for (size_t component_start = 0, component_end;
478 component_start < input16.length();
479 component_start = component_end + 1) {
480 // Find the end of the component.
481 component_end = input16.find('.', component_start);
482 if (component_end == base::string16::npos)
483 component_end = input16.length(); // For getting the last component.
484 size_t component_length = component_end - component_start;
485 size_t new_component_start = out16.length();
486 bool converted_idn = false;
487 if (component_end > component_start) {
488 // Add the substring that we just found.
489 converted_idn = IDNToUnicodeOneComponent(
490 input16.data() + component_start, component_length, languages,
491 &out16);
493 size_t new_component_length = out16.length() - new_component_start;
495 if (converted_idn && offsets_for_adjustment) {
496 offset_adjuster.Add(base::OffsetAdjuster::Adjustment(component_start,
497 component_length, new_component_length));
500 // Need to add the dot we just found (if we found one).
501 if (component_end < input16.length())
502 out16.push_back('.');
506 LimitOffsets(out16, offsets_for_adjustment);
507 return out16;
510 // Transforms |original_offsets| by subtracting |component_begin| from all
511 // offsets. Any offset which was not at least this large to begin with is set
512 // to std::string::npos.
513 std::vector<size_t> OffsetsIntoComponent(
514 const std::vector<size_t>& original_offsets,
515 size_t component_begin) {
516 DCHECK_NE(std::string::npos, component_begin);
517 std::vector<size_t> offsets_into_component(original_offsets);
518 for (std::vector<size_t>::iterator i(offsets_into_component.begin());
519 i != offsets_into_component.end(); ++i) {
520 if (*i != std::string::npos)
521 *i = (*i < component_begin) ? std::string::npos : (*i - component_begin);
523 return offsets_into_component;
526 // Called after we transform a component and append it to an output string.
527 // Maps |transformed_offsets|, which represent offsets into the transformed
528 // component itself, into appropriate offsets for the output string, by adding
529 // |output_component_begin| to each. Determines which offsets need mapping by
530 // checking to see which of the |original_offsets| were within the designated
531 // original component, using its provided endpoints.
532 void AdjustForComponentTransform(
533 const std::vector<size_t>& original_offsets,
534 size_t original_component_begin,
535 size_t original_component_end,
536 const std::vector<size_t>& transformed_offsets,
537 size_t output_component_begin,
538 std::vector<size_t>* offsets_for_adjustment) {
539 if (!offsets_for_adjustment)
540 return;
542 DCHECK_NE(std::string::npos, original_component_begin);
543 DCHECK_NE(std::string::npos, original_component_end);
544 DCHECK_NE(base::string16::npos, output_component_begin);
545 size_t offsets_size = offsets_for_adjustment->size();
546 DCHECK_EQ(offsets_size, original_offsets.size());
547 DCHECK_EQ(offsets_size, transformed_offsets.size());
548 for (size_t i = 0; i < offsets_size; ++i) {
549 size_t original_offset = original_offsets[i];
550 if ((original_offset >= original_component_begin) &&
551 (original_offset < original_component_end)) {
552 size_t transformed_offset = transformed_offsets[i];
553 (*offsets_for_adjustment)[i] =
554 (transformed_offset == base::string16::npos) ?
555 base::string16::npos : (output_component_begin + transformed_offset);
560 // If |component| is valid, its begin is incremented by |delta|.
561 void AdjustComponent(int delta, url_parse::Component* component) {
562 if (!component->is_valid())
563 return;
565 DCHECK(delta >= 0 || component->begin >= -delta);
566 component->begin += delta;
569 // Adjusts all the components of |parsed| by |delta|, except for the scheme.
570 void AdjustComponents(int delta, url_parse::Parsed* parsed) {
571 AdjustComponent(delta, &(parsed->username));
572 AdjustComponent(delta, &(parsed->password));
573 AdjustComponent(delta, &(parsed->host));
574 AdjustComponent(delta, &(parsed->port));
575 AdjustComponent(delta, &(parsed->path));
576 AdjustComponent(delta, &(parsed->query));
577 AdjustComponent(delta, &(parsed->ref));
580 // Helper for FormatUrlWithOffsets().
581 base::string16 FormatViewSourceUrl(
582 const GURL& url,
583 const std::vector<size_t>& original_offsets,
584 const std::string& languages,
585 FormatUrlTypes format_types,
586 UnescapeRule::Type unescape_rules,
587 url_parse::Parsed* new_parsed,
588 size_t* prefix_end,
589 std::vector<size_t>* offsets_for_adjustment) {
590 DCHECK(new_parsed);
591 const char kViewSource[] = "view-source:";
592 const size_t kViewSourceLength = arraysize(kViewSource) - 1;
593 std::vector<size_t> offsets_into_url(
594 OffsetsIntoComponent(original_offsets, kViewSourceLength));
596 GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength));
597 base::string16 result(ASCIIToUTF16(kViewSource) +
598 FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules,
599 new_parsed, prefix_end, &offsets_into_url));
601 // Adjust position values.
602 if (new_parsed->scheme.is_nonempty()) {
603 // Assume "view-source:real-scheme" as a scheme.
604 new_parsed->scheme.len += kViewSourceLength;
605 } else {
606 new_parsed->scheme.begin = 0;
607 new_parsed->scheme.len = kViewSourceLength - 1;
609 AdjustComponents(kViewSourceLength, new_parsed);
610 if (prefix_end)
611 *prefix_end += kViewSourceLength;
612 AdjustForComponentTransform(original_offsets, kViewSourceLength,
613 url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength,
614 offsets_for_adjustment);
615 LimitOffsets(result, offsets_for_adjustment);
616 return result;
619 class AppendComponentTransform {
620 public:
621 AppendComponentTransform() {}
622 virtual ~AppendComponentTransform() {}
624 virtual base::string16 Execute(
625 const std::string& component_text,
626 std::vector<size_t>* offsets_into_component) const = 0;
628 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an
629 // accessible copy constructor in order to call AppendFormattedComponent()
630 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).
633 class HostComponentTransform : public AppendComponentTransform {
634 public:
635 explicit HostComponentTransform(const std::string& languages)
636 : languages_(languages) {
639 private:
640 virtual base::string16 Execute(
641 const std::string& component_text,
642 std::vector<size_t>* offsets_into_component) const OVERRIDE {
643 return IDNToUnicodeWithOffsets(component_text, languages_,
644 offsets_into_component);
647 const std::string& languages_;
650 class NonHostComponentTransform : public AppendComponentTransform {
651 public:
652 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules)
653 : unescape_rules_(unescape_rules) {
656 private:
657 virtual base::string16 Execute(
658 const std::string& component_text,
659 std::vector<size_t>* offsets_into_component) const OVERRIDE {
660 return (unescape_rules_ == UnescapeRule::NONE) ?
661 base::UTF8ToUTF16AndAdjustOffsets(component_text,
662 offsets_into_component) :
663 UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text,
664 unescape_rules_, offsets_into_component);
667 const UnescapeRule::Type unescape_rules_;
670 void AppendFormattedComponent(const std::string& spec,
671 const url_parse::Component& original_component,
672 const std::vector<size_t>& original_offsets,
673 const AppendComponentTransform& transform,
674 base::string16* output,
675 url_parse::Component* output_component,
676 std::vector<size_t>* offsets_for_adjustment) {
677 DCHECK(output);
678 if (original_component.is_nonempty()) {
679 size_t original_component_begin =
680 static_cast<size_t>(original_component.begin);
681 size_t output_component_begin = output->length();
682 if (output_component)
683 output_component->begin = static_cast<int>(output_component_begin);
685 std::vector<size_t> offsets_into_component =
686 OffsetsIntoComponent(original_offsets, original_component_begin);
687 output->append(transform.Execute(std::string(spec, original_component_begin,
688 static_cast<size_t>(original_component.len)), &offsets_into_component));
690 if (output_component) {
691 output_component->len =
692 static_cast<int>(output->length() - output_component_begin);
694 AdjustForComponentTransform(original_offsets, original_component_begin,
695 static_cast<size_t>(original_component.end()),
696 offsets_into_component, output_component_begin,
697 offsets_for_adjustment);
698 } else if (output_component) {
699 output_component->reset();
703 void SanitizeGeneratedFileName(base::FilePath::StringType* filename,
704 bool replace_trailing) {
705 const base::FilePath::CharType kReplace[] = FILE_PATH_LITERAL("-");
706 if (filename->empty())
707 return;
708 if (replace_trailing) {
709 // Handle CreateFile() stripping trailing dots and spaces on filenames
710 // http://support.microsoft.com/kb/115827
711 size_t length = filename->size();
712 size_t pos = filename->find_last_not_of(FILE_PATH_LITERAL(" ."));
713 filename->resize((pos == std::string::npos) ? 0 : (pos + 1));
714 TrimWhitespace(*filename, TRIM_TRAILING, filename);
715 if (filename->empty())
716 return;
717 size_t trimmed = length - filename->size();
718 if (trimmed)
719 filename->insert(filename->end(), trimmed, kReplace[0]);
721 TrimString(*filename, FILE_PATH_LITERAL("."), filename);
722 if (filename->empty())
723 return;
724 // Replace any path information by changing path separators.
725 ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("/"), kReplace);
726 ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("\\"), kReplace);
729 // Returns the filename determined from the last component of the path portion
730 // of the URL. Returns an empty string if the URL doesn't have a path or is
731 // invalid. If the generated filename is not reliable,
732 // |should_overwrite_extension| will be set to true, in which case a better
733 // extension should be determined based on the content type.
734 std::string GetFileNameFromURL(const GURL& url,
735 const std::string& referrer_charset,
736 bool* should_overwrite_extension) {
737 // about: and data: URLs don't have file names, but esp. data: URLs may
738 // contain parts that look like ones (i.e., contain a slash). Therefore we
739 // don't attempt to divine a file name out of them.
740 if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data"))
741 return std::string();
743 const std::string unescaped_url_filename = UnescapeURLComponent(
744 url.ExtractFileName(),
745 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
747 // The URL's path should be escaped UTF-8, but may not be.
748 std::string decoded_filename = unescaped_url_filename;
749 if (!IsStringUTF8(decoded_filename)) {
750 // TODO(jshin): this is probably not robust enough. To be sure, we need
751 // encoding detection.
752 base::string16 utf16_output;
753 if (!referrer_charset.empty() &&
754 base::CodepageToUTF16(unescaped_url_filename,
755 referrer_charset.c_str(),
756 base::OnStringConversionError::FAIL,
757 &utf16_output)) {
758 decoded_filename = UTF16ToUTF8(utf16_output);
759 } else {
760 decoded_filename = WideToUTF8(
761 base::SysNativeMBToWide(unescaped_url_filename));
764 // If the URL contains a (possibly empty) query, assume it is a generator, and
765 // allow the determined extension to be overwritten.
766 *should_overwrite_extension = !decoded_filename.empty() && url.has_query();
768 return decoded_filename;
771 // Returns whether the specified extension is automatically integrated into the
772 // windows shell.
773 bool IsShellIntegratedExtension(const base::FilePath::StringType& extension) {
774 base::FilePath::StringType extension_lower = StringToLowerASCII(extension);
776 // http://msdn.microsoft.com/en-us/library/ms811694.aspx
777 // Right-clicking on shortcuts can be magical.
778 if ((extension_lower == FILE_PATH_LITERAL("local")) ||
779 (extension_lower == FILE_PATH_LITERAL("lnk")))
780 return true;
782 // http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html
783 // Files become magical if they end in a CLSID, so block such extensions.
784 if (!extension_lower.empty() &&
785 (extension_lower[0] == FILE_PATH_LITERAL('{')) &&
786 (extension_lower[extension_lower.length() - 1] == FILE_PATH_LITERAL('}')))
787 return true;
788 return false;
791 // Returns whether the specified file name is a reserved name on windows.
792 // This includes names like "com2.zip" (which correspond to devices) and
793 // desktop.ini and thumbs.db which have special meaning to the windows shell.
794 bool IsReservedName(const base::FilePath::StringType& filename) {
795 // This list is taken from the MSDN article "Naming a file"
796 // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx
797 // I also added clock$ because GetSaveFileName seems to consider it as a
798 // reserved name too.
799 static const char* const known_devices[] = {
800 "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5",
801 "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4",
802 "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$"
804 #if defined(OS_WIN)
805 std::string filename_lower = StringToLowerASCII(WideToUTF8(filename));
806 #elif defined(OS_POSIX)
807 std::string filename_lower = StringToLowerASCII(filename);
808 #endif
810 for (size_t i = 0; i < arraysize(known_devices); ++i) {
811 // Exact match.
812 if (filename_lower == known_devices[i])
813 return true;
814 // Starts with "DEVICE.".
815 if (filename_lower.find(std::string(known_devices[i]) + ".") == 0)
816 return true;
819 static const char* const magic_names[] = {
820 // These file names are used by the "Customize folder" feature of the shell.
821 "desktop.ini",
822 "thumbs.db",
825 for (size_t i = 0; i < arraysize(magic_names); ++i) {
826 if (filename_lower == magic_names[i])
827 return true;
830 return false;
833 // Examines the current extension in |file_name| and modifies it if necessary in
834 // order to ensure the filename is safe. If |file_name| doesn't contain an
835 // extension or if |ignore_extension| is true, then a new extension will be
836 // constructed based on the |mime_type|.
838 // We're addressing two things here:
840 // 1) Usability. If there is no reliable file extension, we want to guess a
841 // reasonable file extension based on the content type.
843 // 2) Shell integration. Some file extensions automatically integrate with the
844 // shell. We block these extensions to prevent a malicious web site from
845 // integrating with the user's shell.
846 void EnsureSafeExtension(const std::string& mime_type,
847 bool ignore_extension,
848 base::FilePath* file_name) {
849 // See if our file name already contains an extension.
850 base::FilePath::StringType extension = file_name->Extension();
851 if (!extension.empty())
852 extension.erase(extension.begin()); // Erase preceding '.'.
854 if ((ignore_extension || extension.empty()) && !mime_type.empty()) {
855 base::FilePath::StringType preferred_mime_extension;
856 std::vector<base::FilePath::StringType> all_mime_extensions;
857 // The GetPreferredExtensionForMimeType call will end up going to disk. Do
858 // this on another thread to avoid slowing the IO thread.
859 // http://crbug.com/61827
860 // TODO(asanka): Remove this ScopedAllowIO once all callers have switched
861 // over to IO safe threads.
862 base::ThreadRestrictions::ScopedAllowIO allow_io;
863 net::GetPreferredExtensionForMimeType(mime_type, &preferred_mime_extension);
864 net::GetExtensionsForMimeType(mime_type, &all_mime_extensions);
865 // If the existing extension is in the list of valid extensions for the
866 // given type, use it. This avoids doing things like pointlessly renaming
867 // "foo.jpg" to "foo.jpeg".
868 if (std::find(all_mime_extensions.begin(),
869 all_mime_extensions.end(),
870 extension) != all_mime_extensions.end()) {
871 // leave |extension| alone
872 } else if (!preferred_mime_extension.empty()) {
873 extension = preferred_mime_extension;
877 #if defined(OS_WIN)
878 static const base::FilePath::CharType default_extension[] =
879 FILE_PATH_LITERAL("download");
881 // Rename shell-integrated extensions.
882 // TODO(asanka): Consider stripping out the bad extension and replacing it
883 // with the preferred extension for the MIME type if one is available.
884 if (IsShellIntegratedExtension(extension))
885 extension.assign(default_extension);
886 #endif
888 *file_name = file_name->ReplaceExtension(extension);
891 bool FilePathToString16(const base::FilePath& path, base::string16* converted) {
892 #if defined(OS_WIN)
893 return WideToUTF16(path.value().c_str(), path.value().size(), converted);
894 #elif defined(OS_POSIX)
895 std::string component8 = path.AsUTF8Unsafe();
896 return !component8.empty() &&
897 UTF8ToUTF16(component8.c_str(), component8.size(), converted);
898 #endif
901 } // namespace
903 const FormatUrlType kFormatUrlOmitNothing = 0;
904 const FormatUrlType kFormatUrlOmitUsernamePassword = 1 << 0;
905 const FormatUrlType kFormatUrlOmitHTTP = 1 << 1;
906 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname = 1 << 2;
907 const FormatUrlType kFormatUrlOmitAll = kFormatUrlOmitUsernamePassword |
908 kFormatUrlOmitHTTP | kFormatUrlOmitTrailingSlashOnBareHostname;
910 static base::LazyInstance<std::multiset<int> >::Leaky
911 g_explicitly_allowed_ports = LAZY_INSTANCE_INITIALIZER;
913 size_t GetCountOfExplicitlyAllowedPorts() {
914 return g_explicitly_allowed_ports.Get().size();
917 GURL FilePathToFileURL(const base::FilePath& path) {
918 // Produce a URL like "file:///C:/foo" for a regular file, or
919 // "file://///server/path" for UNC. The URL canonicalizer will fix up the
920 // latter case to be the canonical UNC form: "file://server/path"
921 base::FilePath::StringType url_string(kFileURLPrefix);
922 url_string.append(path.value());
924 // Now do replacement of some characters. Since we assume the input is a
925 // literal filename, anything the URL parser might consider special should
926 // be escaped here.
928 // must be the first substitution since others will introduce percents as the
929 // escape character
930 ReplaceSubstringsAfterOffset(&url_string, 0,
931 FILE_PATH_LITERAL("%"), FILE_PATH_LITERAL("%25"));
933 // semicolon is supposed to be some kind of separator according to RFC 2396
934 ReplaceSubstringsAfterOffset(&url_string, 0,
935 FILE_PATH_LITERAL(";"), FILE_PATH_LITERAL("%3B"));
937 ReplaceSubstringsAfterOffset(&url_string, 0,
938 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23"));
940 ReplaceSubstringsAfterOffset(&url_string, 0,
941 FILE_PATH_LITERAL("?"), FILE_PATH_LITERAL("%3F"));
943 #if defined(OS_POSIX)
944 ReplaceSubstringsAfterOffset(&url_string, 0,
945 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C"));
946 #endif
948 return GURL(url_string);
951 std::string GetSpecificHeader(const std::string& headers,
952 const std::string& name) {
953 // We want to grab the Value from the "Key: Value" pairs in the headers,
954 // which should look like this (no leading spaces, \n-separated) (we format
955 // them this way in url_request_inet.cc):
956 // HTTP/1.1 200 OK\n
957 // ETag: "6d0b8-947-24f35ec0"\n
958 // Content-Length: 2375\n
959 // Content-Type: text/html; charset=UTF-8\n
960 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
961 if (headers.empty())
962 return std::string();
964 std::string match('\n' + name + ':');
966 std::string::const_iterator begin =
967 std::search(headers.begin(), headers.end(), match.begin(), match.end(),
968 base::CaseInsensitiveCompareASCII<char>());
970 if (begin == headers.end())
971 return std::string();
973 begin += match.length();
975 std::string ret;
976 TrimWhitespace(std::string(begin, std::find(begin, headers.end(), '\n')),
977 TRIM_ALL, &ret);
978 return ret;
981 base::string16 IDNToUnicode(const std::string& host,
982 const std::string& languages) {
983 return IDNToUnicodeWithOffsets(host, languages, NULL);
986 std::string CanonicalizeHost(const std::string& host,
987 url_canon::CanonHostInfo* host_info) {
988 // Try to canonicalize the host.
989 const url_parse::Component raw_host_component(
990 0, static_cast<int>(host.length()));
991 std::string canon_host;
992 url_canon::StdStringCanonOutput canon_host_output(&canon_host);
993 url_canon::CanonicalizeHostVerbose(host.c_str(), raw_host_component,
994 &canon_host_output, host_info);
996 if (host_info->out_host.is_nonempty() &&
997 host_info->family != url_canon::CanonHostInfo::BROKEN) {
998 // Success! Assert that there's no extra garbage.
999 canon_host_output.Complete();
1000 DCHECK_EQ(host_info->out_host.len, static_cast<int>(canon_host.length()));
1001 } else {
1002 // Empty host, or canonicalization failed. We'll return empty.
1003 canon_host.clear();
1006 return canon_host;
1009 std::string GetDirectoryListingHeader(const base::string16& title) {
1010 static const base::StringPiece header(
1011 NetModule::GetResource(IDR_DIR_HEADER_HTML));
1012 // This can be null in unit tests.
1013 DLOG_IF(WARNING, header.empty()) <<
1014 "Missing resource: directory listing header";
1016 std::string result;
1017 if (!header.empty())
1018 result.assign(header.data(), header.size());
1020 result.append("<script>start(");
1021 base::JsonDoubleQuote(title, true, &result);
1022 result.append(");</script>\n");
1024 return result;
// True when |c| is a lowercase ASCII letter. Only lowercase is checked because
// uppercase characters have already been normalized.
inline bool IsHostCharAlpha(char c) {
  return 'a' <= c && c <= 'z';
}
// True when |c| is an ASCII decimal digit.
inline bool IsHostCharDigit(char c) {
  return '0' <= c && c <= '9';
}
1037 bool IsCanonicalizedHostCompliant(const std::string& host,
1038 const std::string& desired_tld) {
1039 if (host.empty())
1040 return false;
1042 bool in_component = false;
1043 bool most_recent_component_started_alpha = false;
1044 bool last_char_was_underscore = false;
1046 for (std::string::const_iterator i(host.begin()); i != host.end(); ++i) {
1047 const char c = *i;
1048 if (!in_component) {
1049 most_recent_component_started_alpha = IsHostCharAlpha(c);
1050 if (!most_recent_component_started_alpha && !IsHostCharDigit(c) &&
1051 (c != '-'))
1052 return false;
1053 in_component = true;
1054 } else {
1055 if (c == '.') {
1056 if (last_char_was_underscore)
1057 return false;
1058 in_component = false;
1059 } else if (IsHostCharAlpha(c) || IsHostCharDigit(c) || (c == '-')) {
1060 last_char_was_underscore = false;
1061 } else if (c == '_') {
1062 last_char_was_underscore = true;
1063 } else {
1064 return false;
1069 return most_recent_component_started_alpha ||
1070 (!desired_tld.empty() && IsHostCharAlpha(desired_tld[0]));
1073 std::string GetDirectoryListingEntry(const base::string16& name,
1074 const std::string& raw_bytes,
1075 bool is_dir,
1076 int64 size,
1077 Time modified) {
1078 std::string result;
1079 result.append("<script>addRow(");
1080 base::JsonDoubleQuote(name, true, &result);
1081 result.append(",");
1082 if (raw_bytes.empty()) {
1083 base::JsonDoubleQuote(EscapePath(UTF16ToUTF8(name)),
1084 true, &result);
1085 } else {
1086 base::JsonDoubleQuote(EscapePath(raw_bytes), true, &result);
1088 if (is_dir) {
1089 result.append(",1,");
1090 } else {
1091 result.append(",0,");
1094 // Negative size means unknown or not applicable (e.g. directory).
1095 base::string16 size_string;
1096 if (size >= 0)
1097 size_string = FormatBytesUnlocalized(size);
1098 base::JsonDoubleQuote(size_string, true, &result);
1100 result.append(",");
1102 base::string16 modified_str;
1103 // |modified| can be NULL in FTP listings.
1104 if (!modified.is_null()) {
1105 modified_str = base::TimeFormatShortDateAndTime(modified);
1107 base::JsonDoubleQuote(modified_str, true, &result);
1109 result.append(");</script>\n");
1111 return result;
1114 base::string16 StripWWW(const base::string16& text) {
1115 const base::string16 www(ASCIIToUTF16("www."));
1116 return StartsWith(text, www, true) ? text.substr(www.length()) : text;
1119 base::string16 StripWWWFromHost(const GURL& url) {
1120 DCHECK(url.is_valid());
1121 return StripWWW(ASCIIToUTF16(url.host()));
1124 bool IsSafePortablePathComponent(const base::FilePath& component) {
1125 base::string16 component16;
1126 base::FilePath::StringType sanitized = component.value();
1127 SanitizeGeneratedFileName(&sanitized, true);
1128 base::FilePath::StringType extension = component.Extension();
1129 if (!extension.empty())
1130 extension.erase(extension.begin()); // Erase preceding '.'.
1131 return !component.empty() &&
1132 (component == component.BaseName()) &&
1133 (component == component.StripTrailingSeparators()) &&
1134 FilePathToString16(component, &component16) &&
1135 file_util::IsFilenameLegal(component16) &&
1136 !IsShellIntegratedExtension(extension) &&
1137 (sanitized == component.value());
1140 bool IsSafePortableBasename(const base::FilePath& filename) {
1141 return IsSafePortablePathComponent(filename) &&
1142 !IsReservedName(filename.value());
1145 bool IsSafePortableRelativePath(const base::FilePath& path) {
1146 if (path.empty() || path.IsAbsolute() || path.EndsWithSeparator())
1147 return false;
1148 std::vector<base::FilePath::StringType> components;
1149 path.GetComponents(&components);
1150 if (components.empty())
1151 return false;
1152 for (size_t i = 0; i < components.size() - 1; ++i) {
1153 if (!IsSafePortablePathComponent(base::FilePath(components[i])))
1154 return false;
1156 return IsSafePortableBasename(path.BaseName());
1159 void GenerateSafeFileName(const std::string& mime_type,
1160 bool ignore_extension,
1161 base::FilePath* file_path) {
1162 // Make sure we get the right file extension
1163 EnsureSafeExtension(mime_type, ignore_extension, file_path);
1165 #if defined(OS_WIN)
1166 // Prepend "_" to the file name if it's a reserved name
1167 base::FilePath::StringType leaf_name = file_path->BaseName().value();
1168 DCHECK(!leaf_name.empty());
1169 if (IsReservedName(leaf_name)) {
1170 leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name;
1171 *file_path = file_path->DirName();
1172 if (file_path->value() == base::FilePath::kCurrentDirectory) {
1173 *file_path = base::FilePath(leaf_name);
1174 } else {
1175 *file_path = file_path->Append(leaf_name);
1178 #endif
1181 base::string16 GetSuggestedFilename(const GURL& url,
1182 const std::string& content_disposition,
1183 const std::string& referrer_charset,
1184 const std::string& suggested_name,
1185 const std::string& mime_type,
1186 const std::string& default_name) {
1187 // TODO: this function to be updated to match the httpbis recommendations.
1188 // Talk to abarth for the latest news.
1190 // We don't translate this fallback string, "download". If localization is
1191 // needed, the caller should provide localized fallback in |default_name|.
1192 static const base::FilePath::CharType kFinalFallbackName[] =
1193 FILE_PATH_LITERAL("download");
1194 std::string filename; // In UTF-8
1195 bool overwrite_extension = false;
1197 // Try to extract a filename from content-disposition first.
1198 if (!content_disposition.empty()) {
1199 HttpContentDisposition header(content_disposition, referrer_charset);
1200 filename = header.filename();
1203 // Then try to use the suggested name.
1204 if (filename.empty() && !suggested_name.empty())
1205 filename = suggested_name;
1207 // Now try extracting the filename from the URL. GetFileNameFromURL() only
1208 // looks at the last component of the URL and doesn't return the hostname as a
1209 // failover.
1210 if (filename.empty())
1211 filename = GetFileNameFromURL(url, referrer_charset, &overwrite_extension);
1213 // Finally try the URL hostname, but only if there's no default specified in
1214 // |default_name|. Some schemes (e.g.: file:, about:, data:) do not have a
1215 // host name.
1216 if (filename.empty() &&
1217 default_name.empty() &&
1218 url.is_valid() &&
1219 !url.host().empty()) {
1220 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451)
1221 filename = url.host();
1224 bool replace_trailing = false;
1225 base::FilePath::StringType result_str, default_name_str;
1226 #if defined(OS_WIN)
1227 replace_trailing = true;
1228 result_str = UTF8ToUTF16(filename);
1229 default_name_str = UTF8ToUTF16(default_name);
1230 #else
1231 result_str = filename;
1232 default_name_str = default_name;
1233 #endif
1234 SanitizeGeneratedFileName(&result_str, replace_trailing);
1235 if (result_str.find_last_not_of(FILE_PATH_LITERAL("-_")) ==
1236 base::FilePath::StringType::npos) {
1237 result_str = !default_name_str.empty() ? default_name_str :
1238 base::FilePath::StringType(kFinalFallbackName);
1239 overwrite_extension = false;
1241 file_util::ReplaceIllegalCharactersInPath(&result_str, '-');
1242 base::FilePath result(result_str);
1243 GenerateSafeFileName(mime_type, overwrite_extension, &result);
1245 base::string16 result16;
1246 if (!FilePathToString16(result, &result16)) {
1247 result = base::FilePath(default_name_str);
1248 if (!FilePathToString16(result, &result16)) {
1249 result = base::FilePath(kFinalFallbackName);
1250 FilePathToString16(result, &result16);
1253 return result16;
1256 base::FilePath GenerateFileName(const GURL& url,
1257 const std::string& content_disposition,
1258 const std::string& referrer_charset,
1259 const std::string& suggested_name,
1260 const std::string& mime_type,
1261 const std::string& default_file_name) {
1262 base::string16 file_name = GetSuggestedFilename(url,
1263 content_disposition,
1264 referrer_charset,
1265 suggested_name,
1266 mime_type,
1267 default_file_name);
1269 #if defined(OS_WIN)
1270 base::FilePath generated_name(file_name);
1271 #else
1272 base::FilePath generated_name(
1273 base::SysWideToNativeMB(UTF16ToWide(file_name)));
1274 #endif
1276 #if defined(OS_CHROMEOS)
1277 // When doing file manager operations on ChromeOS, the file paths get
1278 // normalized in WebKit layer, so let's ensure downloaded files have
1279 // normalized names. Otherwise, we won't be able to handle files with NFD
1280 // utf8 encoded characters in name.
1281 file_util::NormalizeFileNameEncoding(&generated_name);
1282 #endif
1284 DCHECK(!generated_name.empty());
1286 return generated_name;
1289 bool IsPortAllowedByDefault(int port) {
1290 int array_size = arraysize(kRestrictedPorts);
1291 for (int i = 0; i < array_size; i++) {
1292 if (kRestrictedPorts[i] == port) {
1293 return false;
1296 return true;
1299 bool IsPortAllowedByFtp(int port) {
1300 int array_size = arraysize(kAllowedFtpPorts);
1301 for (int i = 0; i < array_size; i++) {
1302 if (kAllowedFtpPorts[i] == port) {
1303 return true;
1306 // Port not explicitly allowed by FTP, so return the default restrictions.
1307 return IsPortAllowedByDefault(port);
1310 bool IsPortAllowedByOverride(int port) {
1311 if (g_explicitly_allowed_ports.Get().empty())
1312 return false;
1314 return g_explicitly_allowed_ports.Get().count(port) > 0;
// Switches descriptor |fd| to non-blocking mode. Returns the result of the
// underlying system call: ioctlsocket() on Windows, fcntl() on POSIX (-1 when
// the current flags cannot be read).
int SetNonBlocking(int fd) {
#if defined(OS_WIN)
  unsigned long non_blocking_mode = 1;
  return ioctlsocket(fd, FIONBIO, &non_blocking_mode);
#elif defined(OS_POSIX)
  const int current_flags = fcntl(fd, F_GETFL, 0);
  if (current_flags == -1)
    return current_flags;
  return fcntl(fd, F_SETFL, current_flags | O_NONBLOCK);
#endif
}
1329 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin,
1330 std::string::const_iterator host_and_port_end,
1331 std::string* host,
1332 int* port) {
1333 if (host_and_port_begin >= host_and_port_end)
1334 return false;
1336 // When using url_parse, we use char*.
1337 const char* auth_begin = &(*host_and_port_begin);
1338 int auth_len = host_and_port_end - host_and_port_begin;
1340 url_parse::Component auth_component(0, auth_len);
1341 url_parse::Component username_component;
1342 url_parse::Component password_component;
1343 url_parse::Component hostname_component;
1344 url_parse::Component port_component;
1346 url_parse::ParseAuthority(auth_begin, auth_component, &username_component,
1347 &password_component, &hostname_component, &port_component);
1349 // There shouldn't be a username/password.
1350 if (username_component.is_valid() || password_component.is_valid())
1351 return false;
1353 if (!hostname_component.is_nonempty())
1354 return false; // Failed parsing.
1356 int parsed_port_number = -1;
1357 if (port_component.is_nonempty()) {
1358 parsed_port_number = url_parse::ParsePort(auth_begin, port_component);
1360 // If parsing failed, port_number will be either PORT_INVALID or
1361 // PORT_UNSPECIFIED, both of which are negative.
1362 if (parsed_port_number < 0)
1363 return false; // Failed parsing the port number.
1366 if (port_component.len == 0)
1367 return false; // Reject inputs like "foo:"
1369 // Pass results back to caller.
1370 host->assign(auth_begin + hostname_component.begin, hostname_component.len);
1371 *port = parsed_port_number;
1373 return true; // Success.
1376 bool ParseHostAndPort(const std::string& host_and_port,
1377 std::string* host,
1378 int* port) {
1379 return ParseHostAndPort(
1380 host_and_port.begin(), host_and_port.end(), host, port);
1383 std::string GetHostAndPort(const GURL& url) {
1384 // For IPv6 literals, GURL::host() already includes the brackets so it is
1385 // safe to just append a colon.
1386 return base::StringPrintf("%s:%d", url.host().c_str(),
1387 url.EffectiveIntPort());
1390 std::string GetHostAndOptionalPort(const GURL& url) {
1391 // For IPv6 literals, GURL::host() already includes the brackets
1392 // so it is safe to just append a colon.
1393 if (url.has_port())
1394 return base::StringPrintf("%s:%s", url.host().c_str(), url.port().c_str());
1395 return url.host();
1398 // Extracts the address and port portions of a sockaddr.
1399 bool GetIPAddressFromSockAddr(const struct sockaddr* sock_addr,
1400 socklen_t sock_addr_len,
1401 const uint8** address,
1402 size_t* address_len,
1403 uint16* port) {
1404 if (sock_addr->sa_family == AF_INET) {
1405 if (sock_addr_len < static_cast<socklen_t>(sizeof(struct sockaddr_in)))
1406 return false;
1407 const struct sockaddr_in* addr =
1408 reinterpret_cast<const struct sockaddr_in*>(sock_addr);
1409 *address = reinterpret_cast<const uint8*>(&addr->sin_addr);
1410 *address_len = kIPv4AddressSize;
1411 if (port)
1412 *port = base::NetToHost16(addr->sin_port);
1413 return true;
1416 if (sock_addr->sa_family == AF_INET6) {
1417 if (sock_addr_len < static_cast<socklen_t>(sizeof(struct sockaddr_in6)))
1418 return false;
1419 const struct sockaddr_in6* addr =
1420 reinterpret_cast<const struct sockaddr_in6*>(sock_addr);
1421 *address = reinterpret_cast<const unsigned char*>(&addr->sin6_addr);
1422 *address_len = kIPv6AddressSize;
1423 if (port)
1424 *port = base::NetToHost16(addr->sin6_port);
1425 return true;
1428 return false; // Unrecognized |sa_family|.
1431 std::string IPAddressToString(const uint8* address,
1432 size_t address_len) {
1433 std::string str;
1434 url_canon::StdStringCanonOutput output(&str);
1436 if (address_len == kIPv4AddressSize) {
1437 url_canon::AppendIPv4Address(address, &output);
1438 } else if (address_len == kIPv6AddressSize) {
1439 url_canon::AppendIPv6Address(address, &output);
1440 } else {
1441 CHECK(false) << "Invalid IP address with length: " << address_len;
1444 output.Complete();
1445 return str;
1448 std::string IPAddressToStringWithPort(const uint8* address,
1449 size_t address_len,
1450 uint16 port) {
1451 std::string address_str = IPAddressToString(address, address_len);
1453 if (address_len == kIPv6AddressSize) {
1454 // Need to bracket IPv6 addresses since they contain colons.
1455 return base::StringPrintf("[%s]:%d", address_str.c_str(), port);
1457 return base::StringPrintf("%s:%d", address_str.c_str(), port);
1460 std::string NetAddressToString(const struct sockaddr* sa,
1461 socklen_t sock_addr_len) {
1462 const uint8* address;
1463 size_t address_len;
1464 if (!GetIPAddressFromSockAddr(sa, sock_addr_len, &address,
1465 &address_len, NULL)) {
1466 NOTREACHED();
1467 return std::string();
1469 return IPAddressToString(address, address_len);
1472 std::string NetAddressToStringWithPort(const struct sockaddr* sa,
1473 socklen_t sock_addr_len) {
1474 const uint8* address;
1475 size_t address_len;
1476 uint16 port;
1477 if (!GetIPAddressFromSockAddr(sa, sock_addr_len, &address,
1478 &address_len, &port)) {
1479 NOTREACHED();
1480 return std::string();
1482 return IPAddressToStringWithPort(address, address_len, port);
1485 std::string IPAddressToString(const IPAddressNumber& addr) {
1486 return IPAddressToString(&addr.front(), addr.size());
1489 std::string IPAddressToStringWithPort(const IPAddressNumber& addr,
1490 uint16 port) {
1491 return IPAddressToStringWithPort(&addr.front(), addr.size(), port);
1494 std::string GetHostName() {
1495 #if defined(OS_WIN)
1496 EnsureWinsockInit();
1497 #endif
1499 // Host names are limited to 255 bytes.
1500 char buffer[256];
1501 int result = gethostname(buffer, sizeof(buffer));
1502 if (result != 0) {
1503 DVLOG(1) << "gethostname() failed with " << result;
1504 buffer[0] = '\0';
1506 return std::string(buffer);
1509 void GetIdentityFromURL(const GURL& url,
1510 base::string16* username,
1511 base::string16* password) {
1512 UnescapeRule::Type flags =
1513 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS;
1514 *username = UnescapeAndDecodeUTF8URLComponent(url.username(), flags, NULL);
1515 *password = UnescapeAndDecodeUTF8URLComponent(url.password(), flags, NULL);
1518 std::string GetHostOrSpecFromURL(const GURL& url) {
1519 return url.has_host() ? TrimEndingDot(url.host()) : url.spec();
1522 void AppendFormattedHost(const GURL& url,
1523 const std::string& languages,
1524 base::string16* output) {
1525 std::vector<size_t> offsets;
1526 AppendFormattedComponent(url.possibly_invalid_spec(),
1527 url.parsed_for_possibly_invalid_spec().host, offsets,
1528 HostComponentTransform(languages), output, NULL, NULL);
1531 base::string16 FormatUrlWithOffsets(
1532 const GURL& url,
1533 const std::string& languages,
1534 FormatUrlTypes format_types,
1535 UnescapeRule::Type unescape_rules,
1536 url_parse::Parsed* new_parsed,
1537 size_t* prefix_end,
1538 std::vector<size_t>* offsets_for_adjustment) {
1539 url_parse::Parsed parsed_temp;
1540 if (!new_parsed)
1541 new_parsed = &parsed_temp;
1542 else
1543 *new_parsed = url_parse::Parsed();
1544 std::vector<size_t> original_offsets;
1545 if (offsets_for_adjustment)
1546 original_offsets = *offsets_for_adjustment;
1548 // Special handling for view-source:. Don't use content::kViewSourceScheme
1549 // because this library shouldn't depend on chrome.
1550 const char* const kViewSource = "view-source";
1551 // Reject "view-source:view-source:..." to avoid deep recursion.
1552 const char* const kViewSourceTwice = "view-source:view-source:";
1553 if (url.SchemeIs(kViewSource) &&
1554 !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) {
1555 return FormatViewSourceUrl(url, original_offsets, languages, format_types,
1556 unescape_rules, new_parsed, prefix_end, offsets_for_adjustment);
1559 // We handle both valid and invalid URLs (this will give us the spec
1560 // regardless of validity).
1561 const std::string& spec = url.possibly_invalid_spec();
1562 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
1564 // Scheme & separators. These are ASCII.
1565 base::string16 url_string;
1566 url_string.insert(url_string.end(), spec.begin(),
1567 spec.begin() + parsed.CountCharactersBefore(url_parse::Parsed::USERNAME,
1568 true));
1569 const char kHTTP[] = "http://";
1570 const char kFTP[] = "ftp.";
1571 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This
1572 // means that if we trim "http://" off a URL whose host starts with "ftp." and
1573 // the user inputs this into any field subject to fixup (which is basically
1574 // all input fields), the meaning would be changed. (In fact, often the
1575 // formatted URL is directly pre-filled into an input field.) For this reason
1576 // we avoid stripping "http://" in this case.
1577 bool omit_http = (format_types & kFormatUrlOmitHTTP) &&
1578 EqualsASCII(url_string, kHTTP) &&
1579 !StartsWithASCII(url.host(), kFTP, true);
1580 new_parsed->scheme = parsed.scheme;
1582 // Username & password.
1583 if ((format_types & kFormatUrlOmitUsernamePassword) != 0) {
1584 // Remove the username and password fields. We don't want to display those
1585 // to the user since they can be used for attacks,
1586 // e.g. "http://google.com:search@evil.ru/"
1587 new_parsed->username.reset();
1588 new_parsed->password.reset();
1589 // Update the offsets based on removed username and/or password.
1590 if (offsets_for_adjustment && !offsets_for_adjustment->empty() &&
1591 (parsed.username.is_nonempty() || parsed.password.is_nonempty())) {
1592 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
1593 if (parsed.username.is_nonempty() && parsed.password.is_nonempty()) {
1594 // The seeming off-by-one and off-by-two in these first two lines are to
1595 // account for the ':' after the username and '@' after the password.
1596 offset_adjuster.Add(base::OffsetAdjuster::Adjustment(
1597 static_cast<size_t>(parsed.username.begin),
1598 static_cast<size_t>(parsed.username.len + parsed.password.len + 2),
1599 0));
1600 } else {
1601 const url_parse::Component* nonempty_component =
1602 parsed.username.is_nonempty() ? &parsed.username : &parsed.password;
1603 // The seeming off-by-one in below is to account for the '@' after the
1604 // username/password.
1605 offset_adjuster.Add(base::OffsetAdjuster::Adjustment(
1606 static_cast<size_t>(nonempty_component->begin),
1607 static_cast<size_t>(nonempty_component->len + 1), 0));
1610 } else {
1611 AppendFormattedComponent(spec, parsed.username, original_offsets,
1612 NonHostComponentTransform(unescape_rules), &url_string,
1613 &new_parsed->username, offsets_for_adjustment);
1614 if (parsed.password.is_valid()) {
1615 size_t colon = parsed.username.end();
1616 DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon);
1617 std::vector<size_t>::const_iterator colon_iter =
1618 std::find(original_offsets.begin(), original_offsets.end(), colon);
1619 if (colon_iter != original_offsets.end()) {
1620 (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] =
1621 url_string.length();
1623 url_string.push_back(':');
1625 AppendFormattedComponent(spec, parsed.password, original_offsets,
1626 NonHostComponentTransform(unescape_rules), &url_string,
1627 &new_parsed->password, offsets_for_adjustment);
1628 if (parsed.username.is_valid() || parsed.password.is_valid()) {
1629 size_t at_sign = (parsed.password.is_valid() ?
1630 parsed.password : parsed.username).end();
1631 DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign);
1632 std::vector<size_t>::const_iterator at_sign_iter =
1633 std::find(original_offsets.begin(), original_offsets.end(), at_sign);
1634 if (at_sign_iter != original_offsets.end()) {
1635 (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] =
1636 url_string.length();
1638 url_string.push_back('@');
1641 if (prefix_end)
1642 *prefix_end = static_cast<size_t>(url_string.length());
1644 // Host.
1645 AppendFormattedComponent(spec, parsed.host, original_offsets,
1646 HostComponentTransform(languages), &url_string, &new_parsed->host,
1647 offsets_for_adjustment);
1649 // Port.
1650 if (parsed.port.is_nonempty()) {
1651 url_string.push_back(':');
1652 new_parsed->port.begin = url_string.length();
1653 url_string.insert(url_string.end(),
1654 spec.begin() + parsed.port.begin,
1655 spec.begin() + parsed.port.end());
1656 new_parsed->port.len = url_string.length() - new_parsed->port.begin;
1657 } else {
1658 new_parsed->port.reset();
1661 // Path & query. Both get the same general unescape & convert treatment.
1662 if (!(format_types & kFormatUrlOmitTrailingSlashOnBareHostname) ||
1663 !CanStripTrailingSlash(url)) {
1664 AppendFormattedComponent(spec, parsed.path, original_offsets,
1665 NonHostComponentTransform(unescape_rules), &url_string,
1666 &new_parsed->path, offsets_for_adjustment);
1668 if (parsed.query.is_valid())
1669 url_string.push_back('?');
1670 AppendFormattedComponent(spec, parsed.query, original_offsets,
1671 NonHostComponentTransform(unescape_rules), &url_string,
1672 &new_parsed->query, offsets_for_adjustment);
1674 // Ref. This is valid, unescaped UTF-8, so we can just convert.
1675 if (parsed.ref.is_valid()) {
1676 url_string.push_back('#');
1677 size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin);
1678 size_t output_ref_begin = url_string.length();
1679 new_parsed->ref.begin = static_cast<int>(output_ref_begin);
1681 std::vector<size_t> offsets_into_ref(
1682 OffsetsIntoComponent(original_offsets, original_ref_begin));
1683 if (parsed.ref.len > 0) {
1684 url_string.append(base::UTF8ToUTF16AndAdjustOffsets(
1685 spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)),
1686 &offsets_into_ref));
1689 new_parsed->ref.len =
1690 static_cast<int>(url_string.length() - new_parsed->ref.begin);
1691 AdjustForComponentTransform(original_offsets, original_ref_begin,
1692 static_cast<size_t>(parsed.ref.end()), offsets_into_ref,
1693 output_ref_begin, offsets_for_adjustment);
1696 // If we need to strip out http do it after the fact. This way we don't need
1697 // to worry about how offset_for_adjustment is interpreted.
1698 if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) {
1699 const size_t kHTTPSize = arraysize(kHTTP) - 1;
1700 url_string = url_string.substr(kHTTPSize);
1701 if (offsets_for_adjustment && !offsets_for_adjustment->empty()) {
1702 base::OffsetAdjuster offset_adjuster(offsets_for_adjustment);
1703 offset_adjuster.Add(base::OffsetAdjuster::Adjustment(0, kHTTPSize, 0));
1705 if (prefix_end)
1706 *prefix_end -= kHTTPSize;
1708 // Adjust new_parsed.
1709 DCHECK(new_parsed->scheme.is_valid());
1710 int delta = -(new_parsed->scheme.len + 3); // +3 for ://.
1711 new_parsed->scheme.reset();
1712 AdjustComponents(delta, new_parsed);
1715 LimitOffsets(url_string, offsets_for_adjustment);
1716 return url_string;
1719 base::string16 FormatUrl(const GURL& url,
1720 const std::string& languages,
1721 FormatUrlTypes format_types,
1722 UnescapeRule::Type unescape_rules,
1723 url_parse::Parsed* new_parsed,
1724 size_t* prefix_end,
1725 size_t* offset_for_adjustment) {
1726 std::vector<size_t> offsets;
1727 if (offset_for_adjustment)
1728 offsets.push_back(*offset_for_adjustment);
1729 base::string16 result = FormatUrlWithOffsets(url, languages, format_types,
1730 unescape_rules, new_parsed, prefix_end, &offsets);
1731 if (offset_for_adjustment)
1732 *offset_for_adjustment = offsets[0];
1733 return result;
1736 bool CanStripTrailingSlash(const GURL& url) {
1737 // Omit the path only for standard, non-file URLs with nothing but "/" after
1738 // the hostname.
1739 return url.IsStandard() && !url.SchemeIsFile() &&
1740 !url.SchemeIsFileSystem() && !url.has_query() && !url.has_ref()
1741 && url.path() == "/";
1744 GURL SimplifyUrlForRequest(const GURL& url) {
1745 DCHECK(url.is_valid());
1746 GURL::Replacements replacements;
1747 replacements.ClearUsername();
1748 replacements.ClearPassword();
1749 replacements.ClearRef();
1750 return url.ReplaceComponents(replacements);
1753 // Specifies a comma separated list of port numbers that should be accepted
1754 // despite bans. If the string is invalid no allowed ports are stored.
1755 void SetExplicitlyAllowedPorts(const std::string& allowed_ports) {
1756 if (allowed_ports.empty())
1757 return;
1759 std::multiset<int> ports;
1760 size_t last = 0;
1761 size_t size = allowed_ports.size();
1762 // The comma delimiter.
1763 const std::string::value_type kComma = ',';
1765 // Overflow is still possible for evil user inputs.
1766 for (size_t i = 0; i <= size; ++i) {
1767 // The string should be composed of only digits and commas.
1768 if (i != size && !IsAsciiDigit(allowed_ports[i]) &&
1769 (allowed_ports[i] != kComma))
1770 return;
1771 if (i == size || allowed_ports[i] == kComma) {
1772 if (i > last) {
1773 int port;
1774 base::StringToInt(base::StringPiece(allowed_ports.begin() + last,
1775 allowed_ports.begin() + i),
1776 &port);
1777 ports.insert(port);
1779 last = i + 1;
1782 g_explicitly_allowed_ports.Get() = ports;
1785 ScopedPortException::ScopedPortException(int port) : port_(port) {
1786 g_explicitly_allowed_ports.Get().insert(port);
1789 ScopedPortException::~ScopedPortException() {
1790 std::multiset<int>::iterator it =
1791 g_explicitly_allowed_ports.Get().find(port_);
1792 if (it != g_explicitly_allowed_ports.Get().end())
1793 g_explicitly_allowed_ports.Get().erase(it);
1794 else
1795 NOTREACHED();
1798 namespace {
// Display names for IPv6SupportStatus values, looked up by using the enum
// value as the array index. The pointers themselves are const as well, so
// the table cannot be modified after initialization.
const char* const kFinalStatusNames[] = {
  "Cannot create sockets",
  "Can create sockets",
  "Can't get addresses",
  "Global ipv6 address missing",
  "Global ipv6 address present",
  "Interface array too short",
  "Probing not supported",  // IPV6_SUPPORT_MAX
};
1809 COMPILE_ASSERT(arraysize(kFinalStatusNames) == IPV6_SUPPORT_MAX + 1,
1810 IPv6SupportStatus_name_count_mismatch);
1812 // TODO(jar): The following is a simple estimate of IPv6 support. We may need
1813 // to do a test resolution, and a test connection, to REALLY verify support.
1814 IPv6SupportResult TestIPv6SupportInternal() {
1815 #if defined(OS_ANDROID)
1816 // TODO: We should fully implement IPv6 probe once 'getifaddrs' API available;
1817 // Another approach is implementing the similar feature by
1818 // java.net.NetworkInterface through JNI.
1819 NOTIMPLEMENTED();
1820 return IPv6SupportResult(true, IPV6_SUPPORT_MAX, 0);
1821 #elif defined(OS_POSIX)
1822 int test_socket = socket(AF_INET6, SOCK_STREAM, 0);
1823 if (test_socket == -1)
1824 return IPv6SupportResult(false, IPV6_CANNOT_CREATE_SOCKETS, errno);
1825 close(test_socket);
1827 // Check to see if any interface has a IPv6 address.
1828 struct ifaddrs* interface_addr = NULL;
1829 int rv = getifaddrs(&interface_addr);
1830 if (rv != 0) {
1831 // Don't yet block IPv6.
1832 return IPv6SupportResult(true, IPV6_GETIFADDRS_FAILED, errno);
1835 bool found_ipv6 = false;
1836 for (struct ifaddrs* interface = interface_addr;
1837 interface != NULL;
1838 interface = interface->ifa_next) {
1839 if (!(IFF_UP & interface->ifa_flags))
1840 continue;
1841 if (IFF_LOOPBACK & interface->ifa_flags)
1842 continue;
1843 struct sockaddr* addr = interface->ifa_addr;
1844 if (!addr)
1845 continue;
1846 if (addr->sa_family != AF_INET6)
1847 continue;
1848 // Safe cast since this is AF_INET6.
1849 struct sockaddr_in6* addr_in6 =
1850 reinterpret_cast<struct sockaddr_in6*>(addr);
1851 struct in6_addr* sin6_addr = &addr_in6->sin6_addr;
1852 if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr))
1853 continue;
1854 found_ipv6 = true;
1855 break;
1857 freeifaddrs(interface_addr);
1858 if (!found_ipv6)
1859 return IPv6SupportResult(false, IPV6_GLOBAL_ADDRESS_MISSING, 0);
1861 return IPv6SupportResult(true, IPV6_GLOBAL_ADDRESS_PRESENT, 0);
1862 #elif defined(OS_WIN)
1863 EnsureWinsockInit();
1864 SOCKET test_socket = socket(AF_INET6, SOCK_STREAM, 0);
1865 if (test_socket == INVALID_SOCKET) {
1866 return IPv6SupportResult(false,
1867 IPV6_CANNOT_CREATE_SOCKETS,
1868 WSAGetLastError());
1870 closesocket(test_socket);
1872 // Check to see if any interface has a IPv6 address.
1873 // The GetAdaptersAddresses MSDN page recommends using a size of 15000 to
1874 // avoid reallocation.
1875 ULONG adapters_size = 15000;
1876 scoped_ptr_malloc<IP_ADAPTER_ADDRESSES> adapters;
1877 ULONG error;
1878 int num_tries = 0;
1879 do {
1880 adapters.reset(
1881 reinterpret_cast<PIP_ADAPTER_ADDRESSES>(malloc(adapters_size)));
1882 // Return only unicast addresses.
1883 error = GetAdaptersAddresses(AF_UNSPEC,
1884 GAA_FLAG_SKIP_ANYCAST |
1885 GAA_FLAG_SKIP_MULTICAST |
1886 GAA_FLAG_SKIP_DNS_SERVER |
1887 GAA_FLAG_SKIP_FRIENDLY_NAME,
1888 NULL, adapters.get(), &adapters_size);
1889 num_tries++;
1890 } while (error == ERROR_BUFFER_OVERFLOW && num_tries <= 3);
1891 if (error == ERROR_NO_DATA)
1892 return IPv6SupportResult(false, IPV6_GLOBAL_ADDRESS_MISSING, error);
1893 if (error != ERROR_SUCCESS) {
1894 // Don't yet block IPv6.
1895 return IPv6SupportResult(true, IPV6_GETIFADDRS_FAILED, error);
1898 PIP_ADAPTER_ADDRESSES adapter;
1899 for (adapter = adapters.get(); adapter; adapter = adapter->Next) {
1900 if (adapter->OperStatus != IfOperStatusUp)
1901 continue;
1902 if (adapter->IfType == IF_TYPE_SOFTWARE_LOOPBACK)
1903 continue;
1904 PIP_ADAPTER_UNICAST_ADDRESS unicast_address;
1905 for (unicast_address = adapter->FirstUnicastAddress;
1906 unicast_address;
1907 unicast_address = unicast_address->Next) {
1908 if (unicast_address->Address.lpSockaddr->sa_family != AF_INET6)
1909 continue;
1910 // Safe cast since this is AF_INET6.
1911 struct sockaddr_in6* addr_in6 = reinterpret_cast<struct sockaddr_in6*>(
1912 unicast_address->Address.lpSockaddr);
1913 struct in6_addr* sin6_addr = &addr_in6->sin6_addr;
1914 if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr))
1915 continue;
1916 const uint8 kTeredoPrefix[] = { 0x20, 0x01, 0, 0 };
1917 if (!memcmp(sin6_addr->s6_addr, kTeredoPrefix, arraysize(kTeredoPrefix)))
1918 continue;
1919 return IPv6SupportResult(true, IPV6_GLOBAL_ADDRESS_PRESENT, 0);
1923 return IPv6SupportResult(false, IPV6_GLOBAL_ADDRESS_MISSING, 0);
1924 #else
1925 NOTIMPLEMENTED();
1926 return IPv6SupportResult(true, IPV6_SUPPORT_MAX, 0);
1927 #endif // defined(various platforms)
1930 } // namespace
1932 IPv6SupportResult::IPv6SupportResult(bool ipv6_supported,
1933 IPv6SupportStatus ipv6_support_status,
1934 int os_error)
1935 : ipv6_supported(ipv6_supported),
1936 ipv6_support_status(ipv6_support_status),
1937 os_error(os_error) {
1940 base::Value* IPv6SupportResult::ToNetLogValue(
1941 NetLog::LogLevel /* log_level */) const {
1942 base::DictionaryValue* dict = new DictionaryValue();
1943 dict->SetBoolean("ipv6_supported", ipv6_supported);
1944 dict->SetString("ipv6_support_status",
1945 kFinalStatusNames[ipv6_support_status]);
1946 if (os_error)
1947 dict->SetInteger("os_error", os_error);
1948 return dict;
1951 IPv6SupportResult TestIPv6Support() {
1952 IPv6SupportResult result = TestIPv6SupportInternal();
1954 // Record UMA.
1955 if (result.ipv6_support_status != IPV6_SUPPORT_MAX) {
1956 static bool run_once = false;
1957 if (!run_once) {
1958 run_once = true;
1959 UMA_HISTOGRAM_ENUMERATION("Net.IPv6Status",
1960 result.ipv6_support_status,
1961 IPV6_SUPPORT_MAX);
1962 } else {
1963 UMA_HISTOGRAM_ENUMERATION("Net.IPv6Status_retest",
1964 result.ipv6_support_status,
1965 IPV6_SUPPORT_MAX);
1968 return result;
1971 bool HaveOnlyLoopbackAddresses() {
1972 #if defined(OS_ANDROID)
1973 return android::HaveOnlyLoopbackAddresses();
1974 #elif defined(OS_POSIX)
1975 struct ifaddrs* interface_addr = NULL;
1976 int rv = getifaddrs(&interface_addr);
1977 if (rv != 0) {
1978 DVLOG(1) << "getifaddrs() failed with errno = " << errno;
1979 return false;
1982 bool result = true;
1983 for (struct ifaddrs* interface = interface_addr;
1984 interface != NULL;
1985 interface = interface->ifa_next) {
1986 if (!(IFF_UP & interface->ifa_flags))
1987 continue;
1988 if (IFF_LOOPBACK & interface->ifa_flags)
1989 continue;
1990 const struct sockaddr* addr = interface->ifa_addr;
1991 if (!addr)
1992 continue;
1993 if (addr->sa_family == AF_INET6) {
1994 // Safe cast since this is AF_INET6.
1995 const struct sockaddr_in6* addr_in6 =
1996 reinterpret_cast<const struct sockaddr_in6*>(addr);
1997 const struct in6_addr* sin6_addr = &addr_in6->sin6_addr;
1998 if (IN6_IS_ADDR_LOOPBACK(sin6_addr) || IN6_IS_ADDR_LINKLOCAL(sin6_addr))
1999 continue;
2001 if (addr->sa_family != AF_INET6 && addr->sa_family != AF_INET)
2002 continue;
2004 result = false;
2005 break;
2007 freeifaddrs(interface_addr);
2008 return result;
2009 #elif defined(OS_WIN)
2010 // TODO(wtc): implement with the GetAdaptersAddresses function.
2011 NOTIMPLEMENTED();
2012 return false;
2013 #else
2014 NOTIMPLEMENTED();
2015 return false;
2016 #endif // defined(various platforms)
2019 AddressFamily GetAddressFamily(const IPAddressNumber& address) {
2020 switch (address.size()) {
2021 case kIPv4AddressSize:
2022 return ADDRESS_FAMILY_IPV4;
2023 case kIPv6AddressSize:
2024 return ADDRESS_FAMILY_IPV6;
2025 default:
2026 return ADDRESS_FAMILY_UNSPECIFIED;
2030 bool ParseIPLiteralToNumber(const std::string& ip_literal,
2031 IPAddressNumber* ip_number) {
2032 // |ip_literal| could be either a IPv4 or an IPv6 literal. If it contains
2033 // a colon however, it must be an IPv6 address.
2034 if (ip_literal.find(':') != std::string::npos) {
2035 // GURL expects IPv6 hostnames to be surrounded with brackets.
2036 std::string host_brackets = "[" + ip_literal + "]";
2037 url_parse::Component host_comp(0, host_brackets.size());
2039 // Try parsing the hostname as an IPv6 literal.
2040 ip_number->resize(16); // 128 bits.
2041 return url_canon::IPv6AddressToNumber(host_brackets.data(),
2042 host_comp,
2043 &(*ip_number)[0]);
2046 // Otherwise the string is an IPv4 address.
2047 ip_number->resize(4); // 32 bits.
2048 url_parse::Component host_comp(0, ip_literal.size());
2049 int num_components;
2050 url_canon::CanonHostInfo::Family family = url_canon::IPv4AddressToNumber(
2051 ip_literal.data(), host_comp, &(*ip_number)[0], &num_components);
2052 return family == url_canon::CanonHostInfo::IPV4;
namespace {

// The 12 bytes that precede the IPv4 address inside an IPv4-mapped IPv6
// address: ten zero bytes followed by two 0xFF bytes.
const unsigned char kIPv4MappedPrefix[] = {
  0, 0, 0, 0,
  0, 0, 0, 0,
  0, 0, 0xFF, 0xFF
};

}  // namespace
2061 IPAddressNumber ConvertIPv4NumberToIPv6Number(
2062 const IPAddressNumber& ipv4_number) {
2063 DCHECK(ipv4_number.size() == 4);
2065 // IPv4-mapped addresses are formed by:
2066 // <80 bits of zeros> + <16 bits of ones> + <32-bit IPv4 address>.
2067 IPAddressNumber ipv6_number;
2068 ipv6_number.reserve(16);
2069 ipv6_number.insert(ipv6_number.end(),
2070 kIPv4MappedPrefix,
2071 kIPv4MappedPrefix + arraysize(kIPv4MappedPrefix));
2072 ipv6_number.insert(ipv6_number.end(), ipv4_number.begin(), ipv4_number.end());
2073 return ipv6_number;
2076 bool IsIPv4Mapped(const IPAddressNumber& address) {
2077 if (address.size() != kIPv6AddressSize)
2078 return false;
2079 return std::equal(address.begin(),
2080 address.begin() + arraysize(kIPv4MappedPrefix),
2081 kIPv4MappedPrefix);
2084 IPAddressNumber ConvertIPv4MappedToIPv4(const IPAddressNumber& address) {
2085 DCHECK(IsIPv4Mapped(address));
2086 return IPAddressNumber(address.begin() + arraysize(kIPv4MappedPrefix),
2087 address.end());
2090 bool ParseCIDRBlock(const std::string& cidr_literal,
2091 IPAddressNumber* ip_number,
2092 size_t* prefix_length_in_bits) {
2093 // We expect CIDR notation to match one of these two templates:
2094 // <IPv4-literal> "/" <number of bits>
2095 // <IPv6-literal> "/" <number of bits>
2097 std::vector<std::string> parts;
2098 base::SplitString(cidr_literal, '/', &parts);
2099 if (parts.size() != 2)
2100 return false;
2102 // Parse the IP address.
2103 if (!ParseIPLiteralToNumber(parts[0], ip_number))
2104 return false;
2106 // Parse the prefix length.
2107 int number_of_bits = -1;
2108 if (!base::StringToInt(parts[1], &number_of_bits))
2109 return false;
2111 // Make sure the prefix length is in a valid range.
2112 if (number_of_bits < 0 ||
2113 number_of_bits > static_cast<int>(ip_number->size() * 8))
2114 return false;
2116 *prefix_length_in_bits = static_cast<size_t>(number_of_bits);
2117 return true;
2120 bool IPNumberMatchesPrefix(const IPAddressNumber& ip_number,
2121 const IPAddressNumber& ip_prefix,
2122 size_t prefix_length_in_bits) {
2123 // Both the input IP address and the prefix IP address should be
2124 // either IPv4 or IPv6.
2125 DCHECK(ip_number.size() == 4 || ip_number.size() == 16);
2126 DCHECK(ip_prefix.size() == 4 || ip_prefix.size() == 16);
2128 DCHECK_LE(prefix_length_in_bits, ip_prefix.size() * 8);
2130 // In case we have an IPv6 / IPv4 mismatch, convert the IPv4 addresses to
2131 // IPv6 addresses in order to do the comparison.
2132 if (ip_number.size() != ip_prefix.size()) {
2133 if (ip_number.size() == 4) {
2134 return IPNumberMatchesPrefix(ConvertIPv4NumberToIPv6Number(ip_number),
2135 ip_prefix, prefix_length_in_bits);
2137 return IPNumberMatchesPrefix(ip_number,
2138 ConvertIPv4NumberToIPv6Number(ip_prefix),
2139 96 + prefix_length_in_bits);
2142 // Otherwise we are comparing two IPv4 addresses, or two IPv6 addresses.
2143 // Compare all the bytes that fall entirely within the prefix.
2144 int num_entire_bytes_in_prefix = prefix_length_in_bits / 8;
2145 for (int i = 0; i < num_entire_bytes_in_prefix; ++i) {
2146 if (ip_number[i] != ip_prefix[i])
2147 return false;
2150 // In case the prefix was not a multiple of 8, there will be 1 byte
2151 // which is only partially masked.
2152 int remaining_bits = prefix_length_in_bits % 8;
2153 if (remaining_bits != 0) {
2154 unsigned char mask = 0xFF << (8 - remaining_bits);
2155 int i = num_entire_bytes_in_prefix;
2156 if ((ip_number[i] & mask) != (ip_prefix[i] & mask))
2157 return false;
2160 return true;
2163 const uint16* GetPortFieldFromSockaddr(const struct sockaddr* address,
2164 socklen_t address_len) {
2165 if (address->sa_family == AF_INET) {
2166 DCHECK_LE(sizeof(sockaddr_in), static_cast<size_t>(address_len));
2167 const struct sockaddr_in* sockaddr =
2168 reinterpret_cast<const struct sockaddr_in*>(address);
2169 return &sockaddr->sin_port;
2170 } else if (address->sa_family == AF_INET6) {
2171 DCHECK_LE(sizeof(sockaddr_in6), static_cast<size_t>(address_len));
2172 const struct sockaddr_in6* sockaddr =
2173 reinterpret_cast<const struct sockaddr_in6*>(address);
2174 return &sockaddr->sin6_port;
2175 } else {
2176 NOTREACHED();
2177 return NULL;
2181 int GetPortFromSockaddr(const struct sockaddr* address, socklen_t address_len) {
2182 const uint16* port_field = GetPortFieldFromSockaddr(address, address_len);
2183 if (!port_field)
2184 return -1;
2185 return base::NetToHost16(*port_field);
2188 bool IsLocalhost(const std::string& host) {
2189 if (host == "localhost" ||
2190 host == "localhost.localdomain" ||
2191 host == "localhost6" ||
2192 host == "localhost6.localdomain6")
2193 return true;
2195 IPAddressNumber ip_number;
2196 if (ParseIPLiteralToNumber(host, &ip_number)) {
2197 size_t size = ip_number.size();
2198 switch (size) {
2199 case kIPv4AddressSize: {
2200 IPAddressNumber localhost_prefix;
2201 localhost_prefix.push_back(127);
2202 for (int i = 0; i < 3; ++i) {
2203 localhost_prefix.push_back(0);
2205 return IPNumberMatchesPrefix(ip_number, localhost_prefix, 8);
2208 case kIPv6AddressSize: {
2209 struct in6_addr sin6_addr;
2210 memcpy(&sin6_addr, &ip_number[0], kIPv6AddressSize);
2211 return !!IN6_IS_ADDR_LOOPBACK(&sin6_addr);
2214 default:
2215 NOTREACHED();
2219 return false;
2222 NetworkInterface::NetworkInterface() {
2225 NetworkInterface::NetworkInterface(const std::string& name,
2226 const IPAddressNumber& address)
2227 : name(name), address(address) {
2230 NetworkInterface::~NetworkInterface() {
2233 } // namespace net