1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_util.h"
11 #include "build/build_config.h"
17 #pragma comment(lib, "iphlpapi.lib")
18 #elif defined(OS_POSIX)
20 #if !defined(OS_ANDROID)
25 #include <netinet/in.h>
28 #include "base/basictypes.h"
29 #include "base/file_util.h"
30 #include "base/files/file_path.h"
31 #include "base/i18n/file_util_icu.h"
32 #include "base/i18n/icu_string_conversions.h"
33 #include "base/i18n/time_formatting.h"
34 #include "base/json/string_escape.h"
35 #include "base/lazy_instance.h"
36 #include "base/logging.h"
37 #include "base/memory/singleton.h"
38 #include "base/message_loop.h"
39 #include "base/metrics/histogram.h"
40 #include "base/path_service.h"
41 #include "base/stl_util.h"
42 #include "base/strings/string_number_conversions.h"
43 #include "base/strings/string_piece.h"
44 #include "base/strings/string_split.h"
45 #include "base/strings/string_tokenizer.h"
46 #include "base/strings/string_util.h"
47 #include "base/strings/stringprintf.h"
48 #include "base/strings/sys_string_conversions.h"
49 #include "base/strings/utf_offset_string_conversions.h"
50 #include "base/strings/utf_string_conversions.h"
51 #include "base/synchronization/lock.h"
52 #include "base/sys_byteorder.h"
53 #include "base/time.h"
54 #include "base/values.h"
55 #include "googleurl/src/gurl.h"
56 #include "googleurl/src/url_canon.h"
57 #include "googleurl/src/url_canon_ip.h"
58 #include "googleurl/src/url_parse.h"
59 #include "grit/net_resources.h"
60 #if defined(OS_ANDROID)
61 #include "net/android/network_library.h"
63 #include "net/base/dns_util.h"
64 #include "net/base/escape.h"
65 #include "net/base/mime_util.h"
66 #include "net/base/net_module.h"
68 #include "net/base/winsock_init.h"
70 #include "net/http/http_content_disposition.h"
71 #include "third_party/icu/public/common/unicode/uidna.h"
72 #include "third_party/icu/public/common/unicode/uniset.h"
73 #include "third_party/icu/public/common/unicode/uscript.h"
74 #include "third_party/icu/public/common/unicode/uset.h"
75 #include "third_party/icu/public/i18n/unicode/datefmt.h"
76 #include "third_party/icu/public/i18n/unicode/regex.h"
77 #include "third_party/icu/public/i18n/unicode/ulocdata.h"
85 // Prefix ("file:///") that FilePathToFileURL() puts in front of a path's
// value when it starts building the corresponding file URL string.
86 static const base::FilePath::CharType kFileURLPrefix
[] =
87 FILE_PATH_LITERAL("file:///");
89 // The general list of blocked ports. Will be blocked unless a specific
90 // protocol overrides it. (Ex: ftp can use ports 20 and 21)
91 static const int kRestrictedPorts
[] = {
125 135, // loc-srv /epmap
148 3659, // apple-sasl / PasswordServer
151 6665, // Alternate IRC [Apple addition]
152 6666, // Alternate IRC [Apple addition]
153 6667, // Standard IRC [Apple addition]
154 6668, // Alternate IRC [Apple addition]
155 6669, // Alternate IRC [Apple addition]
156 0xFFFF, // Used to block all invalid port numbers (see
157 // third_party/WebKit/Source/WebCore/platform/KURLGoogle.cpp, port())
160 // FTP overrides the following restricted ports.
161 static const int kAllowedFtpPorts
[] = {
166 // Does some simple normalization of scripts so we can allow certain scripts
167 // to exist together.
168 // TODO(brettw) bug 880223: we should allow some other languages to be
169 // combined such as Chinese and Latin. We will probably need a more
170 // complicated system of language pairs to have more fine-grained control.
171 UScriptCode
NormalizeScript(UScriptCode code
) {
173 case USCRIPT_KATAKANA
:
174 case USCRIPT_HIRAGANA
:
175 case USCRIPT_KATAKANA_OR_HIRAGANA
:
176 case USCRIPT_HANGUL
: // This one is arguable.
183 bool IsIDNComponentInSingleScript(const base::char16
* str
, int str_len
) {
184 UScriptCode first_script
= USCRIPT_INVALID_CODE
;
185 bool is_first
= true;
188 while (i
< str_len
) {
190 U16_NEXT(str
, i
, str_len
, code_point
);
192 UErrorCode err
= U_ZERO_ERROR
;
193 UScriptCode cur_script
= uscript_getScript(code_point
, &err
);
194 if (err
!= U_ZERO_ERROR
)
195 return false; // Report mixed on error.
196 cur_script
= NormalizeScript(cur_script
);
198 // TODO(brettw) We may have to check for USCRIPT_INHERENT as well.
199 if (is_first
&& cur_script
!= USCRIPT_COMMON
) {
200 first_script
= cur_script
;
203 if (cur_script
!= USCRIPT_COMMON
&& cur_script
!= first_script
)
// Reports whether the script of language |lang| can be 'safely' mixed with
// Latin letters from the ASCII range. Only the first two characters of |lang|
// are examined, and the comparison is case-sensitive.
bool IsCompatibleWithASCIILetters(const std::string& lang) {
  // Positive list for now: Chinese, Japanese and Korean. The alternative,
  // negative-listing languages written with Greek or Cyrillic letters, can be
  // more dangerous.
  static const char* const kCompatiblePrefixes[] = {"zh", "ja", "ko"};
  const size_t kPrefixCount =
      sizeof(kCompatiblePrefixes) / sizeof(kCompatiblePrefixes[0]);

  const std::string prefix = lang.substr(0, 2);
  for (size_t n = 0; n < kPrefixCount; ++n) {
    if (prefix == kCompatiblePrefixes[n])
      return true;
  }
  return false;
}
221 typedef std::map
<std::string
, icu::UnicodeSet
*> LangToExemplarSetMap
;
223 class LangToExemplarSet
{
225 static LangToExemplarSet
* GetInstance() {
226 return Singleton
<LangToExemplarSet
>::get();
230 LangToExemplarSetMap map
;
231 LangToExemplarSet() { }
232 ~LangToExemplarSet() {
233 STLDeleteContainerPairSecondPointers(map
.begin(), map
.end());
236 friend class Singleton
<LangToExemplarSet
>;
237 friend struct DefaultSingletonTraits
<LangToExemplarSet
>;
238 friend bool GetExemplarSetForLang(const std::string
&, icu::UnicodeSet
**);
239 friend void SetExemplarSetForLang(const std::string
&, icu::UnicodeSet
*);
241 DISALLOW_COPY_AND_ASSIGN(LangToExemplarSet
);
244 bool GetExemplarSetForLang(const std::string
& lang
,
245 icu::UnicodeSet
** lang_set
) {
246 const LangToExemplarSetMap
& map
= LangToExemplarSet::GetInstance()->map
;
247 LangToExemplarSetMap::const_iterator pos
= map
.find(lang
);
248 if (pos
!= map
.end()) {
249 *lang_set
= pos
->second
;
255 void SetExemplarSetForLang(const std::string
& lang
,
256 icu::UnicodeSet
* lang_set
) {
257 LangToExemplarSetMap
& map
= LangToExemplarSet::GetInstance()->map
;
258 map
.insert(std::make_pair(lang
, lang_set
));
// Lock that IsComponentCoveredByLang() holds (through base::AutoLock) around
// its calls to GetExemplarSetForLang() and SetExemplarSetForLang(); that
// function notes it is called from both the UI thread and the history thread.
261 static base::LazyInstance
<base::Lock
>::Leaky
262 g_lang_set_lock
= LAZY_INSTANCE_INITIALIZER
;
264 // Returns true if all the characters in component_characters are used by
265 // the language |lang|.
266 bool IsComponentCoveredByLang(const icu::UnicodeSet
& component_characters
,
267 const std::string
& lang
) {
268 CR_DEFINE_STATIC_LOCAL(
269 const icu::UnicodeSet
, kASCIILetters
, ('a', 'z'));
270 icu::UnicodeSet
* lang_set
= NULL
;
271 // We're called from both the UI thread and the history thread.
273 base::AutoLock
lock(g_lang_set_lock
.Get());
274 if (!GetExemplarSetForLang(lang
, &lang_set
)) {
275 UErrorCode status
= U_ZERO_ERROR
;
276 ULocaleData
* uld
= ulocdata_open(lang
.c_str(), &status
);
277 // TODO(jungshik) Turn this check on when the ICU data file is
278 // rebuilt with the minimal subset of locale data for languages
279 // to which Chrome is not localized but which we offer in the list
280 // of languages selectable for Accept-Languages. With the rebuilt ICU
281 // data, ulocdata_open never should fall back to the default locale.
283 // DCHECK(U_SUCCESS(status) && status != U_USING_DEFAULT_WARNING);
284 if (U_SUCCESS(status
) && status
!= U_USING_DEFAULT_WARNING
) {
285 lang_set
= reinterpret_cast<icu::UnicodeSet
*>(
286 ulocdata_getExemplarSet(uld
, NULL
, 0,
287 ULOCDATA_ES_STANDARD
, &status
));
288 // If |lang| is compatible with ASCII Latin letters, add them.
289 if (IsCompatibleWithASCIILetters(lang
))
290 lang_set
->addAll(kASCIILetters
);
292 lang_set
= new icu::UnicodeSet(1, 0);
295 SetExemplarSetForLang(lang
, lang_set
);
299 return !lang_set
->isEmpty() && lang_set
->containsAll(component_characters
);
302 // Returns true if the given Unicode host component is safe to display to the
304 bool IsIDNComponentSafe(const base::char16
* str
,
306 const std::string
& languages
) {
307 // Most common cases (non-IDN) do not reach here so that we don't
308 // need a fast return path.
309 // TODO(jungshik) : Check if there's any character inappropriate
310 // (although allowed) for domain names.
311 // See http://www.unicode.org/reports/tr39/#IDN_Security_Profiles and
312 // http://www.unicode.org/reports/tr39/data/xidmodifications.txt
313 // For now, we borrow the list from Mozilla and tweaked it slightly.
314 // (e.g. Characters like U+00A0, U+3000, U+3002 are omitted because
315 // they're gonna be canonicalized to U+0020 and full stop before
317 // The original list is available at
318 // http://kb.mozillazine.org/Network.IDN.blacklist_chars and
319 // at http://mxr.mozilla.org/seamonkey/source/modules/libpref/src/init/all.js#703
321 UErrorCode status
= U_ZERO_ERROR
;
322 #ifdef U_WCHAR_IS_UTF16
323 icu::UnicodeSet
dangerous_characters(icu::UnicodeString(
324 L
"[[\\ \u00bc\u00bd\u01c3\u0337\u0338"
325 L
"\u05c3\u05f4\u06d4\u0702\u115f\u1160][\u2000-\u200b]"
326 L
"[\u2024\u2027\u2028\u2029\u2039\u203a\u2044\u205f]"
327 L
"[\u2154-\u2156][\u2159-\u215b][\u215f\u2215\u23ae"
328 L
"\u29f6\u29f8\u2afb\u2afd][\u2ff0-\u2ffb][\u3014"
329 L
"\u3015\u3033\u3164\u321d\u321e\u33ae\u33af\u33c6\u33df\ufe14"
330 L
"\ufe15\ufe3f\ufe5d\ufe5e\ufeff\uff0e\uff06\uff61\uffa0\ufff9]"
331 L
"[\ufffa-\ufffd]]"), status
);
332 DCHECK(U_SUCCESS(status
));
333 icu::RegexMatcher
dangerous_patterns(icu::UnicodeString(
334 // Lone katakana no, so, or n
335 L
"[^\\p{Katakana}][\u30ce\u30f3\u30bd][^\\p{Katakana}]"
336 // Repeating Japanese accent characters
337 L
"|[\u3099\u309a\u309b\u309c][\u3099\u309a\u309b\u309c]"),
340 icu::UnicodeSet
dangerous_characters(icu::UnicodeString(
341 "[[\\u0020\\u00bc\\u00bd\\u01c3\\u0337\\u0338"
342 "\\u05c3\\u05f4\\u06d4\\u0702\\u115f\\u1160][\\u2000-\\u200b]"
343 "[\\u2024\\u2027\\u2028\\u2029\\u2039\\u203a\\u2044\\u205f]"
344 "[\\u2154-\\u2156][\\u2159-\\u215b][\\u215f\\u2215\\u23ae"
345 "\\u29f6\\u29f8\\u2afb\\u2afd][\\u2ff0-\\u2ffb][\\u3014"
346 "\\u3015\\u3033\\u3164\\u321d\\u321e\\u33ae\\u33af\\u33c6\\u33df\\ufe14"
347 "\\ufe15\\ufe3f\\ufe5d\\ufe5e\\ufeff\\uff0e\\uff06\\uff61\\uffa0\\ufff9]"
348 "[\\ufffa-\\ufffd]]", -1, US_INV
), status
);
349 DCHECK(U_SUCCESS(status
));
350 icu::RegexMatcher
dangerous_patterns(icu::UnicodeString(
351 // Lone katakana no, so, or n
352 "[^\\p{Katakana}][\\u30ce\\u30f3\u30bd][^\\p{Katakana}]"
353 // Repeating Japanese accent characters
354 "|[\\u3099\\u309a\\u309b\\u309c][\\u3099\\u309a\\u309b\\u309c]"),
357 DCHECK(U_SUCCESS(status
));
358 icu::UnicodeSet component_characters
;
359 icu::UnicodeString
component_string(str
, str_len
);
360 component_characters
.addAll(component_string
);
361 if (dangerous_characters
.containsSome(component_characters
))
364 DCHECK(U_SUCCESS(status
));
365 dangerous_patterns
.reset(component_string
);
366 if (dangerous_patterns
.find())
369 // If the language list is empty, the result is completely determined
370 // by whether a component is a single script or not. This will block
371 // even "safe" script mixing cases like <Chinese, Latin-ASCII> that are
372 // allowed with |languages| (while it blocks Chinese + Latin letters with
373 // an accent as should be the case), but we want to err on the safe side
374 // when |languages| is empty.
375 if (languages
.empty())
376 return IsIDNComponentInSingleScript(str
, str_len
);
378 // |common_characters| is made up of ASCII numbers, hyphen, plus and
379 // underscore that are used across scripts and allowed in domain names.
380 // (sync'd with characters allowed in url_canon_host with square
381 // brackets excluded.) See kHostCharLookup[] array in url_canon_host.cc.
382 icu::UnicodeSet
common_characters(UNICODE_STRING_SIMPLE("[[0-9]\\-_+\\ ]"),
384 DCHECK(U_SUCCESS(status
));
385 // Subtract common characters because they're always allowed so that
386 // we just have to check if a language-specific set contains
388 component_characters
.removeAll(common_characters
);
390 base::StringTokenizer
t(languages
, ",");
391 while (t
.GetNext()) {
392 if (IsComponentCoveredByLang(component_characters
, t
.token()))
398 // Converts one component of a host (between dots) to IDN if safe. The result
399 // will be APPENDED to the given output string and will be the same as the input
400 // if it is not IDN or the IDN is unsafe to display. Returns whether any
401 // conversion was performed.
402 bool IDNToUnicodeOneComponent(const base::char16
* comp
,
404 const std::string
& languages
,
405 base::string16
* out
) {
410 // Only transform if the input can be an IDN component.
411 static const base::char16 kIdnPrefix
[] = {'x', 'n', '-', '-'};
412 if ((comp_len
> arraysize(kIdnPrefix
)) &&
413 !memcmp(comp
, kIdnPrefix
, arraysize(kIdnPrefix
) * sizeof(base::char16
))) {
414 // Repeatedly expand the output string until it's big enough. It looks like
415 // ICU will return the required size of the buffer, but that's not
416 // documented, so we'll just grow by 2x. This should be rare and is not on a
418 size_t original_length
= out
->length();
419 for (int extra_space
= 64; ; extra_space
*= 2) {
420 UErrorCode status
= U_ZERO_ERROR
;
421 out
->resize(out
->length() + extra_space
);
422 int output_chars
= uidna_IDNToUnicode(comp
,
423 static_cast<int32_t>(comp_len
), &(*out
)[original_length
], extra_space
,
424 UIDNA_DEFAULT
, NULL
, &status
);
425 if (status
== U_ZERO_ERROR
) {
426 // Converted successfully.
427 out
->resize(original_length
+ output_chars
);
428 if (IsIDNComponentSafe(out
->data() + original_length
, output_chars
,
433 if (status
!= U_BUFFER_OVERFLOW_ERROR
)
436 // Failed, revert back to original string.
437 out
->resize(original_length
);
440 // We get here with no IDN or on error, in which case we just append the
442 out
->append(comp
, comp_len
);
446 // Clamps the offsets in |offsets_for_adjustment| to the length of |str|.
447 void LimitOffsets(const base::string16
& str
,
448 std::vector
<size_t>* offsets_for_adjustment
) {
449 if (offsets_for_adjustment
) {
450 std::for_each(offsets_for_adjustment
->begin(),
451 offsets_for_adjustment
->end(),
452 base::LimitOffset
<base::string16
>(str
.length()));
456 // TODO(brettw) bug 734373: check the scripts for each host component and
457 // don't un-IDN-ize if there is more than one. Alternatively, only IDN for
458 // scripts that the user has installed. For now, just put the entire
459 // path through IDN. Maybe this feature can be implemented in ICU itself?
461 // We may want to skip this step in the case of file URLs to allow unicode
462 // UNC hostnames regardless of encodings.
463 base::string16
IDNToUnicodeWithOffsets(
464 const std::string
& host
,
465 const std::string
& languages
,
466 std::vector
<size_t>* offsets_for_adjustment
) {
467 // Convert the ASCII input to a base::string16 for ICU.
468 base::string16 input16
;
469 input16
.reserve(host
.length());
470 input16
.insert(input16
.end(), host
.begin(), host
.end());
472 // Do each component of the host separately, since we enforce script matching
473 // on a per-component basis.
474 base::string16 out16
;
476 base::OffsetAdjuster
offset_adjuster(offsets_for_adjustment
);
477 for (size_t component_start
= 0, component_end
;
478 component_start
< input16
.length();
479 component_start
= component_end
+ 1) {
480 // Find the end of the component.
481 component_end
= input16
.find('.', component_start
);
482 if (component_end
== base::string16::npos
)
483 component_end
= input16
.length(); // For getting the last component.
484 size_t component_length
= component_end
- component_start
;
485 size_t new_component_start
= out16
.length();
486 bool converted_idn
= false;
487 if (component_end
> component_start
) {
488 // Add the substring that we just found.
489 converted_idn
= IDNToUnicodeOneComponent(
490 input16
.data() + component_start
, component_length
, languages
,
493 size_t new_component_length
= out16
.length() - new_component_start
;
495 if (converted_idn
&& offsets_for_adjustment
) {
496 offset_adjuster
.Add(base::OffsetAdjuster::Adjustment(component_start
,
497 component_length
, new_component_length
));
500 // Need to add the dot we just found (if we found one).
501 if (component_end
< input16
.length())
502 out16
.push_back('.');
506 LimitOffsets(out16
, offsets_for_adjustment
);
510 // Transforms |original_offsets| by subtracting |component_begin| from all
511 // offsets. Any offset which was not at least this large to begin with is set
512 // to std::string::npos.
513 std::vector
<size_t> OffsetsIntoComponent(
514 const std::vector
<size_t>& original_offsets
,
515 size_t component_begin
) {
516 DCHECK_NE(std::string::npos
, component_begin
);
517 std::vector
<size_t> offsets_into_component(original_offsets
);
518 for (std::vector
<size_t>::iterator
i(offsets_into_component
.begin());
519 i
!= offsets_into_component
.end(); ++i
) {
520 if (*i
!= std::string::npos
)
521 *i
= (*i
< component_begin
) ? std::string::npos
: (*i
- component_begin
);
523 return offsets_into_component
;
526 // Called after we transform a component and append it to an output string.
527 // Maps |transformed_offsets|, which represent offsets into the transformed
528 // component itself, into appropriate offsets for the output string, by adding
529 // |output_component_begin| to each. Determines which offsets need mapping by
530 // checking to see which of the |original_offsets| were within the designated
531 // original component, using its provided endpoints.
532 void AdjustForComponentTransform(
533 const std::vector
<size_t>& original_offsets
,
534 size_t original_component_begin
,
535 size_t original_component_end
,
536 const std::vector
<size_t>& transformed_offsets
,
537 size_t output_component_begin
,
538 std::vector
<size_t>* offsets_for_adjustment
) {
539 if (!offsets_for_adjustment
)
542 DCHECK_NE(std::string::npos
, original_component_begin
);
543 DCHECK_NE(std::string::npos
, original_component_end
);
544 DCHECK_NE(base::string16::npos
, output_component_begin
);
545 size_t offsets_size
= offsets_for_adjustment
->size();
546 DCHECK_EQ(offsets_size
, original_offsets
.size());
547 DCHECK_EQ(offsets_size
, transformed_offsets
.size());
548 for (size_t i
= 0; i
< offsets_size
; ++i
) {
549 size_t original_offset
= original_offsets
[i
];
550 if ((original_offset
>= original_component_begin
) &&
551 (original_offset
< original_component_end
)) {
552 size_t transformed_offset
= transformed_offsets
[i
];
553 (*offsets_for_adjustment
)[i
] =
554 (transformed_offset
== base::string16::npos
) ?
555 base::string16::npos
: (output_component_begin
+ transformed_offset
);
560 // If |component| is valid, its begin is incremented by |delta|.
561 void AdjustComponent(int delta
, url_parse::Component
* component
) {
562 if (!component
->is_valid())
565 DCHECK(delta
>= 0 || component
->begin
>= -delta
);
566 component
->begin
+= delta
;
569 // Adjusts all the components of |parsed| by |delta|, except for the scheme.
570 void AdjustComponents(int delta
, url_parse::Parsed
* parsed
) {
571 AdjustComponent(delta
, &(parsed
->username
));
572 AdjustComponent(delta
, &(parsed
->password
));
573 AdjustComponent(delta
, &(parsed
->host
));
574 AdjustComponent(delta
, &(parsed
->port
));
575 AdjustComponent(delta
, &(parsed
->path
));
576 AdjustComponent(delta
, &(parsed
->query
));
577 AdjustComponent(delta
, &(parsed
->ref
));
580 // Helper for FormatUrlWithOffsets().
581 base::string16
FormatViewSourceUrl(
583 const std::vector
<size_t>& original_offsets
,
584 const std::string
& languages
,
585 FormatUrlTypes format_types
,
586 UnescapeRule::Type unescape_rules
,
587 url_parse::Parsed
* new_parsed
,
589 std::vector
<size_t>* offsets_for_adjustment
) {
591 const char kViewSource
[] = "view-source:";
592 const size_t kViewSourceLength
= arraysize(kViewSource
) - 1;
593 std::vector
<size_t> offsets_into_url(
594 OffsetsIntoComponent(original_offsets
, kViewSourceLength
));
596 GURL
real_url(url
.possibly_invalid_spec().substr(kViewSourceLength
));
597 base::string16
result(ASCIIToUTF16(kViewSource
) +
598 FormatUrlWithOffsets(real_url
, languages
, format_types
, unescape_rules
,
599 new_parsed
, prefix_end
, &offsets_into_url
));
601 // Adjust position values.
602 if (new_parsed
->scheme
.is_nonempty()) {
603 // Assume "view-source:real-scheme" as a scheme.
604 new_parsed
->scheme
.len
+= kViewSourceLength
;
606 new_parsed
->scheme
.begin
= 0;
607 new_parsed
->scheme
.len
= kViewSourceLength
- 1;
609 AdjustComponents(kViewSourceLength
, new_parsed
);
611 *prefix_end
+= kViewSourceLength
;
612 AdjustForComponentTransform(original_offsets
, kViewSourceLength
,
613 url
.possibly_invalid_spec().length(), offsets_into_url
, kViewSourceLength
,
614 offsets_for_adjustment
);
615 LimitOffsets(result
, offsets_for_adjustment
);
619 class AppendComponentTransform
{
621 AppendComponentTransform() {}
622 virtual ~AppendComponentTransform() {}
624 virtual base::string16
Execute(
625 const std::string
& component_text
,
626 std::vector
<size_t>* offsets_into_component
) const = 0;
628 // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an
629 // accessible copy constructor in order to call AppendFormattedComponent()
630 // with an inline temporary (see http://gcc.gnu.org/bugs/#cxx%5Frvalbind ).
633 class HostComponentTransform
: public AppendComponentTransform
{
635 explicit HostComponentTransform(const std::string
& languages
)
636 : languages_(languages
) {
640 virtual base::string16
Execute(
641 const std::string
& component_text
,
642 std::vector
<size_t>* offsets_into_component
) const OVERRIDE
{
643 return IDNToUnicodeWithOffsets(component_text
, languages_
,
644 offsets_into_component
);
647 const std::string
& languages_
;
650 class NonHostComponentTransform
: public AppendComponentTransform
{
652 explicit NonHostComponentTransform(UnescapeRule::Type unescape_rules
)
653 : unescape_rules_(unescape_rules
) {
657 virtual base::string16
Execute(
658 const std::string
& component_text
,
659 std::vector
<size_t>* offsets_into_component
) const OVERRIDE
{
660 return (unescape_rules_
== UnescapeRule::NONE
) ?
661 base::UTF8ToUTF16AndAdjustOffsets(component_text
,
662 offsets_into_component
) :
663 UnescapeAndDecodeUTF8URLComponentWithOffsets(component_text
,
664 unescape_rules_
, offsets_into_component
);
667 const UnescapeRule::Type unescape_rules_
;
670 void AppendFormattedComponent(const std::string
& spec
,
671 const url_parse::Component
& original_component
,
672 const std::vector
<size_t>& original_offsets
,
673 const AppendComponentTransform
& transform
,
674 base::string16
* output
,
675 url_parse::Component
* output_component
,
676 std::vector
<size_t>* offsets_for_adjustment
) {
678 if (original_component
.is_nonempty()) {
679 size_t original_component_begin
=
680 static_cast<size_t>(original_component
.begin
);
681 size_t output_component_begin
= output
->length();
682 if (output_component
)
683 output_component
->begin
= static_cast<int>(output_component_begin
);
685 std::vector
<size_t> offsets_into_component
=
686 OffsetsIntoComponent(original_offsets
, original_component_begin
);
687 output
->append(transform
.Execute(std::string(spec
, original_component_begin
,
688 static_cast<size_t>(original_component
.len
)), &offsets_into_component
));
690 if (output_component
) {
691 output_component
->len
=
692 static_cast<int>(output
->length() - output_component_begin
);
694 AdjustForComponentTransform(original_offsets
, original_component_begin
,
695 static_cast<size_t>(original_component
.end()),
696 offsets_into_component
, output_component_begin
,
697 offsets_for_adjustment
);
698 } else if (output_component
) {
699 output_component
->reset();
703 void SanitizeGeneratedFileName(base::FilePath::StringType
* filename
,
704 bool replace_trailing
) {
705 const base::FilePath::CharType kReplace
[] = FILE_PATH_LITERAL("-");
706 if (filename
->empty())
708 if (replace_trailing
) {
709 // Handle CreateFile() stripping trailing dots and spaces on filenames
710 // http://support.microsoft.com/kb/115827
711 size_t length
= filename
->size();
712 size_t pos
= filename
->find_last_not_of(FILE_PATH_LITERAL(" ."));
713 filename
->resize((pos
== std::string::npos
) ? 0 : (pos
+ 1));
714 TrimWhitespace(*filename
, TRIM_TRAILING
, filename
);
715 if (filename
->empty())
717 size_t trimmed
= length
- filename
->size();
719 filename
->insert(filename
->end(), trimmed
, kReplace
[0]);
721 TrimString(*filename
, FILE_PATH_LITERAL("."), filename
);
722 if (filename
->empty())
724 // Replace any path information by changing path separators.
725 ReplaceSubstringsAfterOffset(filename
, 0, FILE_PATH_LITERAL("/"), kReplace
);
726 ReplaceSubstringsAfterOffset(filename
, 0, FILE_PATH_LITERAL("\\"), kReplace
);
729 // Returns the filename determined from the last component of the path portion
730 // of the URL. Returns an empty string if the URL doesn't have a path or is
731 // invalid. If the generated filename is not reliable,
732 // |should_overwrite_extension| will be set to true, in which case a better
733 // extension should be determined based on the content type.
734 std::string
GetFileNameFromURL(const GURL
& url
,
735 const std::string
& referrer_charset
,
736 bool* should_overwrite_extension
) {
737 // about: and data: URLs don't have file names, but esp. data: URLs may
738 // contain parts that look like ones (i.e., contain a slash). Therefore we
739 // don't attempt to divine a file name out of them.
740 if (!url
.is_valid() || url
.SchemeIs("about") || url
.SchemeIs("data"))
741 return std::string();
743 const std::string unescaped_url_filename
= UnescapeURLComponent(
744 url
.ExtractFileName(),
745 UnescapeRule::SPACES
| UnescapeRule::URL_SPECIAL_CHARS
);
747 // The URL's path should be escaped UTF-8, but may not be.
748 std::string decoded_filename
= unescaped_url_filename
;
749 if (!IsStringUTF8(decoded_filename
)) {
750 // TODO(jshin): this is probably not robust enough. To be sure, we need
751 // encoding detection.
752 base::string16 utf16_output
;
753 if (!referrer_charset
.empty() &&
754 base::CodepageToUTF16(unescaped_url_filename
,
755 referrer_charset
.c_str(),
756 base::OnStringConversionError::FAIL
,
758 decoded_filename
= UTF16ToUTF8(utf16_output
);
760 decoded_filename
= WideToUTF8(
761 base::SysNativeMBToWide(unescaped_url_filename
));
764 // If the URL contains a (possibly empty) query, assume it is a generator, and
765 // allow the determined extension to be overwritten.
766 *should_overwrite_extension
= !decoded_filename
.empty() && url
.has_query();
768 return decoded_filename
;
771 // Returns whether the specified extension is automatically integrated into the
773 bool IsShellIntegratedExtension(const base::FilePath::StringType
& extension
) {
774 base::FilePath::StringType extension_lower
= StringToLowerASCII(extension
);
776 // http://msdn.microsoft.com/en-us/library/ms811694.aspx
777 // Right-clicking on shortcuts can be magical.
778 if ((extension_lower
== FILE_PATH_LITERAL("local")) ||
779 (extension_lower
== FILE_PATH_LITERAL("lnk")))
782 // http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html
783 // Files become magical if they end in a CLSID, so block such extensions.
784 if (!extension_lower
.empty() &&
785 (extension_lower
[0] == FILE_PATH_LITERAL('{')) &&
786 (extension_lower
[extension_lower
.length() - 1] == FILE_PATH_LITERAL('}')))
791 // Returns whether the specified file name is a reserved name on windows.
792 // This includes names like "com2.zip" (which correspond to devices) and
793 // desktop.ini and thumbs.db which have special meaning to the windows shell.
794 bool IsReservedName(const base::FilePath::StringType
& filename
) {
795 // This list is taken from the MSDN article "Naming a file"
796 // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx
797 // I also added clock$ because GetSaveFileName seems to consider it as a
798 // reserved name too.
799 static const char* const known_devices
[] = {
800 "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5",
801 "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4",
802 "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$"
805 std::string filename_lower
= StringToLowerASCII(WideToUTF8(filename
));
806 #elif defined(OS_POSIX)
807 std::string filename_lower
= StringToLowerASCII(filename
);
810 for (size_t i
= 0; i
< arraysize(known_devices
); ++i
) {
812 if (filename_lower
== known_devices
[i
])
814 // Starts with "DEVICE.".
815 if (filename_lower
.find(std::string(known_devices
[i
]) + ".") == 0)
819 static const char* const magic_names
[] = {
820 // These file names are used by the "Customize folder" feature of the shell.
825 for (size_t i
= 0; i
< arraysize(magic_names
); ++i
) {
826 if (filename_lower
== magic_names
[i
])
833 // Examines the current extension in |file_name| and modifies it if necessary in
834 // order to ensure the filename is safe. If |file_name| doesn't contain an
835 // extension or if |ignore_extension| is true, then a new extension will be
836 // constructed based on the |mime_type|.
838 // We're addressing two things here:
840 // 1) Usability. If there is no reliable file extension, we want to guess a
841 // reasonable file extension based on the content type.
843 // 2) Shell integration. Some file extensions automatically integrate with the
844 // shell. We block these extensions to prevent a malicious web site from
845 // integrating with the user's shell.
846 void EnsureSafeExtension(const std::string
& mime_type
,
847 bool ignore_extension
,
848 base::FilePath
* file_name
) {
849 // See if our file name already contains an extension.
850 base::FilePath::StringType extension
= file_name
->Extension();
851 if (!extension
.empty())
852 extension
.erase(extension
.begin()); // Erase preceding '.'.
854 if ((ignore_extension
|| extension
.empty()) && !mime_type
.empty()) {
855 base::FilePath::StringType preferred_mime_extension
;
856 std::vector
<base::FilePath::StringType
> all_mime_extensions
;
857 // The GetPreferredExtensionForMimeType call will end up going to disk. Do
858 // this on another thread to avoid slowing the IO thread.
859 // http://crbug.com/61827
860 // TODO(asanka): Remove this ScopedAllowIO once all callers have switched
861 // over to IO safe threads.
862 base::ThreadRestrictions::ScopedAllowIO allow_io
;
863 net::GetPreferredExtensionForMimeType(mime_type
, &preferred_mime_extension
);
864 net::GetExtensionsForMimeType(mime_type
, &all_mime_extensions
);
865 // If the existing extension is in the list of valid extensions for the
866 // given type, use it. This avoids doing things like pointlessly renaming
867 // "foo.jpg" to "foo.jpeg".
868 if (std::find(all_mime_extensions
.begin(),
869 all_mime_extensions
.end(),
870 extension
) != all_mime_extensions
.end()) {
871 // leave |extension| alone
872 } else if (!preferred_mime_extension
.empty()) {
873 extension
= preferred_mime_extension
;
878 static const base::FilePath::CharType default_extension
[] =
879 FILE_PATH_LITERAL("download");
881 // Rename shell-integrated extensions.
882 // TODO(asanka): Consider stripping out the bad extension and replacing it
883 // with the preferred extension for the MIME type if one is available.
884 if (IsShellIntegratedExtension(extension
))
885 extension
.assign(default_extension
);
888 *file_name
= file_name
->ReplaceExtension(extension
);
891 bool FilePathToString16(const base::FilePath
& path
, base::string16
* converted
) {
893 return WideToUTF16(path
.value().c_str(), path
.value().size(), converted
);
894 #elif defined(OS_POSIX)
895 std::string component8
= path
.AsUTF8Unsafe();
896 return !component8
.empty() &&
897 UTF8ToUTF16(component8
.c_str(), component8
.size(), converted
);
// FormatUrlType bit flags. kFormatUrlOmitNothing is the empty mask (0); each
// of the three individual kFormatUrlOmit* flags below occupies its own bit
// (bits 0, 1 and 2), so they can be OR-ed together; kFormatUrlOmitAll is the
// union of those three flags.
903 const FormatUrlType kFormatUrlOmitNothing
= 0;
904 const FormatUrlType kFormatUrlOmitUsernamePassword
= 1 << 0;
905 const FormatUrlType kFormatUrlOmitHTTP
= 1 << 1;
906 const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname
= 1 << 2;
907 const FormatUrlType kFormatUrlOmitAll
= kFormatUrlOmitUsernamePassword
|
908 kFormatUrlOmitHTTP
| kFormatUrlOmitTrailingSlashOnBareHostname
;
910 static base::LazyInstance
<std::multiset
<int> >::Leaky
911 g_explicitly_allowed_ports
= LAZY_INSTANCE_INITIALIZER
;
913 size_t GetCountOfExplicitlyAllowedPorts() {
914 return g_explicitly_allowed_ports
.Get().size();
917 GURL
FilePathToFileURL(const base::FilePath
& path
) {
918 // Produce a URL like "file:///C:/foo" for a regular file, or
919 // "file://///server/path" for UNC. The URL canonicalizer will fix up the
920 // latter case to be the canonical UNC form: "file://server/path"
921 base::FilePath::StringType
url_string(kFileURLPrefix
);
922 url_string
.append(path
.value());
924 // Now do replacement of some characters. Since we assume the input is a
925 // literal filename, anything the URL parser might consider special should
928 // must be the first substitution since others will introduce percents as the
930 ReplaceSubstringsAfterOffset(&url_string
, 0,
931 FILE_PATH_LITERAL("%"), FILE_PATH_LITERAL("%25"));
933 // semicolon is supposed to be some kind of separator according to RFC 2396
934 ReplaceSubstringsAfterOffset(&url_string
, 0,
935 FILE_PATH_LITERAL(";"), FILE_PATH_LITERAL("%3B"));
937 ReplaceSubstringsAfterOffset(&url_string
, 0,
938 FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23"));
940 ReplaceSubstringsAfterOffset(&url_string
, 0,
941 FILE_PATH_LITERAL("?"), FILE_PATH_LITERAL("%3F"));
943 #if defined(OS_POSIX)
944 ReplaceSubstringsAfterOffset(&url_string
, 0,
945 FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C"));
948 return GURL(url_string
);
951 std::string
GetSpecificHeader(const std::string
& headers
,
952 const std::string
& name
) {
953 // We want to grab the Value from the "Key: Value" pairs in the headers,
954 // which should look like this (no leading spaces, \n-separated) (we format
955 // them this way in url_request_inet.cc):
957 // ETag: "6d0b8-947-24f35ec0"\n
958 // Content-Length: 2375\n
959 // Content-Type: text/html; charset=UTF-8\n
960 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
962 return std::string();
964 std::string
match('\n' + name
+ ':');
966 std::string::const_iterator begin
=
967 std::search(headers
.begin(), headers
.end(), match
.begin(), match
.end(),
968 base::CaseInsensitiveCompareASCII
<char>());
970 if (begin
== headers
.end())
971 return std::string();
973 begin
+= match
.length();
976 TrimWhitespace(std::string(begin
, std::find(begin
, headers
.end(), '\n')),
981 base::string16
IDNToUnicode(const std::string
& host
,
982 const std::string
& languages
) {
983 return IDNToUnicodeWithOffsets(host
, languages
, NULL
);
986 std::string
CanonicalizeHost(const std::string
& host
,
987 url_canon::CanonHostInfo
* host_info
) {
988 // Try to canonicalize the host.
989 const url_parse::Component
raw_host_component(
990 0, static_cast<int>(host
.length()));
991 std::string canon_host
;
992 url_canon::StdStringCanonOutput
canon_host_output(&canon_host
);
993 url_canon::CanonicalizeHostVerbose(host
.c_str(), raw_host_component
,
994 &canon_host_output
, host_info
);
996 if (host_info
->out_host
.is_nonempty() &&
997 host_info
->family
!= url_canon::CanonHostInfo::BROKEN
) {
998 // Success! Assert that there's no extra garbage.
999 canon_host_output
.Complete();
1000 DCHECK_EQ(host_info
->out_host
.len
, static_cast<int>(canon_host
.length()));
1002 // Empty host, or canonicalization failed. We'll return empty.
1009 std::string
GetDirectoryListingHeader(const base::string16
& title
) {
1010 static const base::StringPiece
header(
1011 NetModule::GetResource(IDR_DIR_HEADER_HTML
));
1012 // This can be null in unit tests.
1013 DLOG_IF(WARNING
, header
.empty()) <<
1014 "Missing resource: directory listing header";
1017 if (!header
.empty())
1018 result
.assign(header
.data(), header
.size());
1020 result
.append("<script>start(");
1021 base::JsonDoubleQuote(title
, true, &result
);
1022 result
.append(");</script>\n");
// Returns true when |c| is a lowercase ASCII letter.
inline bool IsHostCharAlpha(char c) {
  // We can just check lowercase because uppercase characters have already been
  // normalized.
  if (c < 'a')
    return false;
  return c <= 'z';
}
// Returns true when |c| is an ASCII decimal digit.
inline bool IsHostCharDigit(char c) {
  if (c < '0')
    return false;
  return c <= '9';
}
1037 bool IsCanonicalizedHostCompliant(const std::string
& host
,
1038 const std::string
& desired_tld
) {
1042 bool in_component
= false;
1043 bool most_recent_component_started_alpha
= false;
1044 bool last_char_was_underscore
= false;
1046 for (std::string::const_iterator
i(host
.begin()); i
!= host
.end(); ++i
) {
1048 if (!in_component
) {
1049 most_recent_component_started_alpha
= IsHostCharAlpha(c
);
1050 if (!most_recent_component_started_alpha
&& !IsHostCharDigit(c
) &&
1053 in_component
= true;
1056 if (last_char_was_underscore
)
1058 in_component
= false;
1059 } else if (IsHostCharAlpha(c
) || IsHostCharDigit(c
) || (c
== '-')) {
1060 last_char_was_underscore
= false;
1061 } else if (c
== '_') {
1062 last_char_was_underscore
= true;
1069 return most_recent_component_started_alpha
||
1070 (!desired_tld
.empty() && IsHostCharAlpha(desired_tld
[0]));
1073 std::string
GetDirectoryListingEntry(const base::string16
& name
,
1074 const std::string
& raw_bytes
,
1079 result
.append("<script>addRow(");
1080 base::JsonDoubleQuote(name
, true, &result
);
1082 if (raw_bytes
.empty()) {
1083 base::JsonDoubleQuote(EscapePath(UTF16ToUTF8(name
)),
1086 base::JsonDoubleQuote(EscapePath(raw_bytes
), true, &result
);
1089 result
.append(",1,");
1091 result
.append(",0,");
1094 // Negative size means unknown or not applicable (e.g. directory).
1095 base::string16 size_string
;
1097 size_string
= FormatBytesUnlocalized(size
);
1098 base::JsonDoubleQuote(size_string
, true, &result
);
1102 base::string16 modified_str
;
1103 // |modified| can be NULL in FTP listings.
1104 if (!modified
.is_null()) {
1105 modified_str
= base::TimeFormatShortDateAndTime(modified
);
1107 base::JsonDoubleQuote(modified_str
, true, &result
);
1109 result
.append(");</script>\n");
1114 base::string16
StripWWW(const base::string16
& text
) {
1115 const base::string16
www(ASCIIToUTF16("www."));
1116 return StartsWith(text
, www
, true) ? text
.substr(www
.length()) : text
;
1119 base::string16
StripWWWFromHost(const GURL
& url
) {
1120 DCHECK(url
.is_valid());
1121 return StripWWW(ASCIIToUTF16(url
.host()));
1124 bool IsSafePortablePathComponent(const base::FilePath
& component
) {
1125 base::string16 component16
;
1126 base::FilePath::StringType sanitized
= component
.value();
1127 SanitizeGeneratedFileName(&sanitized
, true);
1128 base::FilePath::StringType extension
= component
.Extension();
1129 if (!extension
.empty())
1130 extension
.erase(extension
.begin()); // Erase preceding '.'.
1131 return !component
.empty() &&
1132 (component
== component
.BaseName()) &&
1133 (component
== component
.StripTrailingSeparators()) &&
1134 FilePathToString16(component
, &component16
) &&
1135 file_util::IsFilenameLegal(component16
) &&
1136 !IsShellIntegratedExtension(extension
) &&
1137 (sanitized
== component
.value());
1140 bool IsSafePortableBasename(const base::FilePath
& filename
) {
1141 return IsSafePortablePathComponent(filename
) &&
1142 !IsReservedName(filename
.value());
1145 bool IsSafePortableRelativePath(const base::FilePath
& path
) {
1146 if (path
.empty() || path
.IsAbsolute() || path
.EndsWithSeparator())
1148 std::vector
<base::FilePath::StringType
> components
;
1149 path
.GetComponents(&components
);
1150 if (components
.empty())
1152 for (size_t i
= 0; i
< components
.size() - 1; ++i
) {
1153 if (!IsSafePortablePathComponent(base::FilePath(components
[i
])))
1156 return IsSafePortableBasename(path
.BaseName());
1159 void GenerateSafeFileName(const std::string
& mime_type
,
1160 bool ignore_extension
,
1161 base::FilePath
* file_path
) {
1162 // Make sure we get the right file extension
1163 EnsureSafeExtension(mime_type
, ignore_extension
, file_path
);
1166 // Prepend "_" to the file name if it's a reserved name
1167 base::FilePath::StringType leaf_name
= file_path
->BaseName().value();
1168 DCHECK(!leaf_name
.empty());
1169 if (IsReservedName(leaf_name
)) {
1170 leaf_name
= base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name
;
1171 *file_path
= file_path
->DirName();
1172 if (file_path
->value() == base::FilePath::kCurrentDirectory
) {
1173 *file_path
= base::FilePath(leaf_name
);
1175 *file_path
= file_path
->Append(leaf_name
);
1181 base::string16
GetSuggestedFilename(const GURL
& url
,
1182 const std::string
& content_disposition
,
1183 const std::string
& referrer_charset
,
1184 const std::string
& suggested_name
,
1185 const std::string
& mime_type
,
1186 const std::string
& default_name
) {
1187 // TODO: this function to be updated to match the httpbis recommendations.
1188 // Talk to abarth for the latest news.
1190 // We don't translate this fallback string, "download". If localization is
1191 // needed, the caller should provide localized fallback in |default_name|.
1192 static const base::FilePath::CharType kFinalFallbackName
[] =
1193 FILE_PATH_LITERAL("download");
1194 std::string filename
; // In UTF-8
1195 bool overwrite_extension
= false;
1197 // Try to extract a filename from content-disposition first.
1198 if (!content_disposition
.empty()) {
1199 HttpContentDisposition
header(content_disposition
, referrer_charset
);
1200 filename
= header
.filename();
1203 // Then try to use the suggested name.
1204 if (filename
.empty() && !suggested_name
.empty())
1205 filename
= suggested_name
;
1207 // Now try extracting the filename from the URL. GetFileNameFromURL() only
1208 // looks at the last component of the URL and doesn't return the hostname as a
1210 if (filename
.empty())
1211 filename
= GetFileNameFromURL(url
, referrer_charset
, &overwrite_extension
);
1213 // Finally try the URL hostname, but only if there's no default specified in
1214 // |default_name|. Some schemes (e.g.: file:, about:, data:) do not have a
1216 if (filename
.empty() &&
1217 default_name
.empty() &&
1219 !url
.host().empty()) {
1220 // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451)
1221 filename
= url
.host();
1224 bool replace_trailing
= false;
1225 base::FilePath::StringType result_str
, default_name_str
;
1227 replace_trailing
= true;
1228 result_str
= UTF8ToUTF16(filename
);
1229 default_name_str
= UTF8ToUTF16(default_name
);
1231 result_str
= filename
;
1232 default_name_str
= default_name
;
1234 SanitizeGeneratedFileName(&result_str
, replace_trailing
);
1235 if (result_str
.find_last_not_of(FILE_PATH_LITERAL("-_")) ==
1236 base::FilePath::StringType::npos
) {
1237 result_str
= !default_name_str
.empty() ? default_name_str
:
1238 base::FilePath::StringType(kFinalFallbackName
);
1239 overwrite_extension
= false;
1241 file_util::ReplaceIllegalCharactersInPath(&result_str
, '-');
1242 base::FilePath
result(result_str
);
1243 GenerateSafeFileName(mime_type
, overwrite_extension
, &result
);
1245 base::string16 result16
;
1246 if (!FilePathToString16(result
, &result16
)) {
1247 result
= base::FilePath(default_name_str
);
1248 if (!FilePathToString16(result
, &result16
)) {
1249 result
= base::FilePath(kFinalFallbackName
);
1250 FilePathToString16(result
, &result16
);
1256 base::FilePath
GenerateFileName(const GURL
& url
,
1257 const std::string
& content_disposition
,
1258 const std::string
& referrer_charset
,
1259 const std::string
& suggested_name
,
1260 const std::string
& mime_type
,
1261 const std::string
& default_file_name
) {
1262 base::string16 file_name
= GetSuggestedFilename(url
,
1263 content_disposition
,
1270 base::FilePath
generated_name(file_name
);
1272 base::FilePath
generated_name(
1273 base::SysWideToNativeMB(UTF16ToWide(file_name
)));
1276 #if defined(OS_CHROMEOS)
1277 // When doing file manager operations on ChromeOS, the file paths get
1278 // normalized in WebKit layer, so let's ensure downloaded files have
1279 // normalized names. Otherwise, we won't be able to handle files with NFD
1280 // utf8 encoded characters in name.
1281 file_util::NormalizeFileNameEncoding(&generated_name
);
1284 DCHECK(!generated_name
.empty());
1286 return generated_name
;
1289 bool IsPortAllowedByDefault(int port
) {
1290 int array_size
= arraysize(kRestrictedPorts
);
1291 for (int i
= 0; i
< array_size
; i
++) {
1292 if (kRestrictedPorts
[i
] == port
) {
1299 bool IsPortAllowedByFtp(int port
) {
1300 int array_size
= arraysize(kAllowedFtpPorts
);
1301 for (int i
= 0; i
< array_size
; i
++) {
1302 if (kAllowedFtpPorts
[i
] == port
) {
1306 // Port not explicitly allowed by FTP, so return the default restrictions.
1307 return IsPortAllowedByDefault(port
);
1310 bool IsPortAllowedByOverride(int port
) {
1311 if (g_explicitly_allowed_ports
.Get().empty())
1314 return g_explicitly_allowed_ports
.Get().count(port
) > 0;
// Puts |fd| into non-blocking mode and returns the result of the underlying
// system call: ioctlsocket() on Windows, fcntl() on POSIX. On POSIX, -1 is
// returned when the current flags cannot be read.
int SetNonBlocking(int fd) {
#if defined(OS_WIN)
  unsigned long enable = 1;
  return ioctlsocket(fd, FIONBIO, &enable);
#elif defined(OS_POSIX)
  const int current_flags = fcntl(fd, F_GETFL, 0);
  return (current_flags == -1)
      ? current_flags
      : fcntl(fd, F_SETFL, current_flags | O_NONBLOCK);
#endif
}
1329 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin
,
1330 std::string::const_iterator host_and_port_end
,
1333 if (host_and_port_begin
>= host_and_port_end
)
1336 // When using url_parse, we use char*.
1337 const char* auth_begin
= &(*host_and_port_begin
);
1338 int auth_len
= host_and_port_end
- host_and_port_begin
;
1340 url_parse::Component
auth_component(0, auth_len
);
1341 url_parse::Component username_component
;
1342 url_parse::Component password_component
;
1343 url_parse::Component hostname_component
;
1344 url_parse::Component port_component
;
1346 url_parse::ParseAuthority(auth_begin
, auth_component
, &username_component
,
1347 &password_component
, &hostname_component
, &port_component
);
1349 // There shouldn't be a username/password.
1350 if (username_component
.is_valid() || password_component
.is_valid())
1353 if (!hostname_component
.is_nonempty())
1354 return false; // Failed parsing.
1356 int parsed_port_number
= -1;
1357 if (port_component
.is_nonempty()) {
1358 parsed_port_number
= url_parse::ParsePort(auth_begin
, port_component
);
1360 // If parsing failed, port_number will be either PORT_INVALID or
1361 // PORT_UNSPECIFIED, both of which are negative.
1362 if (parsed_port_number
< 0)
1363 return false; // Failed parsing the port number.
1366 if (port_component
.len
== 0)
1367 return false; // Reject inputs like "foo:"
1369 // Pass results back to caller.
1370 host
->assign(auth_begin
+ hostname_component
.begin
, hostname_component
.len
);
1371 *port
= parsed_port_number
;
1373 return true; // Success.
1376 bool ParseHostAndPort(const std::string
& host_and_port
,
1379 return ParseHostAndPort(
1380 host_and_port
.begin(), host_and_port
.end(), host
, port
);
1383 std::string
GetHostAndPort(const GURL
& url
) {
1384 // For IPv6 literals, GURL::host() already includes the brackets so it is
1385 // safe to just append a colon.
1386 return base::StringPrintf("%s:%d", url
.host().c_str(),
1387 url
.EffectiveIntPort());
1390 std::string
GetHostAndOptionalPort(const GURL
& url
) {
1391 // For IPv6 literals, GURL::host() already includes the brackets
1392 // so it is safe to just append a colon.
1394 return base::StringPrintf("%s:%s", url
.host().c_str(), url
.port().c_str());
1398 // Extracts the address and port portions of a sockaddr.
1399 bool GetIPAddressFromSockAddr(const struct sockaddr
* sock_addr
,
1400 socklen_t sock_addr_len
,
1401 const uint8
** address
,
1402 size_t* address_len
,
1404 if (sock_addr
->sa_family
== AF_INET
) {
1405 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(struct sockaddr_in
)))
1407 const struct sockaddr_in
* addr
=
1408 reinterpret_cast<const struct sockaddr_in
*>(sock_addr
);
1409 *address
= reinterpret_cast<const uint8
*>(&addr
->sin_addr
);
1410 *address_len
= kIPv4AddressSize
;
1412 *port
= base::NetToHost16(addr
->sin_port
);
1416 if (sock_addr
->sa_family
== AF_INET6
) {
1417 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(struct sockaddr_in6
)))
1419 const struct sockaddr_in6
* addr
=
1420 reinterpret_cast<const struct sockaddr_in6
*>(sock_addr
);
1421 *address
= reinterpret_cast<const unsigned char*>(&addr
->sin6_addr
);
1422 *address_len
= kIPv6AddressSize
;
1424 *port
= base::NetToHost16(addr
->sin6_port
);
1428 return false; // Unrecognized |sa_family|.
1431 std::string
IPAddressToString(const uint8
* address
,
1432 size_t address_len
) {
1434 url_canon::StdStringCanonOutput
output(&str
);
1436 if (address_len
== kIPv4AddressSize
) {
1437 url_canon::AppendIPv4Address(address
, &output
);
1438 } else if (address_len
== kIPv6AddressSize
) {
1439 url_canon::AppendIPv6Address(address
, &output
);
1441 CHECK(false) << "Invalid IP address with length: " << address_len
;
1448 std::string
IPAddressToStringWithPort(const uint8
* address
,
1451 std::string address_str
= IPAddressToString(address
, address_len
);
1453 if (address_len
== kIPv6AddressSize
) {
1454 // Need to bracket IPv6 addresses since they contain colons.
1455 return base::StringPrintf("[%s]:%d", address_str
.c_str(), port
);
1457 return base::StringPrintf("%s:%d", address_str
.c_str(), port
);
1460 std::string
NetAddressToString(const struct sockaddr
* sa
,
1461 socklen_t sock_addr_len
) {
1462 const uint8
* address
;
1464 if (!GetIPAddressFromSockAddr(sa
, sock_addr_len
, &address
,
1465 &address_len
, NULL
)) {
1467 return std::string();
1469 return IPAddressToString(address
, address_len
);
1472 std::string
NetAddressToStringWithPort(const struct sockaddr
* sa
,
1473 socklen_t sock_addr_len
) {
1474 const uint8
* address
;
1477 if (!GetIPAddressFromSockAddr(sa
, sock_addr_len
, &address
,
1478 &address_len
, &port
)) {
1480 return std::string();
1482 return IPAddressToStringWithPort(address
, address_len
, port
);
1485 std::string
IPAddressToString(const IPAddressNumber
& addr
) {
1486 return IPAddressToString(&addr
.front(), addr
.size());
1489 std::string
IPAddressToStringWithPort(const IPAddressNumber
& addr
,
1491 return IPAddressToStringWithPort(&addr
.front(), addr
.size(), port
);
1494 std::string
GetHostName() {
1496 EnsureWinsockInit();
1499 // Host names are limited to 255 bytes.
1501 int result
= gethostname(buffer
, sizeof(buffer
));
1503 DVLOG(1) << "gethostname() failed with " << result
;
1506 return std::string(buffer
);
1509 void GetIdentityFromURL(const GURL
& url
,
1510 base::string16
* username
,
1511 base::string16
* password
) {
1512 UnescapeRule::Type flags
=
1513 UnescapeRule::SPACES
| UnescapeRule::URL_SPECIAL_CHARS
;
1514 *username
= UnescapeAndDecodeUTF8URLComponent(url
.username(), flags
, NULL
);
1515 *password
= UnescapeAndDecodeUTF8URLComponent(url
.password(), flags
, NULL
);
1518 std::string
GetHostOrSpecFromURL(const GURL
& url
) {
1519 return url
.has_host() ? TrimEndingDot(url
.host()) : url
.spec();
1522 void AppendFormattedHost(const GURL
& url
,
1523 const std::string
& languages
,
1524 base::string16
* output
) {
1525 std::vector
<size_t> offsets
;
1526 AppendFormattedComponent(url
.possibly_invalid_spec(),
1527 url
.parsed_for_possibly_invalid_spec().host
, offsets
,
1528 HostComponentTransform(languages
), output
, NULL
, NULL
);
1531 base::string16
FormatUrlWithOffsets(
1533 const std::string
& languages
,
1534 FormatUrlTypes format_types
,
1535 UnescapeRule::Type unescape_rules
,
1536 url_parse::Parsed
* new_parsed
,
1538 std::vector
<size_t>* offsets_for_adjustment
) {
1539 url_parse::Parsed parsed_temp
;
1541 new_parsed
= &parsed_temp
;
1543 *new_parsed
= url_parse::Parsed();
1544 std::vector
<size_t> original_offsets
;
1545 if (offsets_for_adjustment
)
1546 original_offsets
= *offsets_for_adjustment
;
1548 // Special handling for view-source:. Don't use content::kViewSourceScheme
1549 // because this library shouldn't depend on chrome.
1550 const char* const kViewSource
= "view-source";
1551 // Reject "view-source:view-source:..." to avoid deep recursion.
1552 const char* const kViewSourceTwice
= "view-source:view-source:";
1553 if (url
.SchemeIs(kViewSource
) &&
1554 !StartsWithASCII(url
.possibly_invalid_spec(), kViewSourceTwice
, false)) {
1555 return FormatViewSourceUrl(url
, original_offsets
, languages
, format_types
,
1556 unescape_rules
, new_parsed
, prefix_end
, offsets_for_adjustment
);
1559 // We handle both valid and invalid URLs (this will give us the spec
1560 // regardless of validity).
1561 const std::string
& spec
= url
.possibly_invalid_spec();
1562 const url_parse::Parsed
& parsed
= url
.parsed_for_possibly_invalid_spec();
1564 // Scheme & separators. These are ASCII.
1565 base::string16 url_string
;
1566 url_string
.insert(url_string
.end(), spec
.begin(),
1567 spec
.begin() + parsed
.CountCharactersBefore(url_parse::Parsed::USERNAME
,
1569 const char kHTTP
[] = "http://";
1570 const char kFTP
[] = "ftp.";
1571 // URLFixerUpper::FixupURL() treats "ftp.foo.com" as ftp://ftp.foo.com. This
1572 // means that if we trim "http://" off a URL whose host starts with "ftp." and
1573 // the user inputs this into any field subject to fixup (which is basically
1574 // all input fields), the meaning would be changed. (In fact, often the
1575 // formatted URL is directly pre-filled into an input field.) For this reason
1576 // we avoid stripping "http://" in this case.
1577 bool omit_http
= (format_types
& kFormatUrlOmitHTTP
) &&
1578 EqualsASCII(url_string
, kHTTP
) &&
1579 !StartsWithASCII(url
.host(), kFTP
, true);
1580 new_parsed
->scheme
= parsed
.scheme
;
1582 // Username & password.
1583 if ((format_types
& kFormatUrlOmitUsernamePassword
) != 0) {
1584 // Remove the username and password fields. We don't want to display those
1585 // to the user since they can be used for attacks,
1586 // e.g. "http://google.com:search@evil.ru/"
1587 new_parsed
->username
.reset();
1588 new_parsed
->password
.reset();
1589 // Update the offsets based on removed username and/or password.
1590 if (offsets_for_adjustment
&& !offsets_for_adjustment
->empty() &&
1591 (parsed
.username
.is_nonempty() || parsed
.password
.is_nonempty())) {
1592 base::OffsetAdjuster
offset_adjuster(offsets_for_adjustment
);
1593 if (parsed
.username
.is_nonempty() && parsed
.password
.is_nonempty()) {
1594 // The seeming off-by-one and off-by-two in these first two lines are to
1595 // account for the ':' after the username and '@' after the password.
1596 offset_adjuster
.Add(base::OffsetAdjuster::Adjustment(
1597 static_cast<size_t>(parsed
.username
.begin
),
1598 static_cast<size_t>(parsed
.username
.len
+ parsed
.password
.len
+ 2),
1601 const url_parse::Component
* nonempty_component
=
1602 parsed
.username
.is_nonempty() ? &parsed
.username
: &parsed
.password
;
1603 // The seeming off-by-one in below is to account for the '@' after the
1604 // username/password.
1605 offset_adjuster
.Add(base::OffsetAdjuster::Adjustment(
1606 static_cast<size_t>(nonempty_component
->begin
),
1607 static_cast<size_t>(nonempty_component
->len
+ 1), 0));
1611 AppendFormattedComponent(spec
, parsed
.username
, original_offsets
,
1612 NonHostComponentTransform(unescape_rules
), &url_string
,
1613 &new_parsed
->username
, offsets_for_adjustment
);
1614 if (parsed
.password
.is_valid()) {
1615 size_t colon
= parsed
.username
.end();
1616 DCHECK_EQ(static_cast<size_t>(parsed
.password
.begin
- 1), colon
);
1617 std::vector
<size_t>::const_iterator colon_iter
=
1618 std::find(original_offsets
.begin(), original_offsets
.end(), colon
);
1619 if (colon_iter
!= original_offsets
.end()) {
1620 (*offsets_for_adjustment
)[colon_iter
- original_offsets
.begin()] =
1621 url_string
.length();
1623 url_string
.push_back(':');
1625 AppendFormattedComponent(spec
, parsed
.password
, original_offsets
,
1626 NonHostComponentTransform(unescape_rules
), &url_string
,
1627 &new_parsed
->password
, offsets_for_adjustment
);
1628 if (parsed
.username
.is_valid() || parsed
.password
.is_valid()) {
1629 size_t at_sign
= (parsed
.password
.is_valid() ?
1630 parsed
.password
: parsed
.username
).end();
1631 DCHECK_EQ(static_cast<size_t>(parsed
.host
.begin
- 1), at_sign
);
1632 std::vector
<size_t>::const_iterator at_sign_iter
=
1633 std::find(original_offsets
.begin(), original_offsets
.end(), at_sign
);
1634 if (at_sign_iter
!= original_offsets
.end()) {
1635 (*offsets_for_adjustment
)[at_sign_iter
- original_offsets
.begin()] =
1636 url_string
.length();
1638 url_string
.push_back('@');
1642 *prefix_end
= static_cast<size_t>(url_string
.length());
1645 AppendFormattedComponent(spec
, parsed
.host
, original_offsets
,
1646 HostComponentTransform(languages
), &url_string
, &new_parsed
->host
,
1647 offsets_for_adjustment
);
1650 if (parsed
.port
.is_nonempty()) {
1651 url_string
.push_back(':');
1652 new_parsed
->port
.begin
= url_string
.length();
1653 url_string
.insert(url_string
.end(),
1654 spec
.begin() + parsed
.port
.begin
,
1655 spec
.begin() + parsed
.port
.end());
1656 new_parsed
->port
.len
= url_string
.length() - new_parsed
->port
.begin
;
1658 new_parsed
->port
.reset();
1661 // Path & query. Both get the same general unescape & convert treatment.
1662 if (!(format_types
& kFormatUrlOmitTrailingSlashOnBareHostname
) ||
1663 !CanStripTrailingSlash(url
)) {
1664 AppendFormattedComponent(spec
, parsed
.path
, original_offsets
,
1665 NonHostComponentTransform(unescape_rules
), &url_string
,
1666 &new_parsed
->path
, offsets_for_adjustment
);
1668 if (parsed
.query
.is_valid())
1669 url_string
.push_back('?');
1670 AppendFormattedComponent(spec
, parsed
.query
, original_offsets
,
1671 NonHostComponentTransform(unescape_rules
), &url_string
,
1672 &new_parsed
->query
, offsets_for_adjustment
);
1674 // Ref. This is valid, unescaped UTF-8, so we can just convert.
1675 if (parsed
.ref
.is_valid()) {
1676 url_string
.push_back('#');
1677 size_t original_ref_begin
= static_cast<size_t>(parsed
.ref
.begin
);
1678 size_t output_ref_begin
= url_string
.length();
1679 new_parsed
->ref
.begin
= static_cast<int>(output_ref_begin
);
1681 std::vector
<size_t> offsets_into_ref(
1682 OffsetsIntoComponent(original_offsets
, original_ref_begin
));
1683 if (parsed
.ref
.len
> 0) {
1684 url_string
.append(base::UTF8ToUTF16AndAdjustOffsets(
1685 spec
.substr(original_ref_begin
, static_cast<size_t>(parsed
.ref
.len
)),
1686 &offsets_into_ref
));
1689 new_parsed
->ref
.len
=
1690 static_cast<int>(url_string
.length() - new_parsed
->ref
.begin
);
1691 AdjustForComponentTransform(original_offsets
, original_ref_begin
,
1692 static_cast<size_t>(parsed
.ref
.end()), offsets_into_ref
,
1693 output_ref_begin
, offsets_for_adjustment
);
1696 // If we need to strip out http do it after the fact. This way we don't need
1697 // to worry about how offset_for_adjustment is interpreted.
1698 if (omit_http
&& StartsWith(url_string
, ASCIIToUTF16(kHTTP
), true)) {
1699 const size_t kHTTPSize
= arraysize(kHTTP
) - 1;
1700 url_string
= url_string
.substr(kHTTPSize
);
1701 if (offsets_for_adjustment
&& !offsets_for_adjustment
->empty()) {
1702 base::OffsetAdjuster
offset_adjuster(offsets_for_adjustment
);
1703 offset_adjuster
.Add(base::OffsetAdjuster::Adjustment(0, kHTTPSize
, 0));
1706 *prefix_end
-= kHTTPSize
;
1708 // Adjust new_parsed.
1709 DCHECK(new_parsed
->scheme
.is_valid());
1710 int delta
= -(new_parsed
->scheme
.len
+ 3); // +3 for ://.
1711 new_parsed
->scheme
.reset();
1712 AdjustComponents(delta
, new_parsed
);
1715 LimitOffsets(url_string
, offsets_for_adjustment
);
1719 base::string16
FormatUrl(const GURL
& url
,
1720 const std::string
& languages
,
1721 FormatUrlTypes format_types
,
1722 UnescapeRule::Type unescape_rules
,
1723 url_parse::Parsed
* new_parsed
,
1725 size_t* offset_for_adjustment
) {
1726 std::vector
<size_t> offsets
;
1727 if (offset_for_adjustment
)
1728 offsets
.push_back(*offset_for_adjustment
);
1729 base::string16 result
= FormatUrlWithOffsets(url
, languages
, format_types
,
1730 unescape_rules
, new_parsed
, prefix_end
, &offsets
);
1731 if (offset_for_adjustment
)
1732 *offset_for_adjustment
= offsets
[0];
1736 bool CanStripTrailingSlash(const GURL
& url
) {
1737 // Omit the path only for standard, non-file URLs with nothing but "/" after
1739 return url
.IsStandard() && !url
.SchemeIsFile() &&
1740 !url
.SchemeIsFileSystem() && !url
.has_query() && !url
.has_ref()
1741 && url
.path() == "/";
1744 GURL
SimplifyUrlForRequest(const GURL
& url
) {
1745 DCHECK(url
.is_valid());
1746 GURL::Replacements replacements
;
1747 replacements
.ClearUsername();
1748 replacements
.ClearPassword();
1749 replacements
.ClearRef();
1750 return url
.ReplaceComponents(replacements
);
1753 // Specifies a comma separated list of port numbers that should be accepted
1754 // despite bans. If the string is invalid no allowed ports are stored.
1755 void SetExplicitlyAllowedPorts(const std::string
& allowed_ports
) {
1756 if (allowed_ports
.empty())
1759 std::multiset
<int> ports
;
1761 size_t size
= allowed_ports
.size();
1762 // The comma delimiter.
1763 const std::string::value_type kComma
= ',';
1765 // Overflow is still possible for evil user inputs.
1766 for (size_t i
= 0; i
<= size
; ++i
) {
1767 // The string should be composed of only digits and commas.
1768 if (i
!= size
&& !IsAsciiDigit(allowed_ports
[i
]) &&
1769 (allowed_ports
[i
] != kComma
))
1771 if (i
== size
|| allowed_ports
[i
] == kComma
) {
1774 base::StringToInt(base::StringPiece(allowed_ports
.begin() + last
,
1775 allowed_ports
.begin() + i
),
1782 g_explicitly_allowed_ports
.Get() = ports
;
1785 ScopedPortException::ScopedPortException(int port
) : port_(port
) {
1786 g_explicitly_allowed_ports
.Get().insert(port
);
1789 ScopedPortException::~ScopedPortException() {
1790 std::multiset
<int>::iterator it
=
1791 g_explicitly_allowed_ports
.Get().find(port_
);
1792 if (it
!= g_explicitly_allowed_ports
.Get().end())
1793 g_explicitly_allowed_ports
.Get().erase(it
);
1800 const char* kFinalStatusNames
[] = {
1801 "Cannot create sockets",
1802 "Can create sockets",
1803 "Can't get addresses",
1804 "Global ipv6 address missing",
1805 "Global ipv6 address present",
1806 "Interface array too short",
1807 "Probing not supported", // IPV6_SUPPORT_MAX
1809 COMPILE_ASSERT(arraysize(kFinalStatusNames
) == IPV6_SUPPORT_MAX
+ 1,
1810 IPv6SupportStatus_name_count_mismatch
);
1812 // TODO(jar): The following is a simple estimate of IPv6 support. We may need
1813 // to do a test resolution, and a test connection, to REALLY verify support.
1814 IPv6SupportResult
TestIPv6SupportInternal() {
1815 #if defined(OS_ANDROID)
1816 // TODO: We should fully implement IPv6 probe once 'getifaddrs' API available;
1817 // Another approach is implementing the similar feature by
1818 // java.net.NetworkInterface through JNI.
1820 return IPv6SupportResult(true, IPV6_SUPPORT_MAX
, 0);
1821 #elif defined(OS_POSIX)
1822 int test_socket
= socket(AF_INET6
, SOCK_STREAM
, 0);
1823 if (test_socket
== -1)
1824 return IPv6SupportResult(false, IPV6_CANNOT_CREATE_SOCKETS
, errno
);
1827 // Check to see if any interface has a IPv6 address.
1828 struct ifaddrs
* interface_addr
= NULL
;
1829 int rv
= getifaddrs(&interface_addr
);
1831 // Don't yet block IPv6.
1832 return IPv6SupportResult(true, IPV6_GETIFADDRS_FAILED
, errno
);
1835 bool found_ipv6
= false;
1836 for (struct ifaddrs
* interface
= interface_addr
;
1838 interface
= interface
->ifa_next
) {
1839 if (!(IFF_UP
& interface
->ifa_flags
))
1841 if (IFF_LOOPBACK
& interface
->ifa_flags
)
1843 struct sockaddr
* addr
= interface
->ifa_addr
;
1846 if (addr
->sa_family
!= AF_INET6
)
1848 // Safe cast since this is AF_INET6.
1849 struct sockaddr_in6
* addr_in6
=
1850 reinterpret_cast<struct sockaddr_in6
*>(addr
);
1851 struct in6_addr
* sin6_addr
= &addr_in6
->sin6_addr
;
1852 if (IN6_IS_ADDR_LOOPBACK(sin6_addr
) || IN6_IS_ADDR_LINKLOCAL(sin6_addr
))
1857 freeifaddrs(interface_addr
);
1859 return IPv6SupportResult(false, IPV6_GLOBAL_ADDRESS_MISSING
, 0);
1861 return IPv6SupportResult(true, IPV6_GLOBAL_ADDRESS_PRESENT
, 0);
1862 #elif defined(OS_WIN)
1863 EnsureWinsockInit();
1864 SOCKET test_socket
= socket(AF_INET6
, SOCK_STREAM
, 0);
1865 if (test_socket
== INVALID_SOCKET
) {
1866 return IPv6SupportResult(false,
1867 IPV6_CANNOT_CREATE_SOCKETS
,
1870 closesocket(test_socket
);
1872 // Check to see if any interface has a IPv6 address.
1873 // The GetAdaptersAddresses MSDN page recommends using a size of 15000 to
1874 // avoid reallocation.
1875 ULONG adapters_size
= 15000;
1876 scoped_ptr_malloc
<IP_ADAPTER_ADDRESSES
> adapters
;
1881 reinterpret_cast<PIP_ADAPTER_ADDRESSES
>(malloc(adapters_size
)));
1882 // Return only unicast addresses.
1883 error
= GetAdaptersAddresses(AF_UNSPEC
,
1884 GAA_FLAG_SKIP_ANYCAST
|
1885 GAA_FLAG_SKIP_MULTICAST
|
1886 GAA_FLAG_SKIP_DNS_SERVER
|
1887 GAA_FLAG_SKIP_FRIENDLY_NAME
,
1888 NULL
, adapters
.get(), &adapters_size
);
1890 } while (error
== ERROR_BUFFER_OVERFLOW
&& num_tries
<= 3);
1891 if (error
== ERROR_NO_DATA
)
1892 return IPv6SupportResult(false, IPV6_GLOBAL_ADDRESS_MISSING
, error
);
1893 if (error
!= ERROR_SUCCESS
) {
1894 // Don't yet block IPv6.
1895 return IPv6SupportResult(true, IPV6_GETIFADDRS_FAILED
, error
);
1898 PIP_ADAPTER_ADDRESSES adapter
;
1899 for (adapter
= adapters
.get(); adapter
; adapter
= adapter
->Next
) {
1900 if (adapter
->OperStatus
!= IfOperStatusUp
)
1902 if (adapter
->IfType
== IF_TYPE_SOFTWARE_LOOPBACK
)
1904 PIP_ADAPTER_UNICAST_ADDRESS unicast_address
;
1905 for (unicast_address
= adapter
->FirstUnicastAddress
;
1907 unicast_address
= unicast_address
->Next
) {
1908 if (unicast_address
->Address
.lpSockaddr
->sa_family
!= AF_INET6
)
1910 // Safe cast since this is AF_INET6.
1911 struct sockaddr_in6
* addr_in6
= reinterpret_cast<struct sockaddr_in6
*>(
1912 unicast_address
->Address
.lpSockaddr
);
1913 struct in6_addr
* sin6_addr
= &addr_in6
->sin6_addr
;
1914 if (IN6_IS_ADDR_LOOPBACK(sin6_addr
) || IN6_IS_ADDR_LINKLOCAL(sin6_addr
))
1916 const uint8 kTeredoPrefix
[] = { 0x20, 0x01, 0, 0 };
1917 if (!memcmp(sin6_addr
->s6_addr
, kTeredoPrefix
, arraysize(kTeredoPrefix
)))
1919 return IPv6SupportResult(true, IPV6_GLOBAL_ADDRESS_PRESENT
, 0);
1923 return IPv6SupportResult(false, IPV6_GLOBAL_ADDRESS_MISSING
, 0);
1926 return IPv6SupportResult(true, IPV6_SUPPORT_MAX
, 0);
1927 #endif // defined(various platforms)
1932 IPv6SupportResult::IPv6SupportResult(bool ipv6_supported
,
1933 IPv6SupportStatus ipv6_support_status
,
1935 : ipv6_supported(ipv6_supported
),
1936 ipv6_support_status(ipv6_support_status
),
1937 os_error(os_error
) {
1940 base::Value
* IPv6SupportResult::ToNetLogValue(
1941 NetLog::LogLevel
/* log_level */) const {
1942 base::DictionaryValue
* dict
= new DictionaryValue();
1943 dict
->SetBoolean("ipv6_supported", ipv6_supported
);
1944 dict
->SetString("ipv6_support_status",
1945 kFinalStatusNames
[ipv6_support_status
]);
1947 dict
->SetInteger("os_error", os_error
);
1951 IPv6SupportResult
TestIPv6Support() {
1952 IPv6SupportResult result
= TestIPv6SupportInternal();
1955 if (result
.ipv6_support_status
!= IPV6_SUPPORT_MAX
) {
1956 static bool run_once
= false;
1959 UMA_HISTOGRAM_ENUMERATION("Net.IPv6Status",
1960 result
.ipv6_support_status
,
1963 UMA_HISTOGRAM_ENUMERATION("Net.IPv6Status_retest",
1964 result
.ipv6_support_status
,
1971 bool HaveOnlyLoopbackAddresses() {
1972 #if defined(OS_ANDROID)
1973 return android::HaveOnlyLoopbackAddresses();
1974 #elif defined(OS_POSIX)
1975 struct ifaddrs
* interface_addr
= NULL
;
1976 int rv
= getifaddrs(&interface_addr
);
1978 DVLOG(1) << "getifaddrs() failed with errno = " << errno
;
1983 for (struct ifaddrs
* interface
= interface_addr
;
1985 interface
= interface
->ifa_next
) {
1986 if (!(IFF_UP
& interface
->ifa_flags
))
1988 if (IFF_LOOPBACK
& interface
->ifa_flags
)
1990 const struct sockaddr
* addr
= interface
->ifa_addr
;
1993 if (addr
->sa_family
== AF_INET6
) {
1994 // Safe cast since this is AF_INET6.
1995 const struct sockaddr_in6
* addr_in6
=
1996 reinterpret_cast<const struct sockaddr_in6
*>(addr
);
1997 const struct in6_addr
* sin6_addr
= &addr_in6
->sin6_addr
;
1998 if (IN6_IS_ADDR_LOOPBACK(sin6_addr
) || IN6_IS_ADDR_LINKLOCAL(sin6_addr
))
2001 if (addr
->sa_family
!= AF_INET6
&& addr
->sa_family
!= AF_INET
)
2007 freeifaddrs(interface_addr
);
2009 #elif defined(OS_WIN)
2010 // TODO(wtc): implement with the GetAdaptersAddresses function.
2016 #endif // defined(various platforms)
2019 AddressFamily
GetAddressFamily(const IPAddressNumber
& address
) {
2020 switch (address
.size()) {
2021 case kIPv4AddressSize
:
2022 return ADDRESS_FAMILY_IPV4
;
2023 case kIPv6AddressSize
:
2024 return ADDRESS_FAMILY_IPV6
;
2026 return ADDRESS_FAMILY_UNSPECIFIED
;
2030 bool ParseIPLiteralToNumber(const std::string
& ip_literal
,
2031 IPAddressNumber
* ip_number
) {
2032 // |ip_literal| could be either a IPv4 or an IPv6 literal. If it contains
2033 // a colon however, it must be an IPv6 address.
2034 if (ip_literal
.find(':') != std::string::npos
) {
2035 // GURL expects IPv6 hostnames to be surrounded with brackets.
2036 std::string host_brackets
= "[" + ip_literal
+ "]";
2037 url_parse::Component
host_comp(0, host_brackets
.size());
2039 // Try parsing the hostname as an IPv6 literal.
2040 ip_number
->resize(16); // 128 bits.
2041 return url_canon::IPv6AddressToNumber(host_brackets
.data(),
2046 // Otherwise the string is an IPv4 address.
2047 ip_number
->resize(4); // 32 bits.
2048 url_parse::Component
host_comp(0, ip_literal
.size());
2050 url_canon::CanonHostInfo::Family family
= url_canon::IPv4AddressToNumber(
2051 ip_literal
.data(), host_comp
, &(*ip_number
)[0], &num_components
);
2052 return family
== url_canon::CanonHostInfo::IPV4
;
// Leading 12 bytes of an IPv4-mapped IPv6 address: 80 zero bits followed by
// 16 one bits.  The 4-byte IPv4 address follows this prefix.
const unsigned char kIPv4MappedPrefix[] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF
};
2061 IPAddressNumber
ConvertIPv4NumberToIPv6Number(
2062 const IPAddressNumber
& ipv4_number
) {
2063 DCHECK(ipv4_number
.size() == 4);
2065 // IPv4-mapped addresses are formed by:
2066 // <80 bits of zeros> + <16 bits of ones> + <32-bit IPv4 address>.
2067 IPAddressNumber ipv6_number
;
2068 ipv6_number
.reserve(16);
2069 ipv6_number
.insert(ipv6_number
.end(),
2071 kIPv4MappedPrefix
+ arraysize(kIPv4MappedPrefix
));
2072 ipv6_number
.insert(ipv6_number
.end(), ipv4_number
.begin(), ipv4_number
.end());
2076 bool IsIPv4Mapped(const IPAddressNumber
& address
) {
2077 if (address
.size() != kIPv6AddressSize
)
2079 return std::equal(address
.begin(),
2080 address
.begin() + arraysize(kIPv4MappedPrefix
),
2084 IPAddressNumber
ConvertIPv4MappedToIPv4(const IPAddressNumber
& address
) {
2085 DCHECK(IsIPv4Mapped(address
));
2086 return IPAddressNumber(address
.begin() + arraysize(kIPv4MappedPrefix
),
2090 bool ParseCIDRBlock(const std::string
& cidr_literal
,
2091 IPAddressNumber
* ip_number
,
2092 size_t* prefix_length_in_bits
) {
2093 // We expect CIDR notation to match one of these two templates:
2094 // <IPv4-literal> "/" <number of bits>
2095 // <IPv6-literal> "/" <number of bits>
2097 std::vector
<std::string
> parts
;
2098 base::SplitString(cidr_literal
, '/', &parts
);
2099 if (parts
.size() != 2)
2102 // Parse the IP address.
2103 if (!ParseIPLiteralToNumber(parts
[0], ip_number
))
2106 // Parse the prefix length.
2107 int number_of_bits
= -1;
2108 if (!base::StringToInt(parts
[1], &number_of_bits
))
2111 // Make sure the prefix length is in a valid range.
2112 if (number_of_bits
< 0 ||
2113 number_of_bits
> static_cast<int>(ip_number
->size() * 8))
2116 *prefix_length_in_bits
= static_cast<size_t>(number_of_bits
);
2120 bool IPNumberMatchesPrefix(const IPAddressNumber
& ip_number
,
2121 const IPAddressNumber
& ip_prefix
,
2122 size_t prefix_length_in_bits
) {
2123 // Both the input IP address and the prefix IP address should be
2124 // either IPv4 or IPv6.
2125 DCHECK(ip_number
.size() == 4 || ip_number
.size() == 16);
2126 DCHECK(ip_prefix
.size() == 4 || ip_prefix
.size() == 16);
2128 DCHECK_LE(prefix_length_in_bits
, ip_prefix
.size() * 8);
2130 // In case we have an IPv6 / IPv4 mismatch, convert the IPv4 addresses to
2131 // IPv6 addresses in order to do the comparison.
2132 if (ip_number
.size() != ip_prefix
.size()) {
2133 if (ip_number
.size() == 4) {
2134 return IPNumberMatchesPrefix(ConvertIPv4NumberToIPv6Number(ip_number
),
2135 ip_prefix
, prefix_length_in_bits
);
2137 return IPNumberMatchesPrefix(ip_number
,
2138 ConvertIPv4NumberToIPv6Number(ip_prefix
),
2139 96 + prefix_length_in_bits
);
2142 // Otherwise we are comparing two IPv4 addresses, or two IPv6 addresses.
2143 // Compare all the bytes that fall entirely within the prefix.
2144 int num_entire_bytes_in_prefix
= prefix_length_in_bits
/ 8;
2145 for (int i
= 0; i
< num_entire_bytes_in_prefix
; ++i
) {
2146 if (ip_number
[i
] != ip_prefix
[i
])
2150 // In case the prefix was not a multiple of 8, there will be 1 byte
2151 // which is only partially masked.
2152 int remaining_bits
= prefix_length_in_bits
% 8;
2153 if (remaining_bits
!= 0) {
2154 unsigned char mask
= 0xFF << (8 - remaining_bits
);
2155 int i
= num_entire_bytes_in_prefix
;
2156 if ((ip_number
[i
] & mask
) != (ip_prefix
[i
] & mask
))
2163 const uint16
* GetPortFieldFromSockaddr(const struct sockaddr
* address
,
2164 socklen_t address_len
) {
2165 if (address
->sa_family
== AF_INET
) {
2166 DCHECK_LE(sizeof(sockaddr_in
), static_cast<size_t>(address_len
));
2167 const struct sockaddr_in
* sockaddr
=
2168 reinterpret_cast<const struct sockaddr_in
*>(address
);
2169 return &sockaddr
->sin_port
;
2170 } else if (address
->sa_family
== AF_INET6
) {
2171 DCHECK_LE(sizeof(sockaddr_in6
), static_cast<size_t>(address_len
));
2172 const struct sockaddr_in6
* sockaddr
=
2173 reinterpret_cast<const struct sockaddr_in6
*>(address
);
2174 return &sockaddr
->sin6_port
;
2181 int GetPortFromSockaddr(const struct sockaddr
* address
, socklen_t address_len
) {
2182 const uint16
* port_field
= GetPortFieldFromSockaddr(address
, address_len
);
2185 return base::NetToHost16(*port_field
);
2188 bool IsLocalhost(const std::string
& host
) {
2189 if (host
== "localhost" ||
2190 host
== "localhost.localdomain" ||
2191 host
== "localhost6" ||
2192 host
== "localhost6.localdomain6")
2195 IPAddressNumber ip_number
;
2196 if (ParseIPLiteralToNumber(host
, &ip_number
)) {
2197 size_t size
= ip_number
.size();
2199 case kIPv4AddressSize
: {
2200 IPAddressNumber localhost_prefix
;
2201 localhost_prefix
.push_back(127);
2202 for (int i
= 0; i
< 3; ++i
) {
2203 localhost_prefix
.push_back(0);
2205 return IPNumberMatchesPrefix(ip_number
, localhost_prefix
, 8);
2208 case kIPv6AddressSize
: {
2209 struct in6_addr sin6_addr
;
2210 memcpy(&sin6_addr
, &ip_number
[0], kIPv6AddressSize
);
2211 return !!IN6_IS_ADDR_LOOPBACK(&sin6_addr
);
2222 NetworkInterface::NetworkInterface() {
2225 NetworkInterface::NetworkInterface(const std::string
& name
,
2226 const IPAddressNumber
& address
)
2227 : name(name
), address(address
) {
2230 NetworkInterface::~NetworkInterface() {