1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsIDNService.h"
7 #include "nsReadableUtils.h"
9 #include "nsUnicharUtils.h"
10 #include "nsIServiceManager.h"
11 #include "nsIPrefService.h"
12 #include "nsIPrefBranch.h"
13 #include "nsIObserverService.h"
14 #include "nsISupportsPrimitives.h"
18 //-----------------------------------------------------------------------------
19 // RFC 1034 - 3.1. Name space specifications and terminology
// Longest DNS label ("node") this file will process; it also sizes the
// per-label work buffers used by the encode/decode helpers below.
static const uint32_t kMaxDNSNodeLen = 63;
22 //-----------------------------------------------------------------------------
// Preference names consulted by nsIDNService.
// TESTBED drives mMultilingualTestBed, PREFIX is copied into mACEPrefix and
// BLACKLIST fills mIDNBlacklist (see prefsChanged). SHOWPUNYCODE is read in
// prefsChanged as well. WHITELIST is a branch prefix passed to GetBranch()
// in Init(), not a single pref.
24 #define NS_NET_PREF_IDNTESTBED "network.IDN_testbed"
25 #define NS_NET_PREF_IDNPREFIX "network.IDN_prefix"
26 #define NS_NET_PREF_IDNBLACKLIST "network.IDN.blacklist_chars"
27 #define NS_NET_PREF_SHOWPUNYCODE "network.IDN_show_punycode"
28 #define NS_NET_PREF_IDNWHITELIST "network.IDN.whitelist."
30 inline bool isOnlySafeChars(const nsAFlatString
& in
,
31 const nsAFlatString
& blacklist
)
33 return (blacklist
.IsEmpty() ||
34 in
.FindCharInSet(blacklist
) == kNotFound
);
37 //-----------------------------------------------------------------------------
39 //-----------------------------------------------------------------------------
41 /* Implementation file */
// Interface-map macro for nsIDNService.
// NOTE(review): the macro name indicates three interfaces, but only
// nsISupportsWeakReference is visible in this view — confirm the other two
// arguments against the full source.
42 NS_IMPL_THREADSAFE_ISUPPORTS3(nsIDNService
,
45 nsISupportsWeakReference
)
// Start-up hook. Obtains the pref service, stores the
// NS_NET_PREF_IDNWHITELIST branch in mIDNWhitelistPrefBranch, registers this
// object as an observer of the four IDN prefs, and finally calls
// prefsChanged() with a null pref name, which makes it read every setting.
// NOTE(review): null checks on |prefs| / |prefInternal| and the return
// statement are not visible in this view — confirm against the full source.
47 nsresult
nsIDNService::Init()
49 nsCOMPtr
<nsIPrefService
> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID
));
51 prefs
->GetBranch(NS_NET_PREF_IDNWHITELIST
, getter_AddRefs(mIDNWhitelistPrefBranch
));
53 nsCOMPtr
<nsIPrefBranch
> prefInternal(do_QueryInterface(prefs
));
// The third AddObserver argument is true for every registration
// (presumably the "hold weak reference" flag — confirm).
55 prefInternal
->AddObserver(NS_NET_PREF_IDNTESTBED
, this, true);
56 prefInternal
->AddObserver(NS_NET_PREF_IDNPREFIX
, this, true);
57 prefInternal
->AddObserver(NS_NET_PREF_IDNBLACKLIST
, this, true);
58 prefInternal
->AddObserver(NS_NET_PREF_SHOWPUNYCODE
, this, true);
59 prefsChanged(prefInternal
, nullptr);
// Observer callback. When aTopic equals NS_PREFBRANCH_PREFCHANGE_TOPIC_ID,
// the subject is queried for nsIPrefBranch and prefsChanged() is called with
// aData, which prefsChanged() compares against the pref names.
// NOTE(review): the aTopic parameter declaration and the return statement
// are not visible in this view.
65 NS_IMETHODIMP
nsIDNService::Observe(nsISupports
*aSubject
,
67 const PRUnichar
*aData
)
69 if (!strcmp(aTopic
, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID
)) {
70 nsCOMPtr
<nsIPrefBranch
> prefBranch( do_QueryInterface(aSubject
) );
72 prefsChanged(prefBranch
, aData
);
// Re-reads pref-backed settings from |prefBranch|. A null |pref| refreshes
// every setting; otherwise only the block whose pref name equals |pref| runs.
//   NS_NET_PREF_IDNTESTBED   -> mMultilingualTestBed (only if the read succeeds)
//   NS_NET_PREF_IDNPREFIX    -> mACEPrefix (only if the read succeeds and the
//                               value is at most kACEPrefixLen long)
//   NS_NET_PREF_IDNBLACKLIST -> mIDNBlacklist
//   NS_NET_PREF_SHOWPUNYCODE -> read into |val|
// NOTE(review): the |val| declarations, the success/else split around the
// blacklist update, and the assignment following the SHOWPUNYCODE read are
// not visible in this view (presumably mShowPunycode = val — confirm).
77 void nsIDNService::prefsChanged(nsIPrefBranch
*prefBranch
, const PRUnichar
*pref
)
79 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_IDNTESTBED
).Equals(pref
)) {
81 if (NS_SUCCEEDED(prefBranch
->GetBoolPref(NS_NET_PREF_IDNTESTBED
, &val
)))
82 mMultilingualTestBed
= val
;
84 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_IDNPREFIX
).Equals(pref
)) {
85 nsXPIDLCString prefix
;
86 nsresult rv
= prefBranch
->GetCharPref(NS_NET_PREF_IDNPREFIX
, getter_Copies(prefix
));
// An over-long prefix is ignored, leaving the current mACEPrefix in place.
87 if (NS_SUCCEEDED(rv
) && prefix
.Length() <= kACEPrefixLen
)
88 PL_strncpyz(nsIDNService::mACEPrefix
, prefix
.get(), kACEPrefixLen
+ 1);
90 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_IDNBLACKLIST
).Equals(pref
)) {
91 nsCOMPtr
<nsISupportsString
> blacklist
;
92 nsresult rv
= prefBranch
->GetComplexValue(NS_NET_PREF_IDNBLACKLIST
,
93 NS_GET_IID(nsISupportsString
),
94 getter_AddRefs(blacklist
));
96 blacklist
->ToString(getter_Copies(mIDNBlacklist
));
98 mIDNBlacklist
.Truncate();
100 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_SHOWPUNYCODE
).Equals(pref
)) {
102 if (NS_SUCCEEDED(prefBranch
->GetBoolPref(NS_NET_PREF_SHOWPUNYCODE
, &val
)))
107 nsIDNService::nsIDNService()
109 // initialize to the official prefix (RFC 3490 "5. ACE prefix")
110 const char kIDNSPrefix
[] = "xn--";
111 strcpy(mACEPrefix
, kIDNSPrefix
);
113 mMultilingualTestBed
= false;
115 if (idn_success
!= idn_nameprep_create(NULL
, &mNamePrepHandle
))
116 mNamePrepHandle
= nullptr;
118 mNormalizer
= do_GetService(NS_UNICODE_NORMALIZER_CONTRACTID
);
119 /* member initializers and constructor code */
122 nsIDNService::~nsIDNService()
124 idn_nameprep_destroy(mNamePrepHandle
);
127 /* ACString ConvertUTF8toACE (in AUTF8String input); */
128 NS_IMETHODIMP
nsIDNService::ConvertUTF8toACE(const nsACString
& input
, nsACString
& ace
)
130 return UTF8toACE(input
, ace
, true);
// Converts a UTF-8 host name to ACE one label at a time. The input is
// widened to UTF-16, alternative full stops are normalized, and each
// '.'-terminated label is passed to stringPrepAndACE(); the encoded label is
// appended to |ace|. A failing label aborts the conversion through
// NS_ENSURE_SUCCESS. When mMultilingualTestBed is set, the literal "mltbd."
// is appended before the last label is encoded.
// NOTE(review): the declaration of rv, the len/offset bookkeeping and the
// trailing arguments of the stringPrepAndACE() calls are not visible in this
// view.
133 nsresult
nsIDNService::UTF8toACE(const nsACString
& input
, nsACString
& ace
, bool allowUnassigned
)
136 NS_ConvertUTF8toUTF16
ustr(input
);
138 // map ideographic period to ASCII period etc.
139 normalizeFullStops(ustr
);
142 uint32_t len
, offset
;
145 nsAutoCString encodedBuf
;
147 nsAString::const_iterator start
, end
;
148 ustr
.BeginReading(start
);
149 ustr
.EndReading(end
);
152 // encode nodes if non ASCII
153 while (start
!= end
) {
155 if (*start
++ == (PRUnichar
)'.') {
// len - 1 drops the '.' that was just consumed from the label.
156 rv
= stringPrepAndACE(Substring(ustr
, offset
, len
- 1), encodedBuf
,
158 NS_ENSURE_SUCCESS(rv
, rv
);
160 ace
.Append(encodedBuf
);
167 // add extra node for multilingual test bed
168 if (mMultilingualTestBed
)
169 ace
.AppendLiteral("mltbd.");
170 // encode the last node if non ASCII
172 rv
= stringPrepAndACE(Substring(ustr
, offset
, len
), encodedBuf
,
174 NS_ENSURE_SUCCESS(rv
, rv
);
176 ace
.Append(encodedBuf
);
182 /* AUTF8String convertACEtoUTF8(in ACString input); */
183 NS_IMETHODIMP
nsIDNService::ConvertACEtoUTF8(const nsACString
& input
, nsACString
& _retval
)
185 return ACEtoUTF8(input
, _retval
, true);
// Decodes an ACE host name to UTF-8 one label at a time using decodeACE().
// In line with the ToUnicode rule quoted below, the failure paths visible
// here hand the input back unchanged: non-ASCII input, or a label that
// decodeACE() rejects, results in _retval.Assign(input).
// NOTE(review): return statements, the len/offset bookkeeping and the
// trailing arguments of the decodeACE() calls are not visible in this view.
188 nsresult
nsIDNService::ACEtoUTF8(const nsACString
& input
, nsACString
& _retval
,
189 bool allowUnassigned
)
191 // RFC 3490 - 4.2 ToUnicode
192 // ToUnicode never fails. If any step fails, then the original input
193 // sequence is returned immediately in that step.
195 if (!IsASCII(input
)) {
196 _retval
.Assign(input
);
200 uint32_t len
= 0, offset
= 0;
201 nsAutoCString decodedBuf
;
203 nsACString::const_iterator start
, end
;
204 input
.BeginReading(start
);
205 input
.EndReading(end
);
208 // loop and decode nodes
209 while (start
!= end
) {
211 if (*start
++ == '.') {
// len - 1 drops the '.' that was just consumed from the label.
212 if (NS_FAILED(decodeACE(Substring(input
, offset
, len
- 1), decodedBuf
,
214 _retval
.Assign(input
);
218 _retval
.Append(decodedBuf
);
224 // decode the last node
226 if (NS_FAILED(decodeACE(Substring(input
, offset
, len
), decodedBuf
,
228 _retval
.Assign(input
);
230 _retval
.Append(decodedBuf
);
236 /* boolean isACE(in ACString input); */
237 NS_IMETHODIMP
nsIDNService::IsACE(const nsACString
& input
, bool *_retval
)
239 nsACString::const_iterator begin
;
240 input
.BeginReading(begin
);
242 const char *data
= begin
.get();
243 uint32_t dataLen
= begin
.size_forward();
245 // look for the ACE prefix in the input string. it may occur
246 // at the beginning of any segment in the domain name. for
247 // example: "www.xn--ENCODED.com"
249 const char *p
= PL_strncasestr(data
, mACEPrefix
, dataLen
);
251 *_retval
= p
&& (p
== data
|| *(p
- 1) == '.');
255 /* AUTF8String normalize(in AUTF8String input); */
// Normalizes a UTF-8 host name. Input that is not valid UTF-8 is rejected
// with NS_ERROR_UNEXPECTED. Otherwise full stops are normalized, each label
// is run through stringPrep() with allowUnassigned = true, and the re-joined
// result is written to |output| as UTF-8. If the normalized name contains a
// character from mIDNBlacklist, the result of ConvertUTF8toACE() is returned
// instead.
// NOTE(review): the declaration of rv, the len/offset bookkeeping and the
// final return are not visible in this view.
256 NS_IMETHODIMP
nsIDNService::Normalize(const nsACString
& input
, nsACString
& output
)
258 // protect against bogus input
259 NS_ENSURE_TRUE(IsUTF8(input
), NS_ERROR_UNEXPECTED
);
261 NS_ConvertUTF8toUTF16
inUTF16(input
);
262 normalizeFullStops(inUTF16
);
264 // pass the domain name to stringprep label by label
265 nsAutoString outUTF16
, outLabel
;
267 uint32_t len
= 0, offset
= 0;
269 nsAString::const_iterator start
, end
;
270 inUTF16
.BeginReading(start
);
271 inUTF16
.EndReading(end
);
273 while (start
!= end
) {
275 if (*start
++ == PRUnichar('.')) {
276 rv
= stringPrep(Substring(inUTF16
, offset
, len
- 1), outLabel
, true);
277 NS_ENSURE_SUCCESS(rv
, rv
);
279 outUTF16
.Append(outLabel
);
280 outUTF16
.Append(PRUnichar('.'));
286 rv
= stringPrep(Substring(inUTF16
, offset
, len
), outLabel
, true);
287 NS_ENSURE_SUCCESS(rv
, rv
);
289 outUTF16
.Append(outLabel
);
292 CopyUTF16toUTF8(outUTF16
, output
);
// Names containing blacklisted characters are handed back ACE-encoded;
// |output| is passed as both the source and the destination of the call.
293 if (!isOnlySafeChars(outUTF16
, mIDNBlacklist
))
294 return ConvertUTF8toACE(output
, output
);
// Produces the form of |input| intended for display and reports through
// |_isASCII| whether that form is pure ASCII.
// ASCII input is lower-cased; when it is ACE, mShowPunycode is off and
// isInWhitelist() accepts it, it is decoded with ACEtoUTF8(). Other input is
// normalized with Normalize() and then converted to ACE when mShowPunycode
// is set, or when the normalized host is non-ASCII and isInWhitelist()
// rejects it.
// NOTE(review): several statements (declarations, else branches, returns)
// are not visible in this view; the summary covers the visible lines only.
299 NS_IMETHODIMP
nsIDNService::ConvertToDisplayIDN(const nsACString
& input
, bool * _isASCII
, nsACString
& _retval
)
301 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
302 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
306 if (IsASCII(input
)) {
307 // first, canonicalize the host to lowercase, for whitelist lookup
309 ToLowerCase(_retval
);
312 IsACE(_retval
, &isACE
);
314 if (isACE
&& !mShowPunycode
&& isInWhitelist(_retval
)) {
315 // ACEtoUTF8() can't fail, but might return the original ACE string
316 nsAutoCString
temp(_retval
);
317 ACEtoUTF8(temp
, _retval
, false);
318 *_isASCII
= IsASCII(_retval
);
323 // We have to normalize the hostname before testing against the domain
324 // whitelist (see bug 315411), and to ensure the entire string gets
326 rv
= Normalize(input
, _retval
);
327 if (NS_FAILED(rv
)) return rv
;
329 if (mShowPunycode
&& NS_SUCCEEDED(ConvertUTF8toACE(_retval
, _retval
))) {
334 // normalization could result in an ASCII-only hostname. alternatively, if
335 // the host is converted to ACE by the normalizer, then the host may contain
336 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694, and bug 309311.
337 *_isASCII
= IsASCII(_retval
);
338 if (!*_isASCII
&& !isInWhitelist(_retval
)) {
340 return ConvertUTF8toACE(_retval
, _retval
);
347 //-----------------------------------------------------------------------------
// Converts the UTF-16 string |in| to UCS-4 in |out|. A high surrogate
// followed by a low surrogate is combined into one code point with
// SURROGATE_TO_UCS4. The output is zero-terminated. When the write index
// reaches |outBufLen| the result is truncated: out[outBufLen-1] is set to 0
// and *outLen to outBufLen-1.
// NOTE(review): the index declaration, the non-surrogate branch and the
// normal-path *outLen store are not visible in this view. The truncation
// path writes index outBufLen-1 and the terminator at L510-512 writes
// out[i]; the visible callers pass a buffer one element larger than
// |outBufLen|.
349 static void utf16ToUcs4(const nsAString
& in
, uint32_t *out
, uint32_t outBufLen
, uint32_t *outLen
)
352 nsAString::const_iterator start
, end
;
353 in
.BeginReading(start
);
356 while (start
!= end
) {
362 NS_IS_HIGH_SURROGATE(curChar
) &&
363 NS_IS_LOW_SURROGATE(*start
)) {
364 out
[i
] = SURROGATE_TO_UCS4(curChar
, *start
);
371 if (i
>= outBufLen
) {
372 NS_ERROR("input too big, the result truncated");
373 out
[outBufLen
-1] = (uint32_t)'\0';
374 *outLen
= outBufLen
-1;
378 out
[i
] = (uint32_t)'\0';
// Appends the UCS-4 sequence |in| to |out| as UTF-16. Code points outside
// the BMP are written as a high/low surrogate pair; all others are appended
// as a single PRUnichar.
// NOTE(review): the loop and its termination test are not visible in this
// view; decodeACE() zero-terminates its buffer before calling, so the input
// is presumably expected to end with a 0 entry — confirm.
382 static void ucs4toUtf16(const uint32_t *in
, nsAString
& out
)
385 if (!IS_IN_BMP(*in
)) {
386 out
.Append((PRUnichar
) H_SURROGATE(*in
));
387 out
.Append((PRUnichar
) L_SURROGATE(*in
));
390 out
.Append((PRUnichar
) *in
);
// Punycode-encodes the label |in| and assigns |prefix| followed by the
// encoded text to |out|. Returns NS_ERROR_FAILURE when punycode_encode()
// does not report punycode_success, or when the encoded length leaves no
// room for the terminating NUL in encodedBuf.
// NOTE(review): the ucs4Len declaration, the remaining punycode_encode()
// arguments and the success return are not visible in this view.
395 static nsresult
punycode(const char* prefix
, const nsAString
& in
, nsACString
& out
)
// One spare slot beyond kMaxDNSNodeLen is kept for the zero terminator that
// utf16ToUcs4() writes.
397 uint32_t ucs4Buf
[kMaxDNSNodeLen
+ 1];
399 utf16ToUcs4(in
, ucs4Buf
, kMaxDNSNodeLen
, &ucs4Len
);
401 // need maximum 20 bits to encode 16 bit Unicode character
402 // (include null terminator)
403 const uint32_t kEncodedBufSize
= kMaxDNSNodeLen
* 20 / 8 + 1 + 1;
404 char encodedBuf
[kEncodedBufSize
];
405 punycode_uint encodedLength
= kEncodedBufSize
;
407 enum punycode_status status
= punycode_encode(ucs4Len
,
413 if (punycode_success
!= status
||
414 encodedLength
>= kEncodedBufSize
)
415 return NS_ERROR_FAILURE
;
417 encodedBuf
[encodedLength
] = '\0';
418 out
.Assign(nsDependentCString(prefix
) + nsDependentCString(encodedBuf
));
// RACE-encodes the label |in| with race_compress_encode() and appends the
// encoded text to |out|. Returns NS_ERROR_FAILURE when the encoder does not
// report idn_success.
// NOTE(review): |temp| holds kMaxDNSNodeLen + 2 units and is indexed with
// in.Length() + 1; no length check is visible inside this function, so it
// relies on callers bounding |in| — confirm. The copy-loop body, the use of
// |prefix| and the success return are not visible in this view.
423 static nsresult
encodeToRACE(const char* prefix
, const nsAString
& in
, nsACString
& out
)
425 // need maximum 20 bits to encode 16 bit Unicode character
426 // (include null terminator)
427 const uint32_t kEncodedBufSize
= kMaxDNSNodeLen
* 20 / 8 + 1 + 1;
429 // set up a work buffer for RACE encoder
430 PRUnichar temp
[kMaxDNSNodeLen
+ 2];
431 temp
[0] = 0xFFFF; // set a place holder (to be filled by get_compress_mode)
432 temp
[in
.Length() + 1] = (PRUnichar
)'\0';
434 nsAString::const_iterator start
, end
;
435 in
.BeginReading(start
);
438 for (uint32_t i
= 1; start
!= end
; i
++)
441 // encode nodes if non ASCII
443 char encodedBuf
[kEncodedBufSize
];
444 idn_result_t result
= race_compress_encode((const unsigned short *) temp
,
445 get_compress_mode((unsigned short *) temp
+ 1),
446 encodedBuf
, kEncodedBufSize
);
447 if (idn_success
!= result
)
448 return NS_ERROR_FAILURE
;
451 out
.Append(encodedBuf
);
458 // 1) Map -- For each character in the input, check if it has a mapping
459 // and, if so, replace it with its mapping. This is described in section 3.
461 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
462 // normalization. This is described in section 4.
464 // 3) Prohibit -- Check for any characters that are not allowed in the
465 // output. If any are found, return an error. This is described in section
468 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
469 // are found, make sure that the whole string satisfies the requirements
470 // for bidirectional strings. If the string does not satisfy the requirements
471 // for bidirectional strings, return an error. This is described in section 6.
473 // 5) Check unassigned code points -- If allowUnassigned is false, check for
474 // any unassigned Unicode points and if any are found return an error.
475 // This is described in section 7.
// Applies nameprep to a single label. Returns NS_ERROR_FAILURE when
// mNamePrepHandle or mNormalizer is missing, when idn_nameprep_map() fails,
// when the mapped or the NFKC-normalized string is kMaxDNSNodeLen units or
// longer, or when the prohibited / bidi / (if !allowUnassigned) unassigned
// checks fail or report an offending code point. On success |out| receives
// the normalized string.
// NOTE(review): the prohibited, bidi and unassigned checks are handed
// ucs4Buf (the converted input), not the mapped/normalized text — confirm
// this is intended.
// NOTE(review): rv from NormalizeUnicodeNFKC() is not examined in the
// visible lines; the declarations of rv / ucs4Len and the final return are
// not visible in this view.
477 nsresult
nsIDNService::stringPrep(const nsAString
& in
, nsAString
& out
,
478 bool allowUnassigned
)
480 if (!mNamePrepHandle
|| !mNormalizer
)
481 return NS_ERROR_FAILURE
;
484 uint32_t ucs4Buf
[kMaxDNSNodeLen
+ 1];
486 utf16ToUcs4(in
, ucs4Buf
, kMaxDNSNodeLen
, &ucs4Len
);
489 idn_result_t idn_err
;
491 uint32_t namePrepBuf
[kMaxDNSNodeLen
* 3]; // map up to three characters
492 idn_err
= idn_nameprep_map(mNamePrepHandle
, (const uint32_t *) ucs4Buf
,
493 (uint32_t *) namePrepBuf
, kMaxDNSNodeLen
* 3);
494 NS_ENSURE_TRUE(idn_err
== idn_success
, NS_ERROR_FAILURE
);
496 nsAutoString namePrepStr
;
497 ucs4toUtf16(namePrepBuf
, namePrepStr
);
498 if (namePrepStr
.Length() >= kMaxDNSNodeLen
)
499 return NS_ERROR_FAILURE
;
502 nsAutoString normlizedStr
;
503 rv
= mNormalizer
->NormalizeUnicodeNFKC(namePrepStr
, normlizedStr
);
504 if (normlizedStr
.Length() >= kMaxDNSNodeLen
)
505 return NS_ERROR_FAILURE
;
508 const uint32_t *found
= nullptr;
509 idn_err
= idn_nameprep_isprohibited(mNamePrepHandle
,
510 (const uint32_t *) ucs4Buf
, &found
);
511 if (idn_err
!= idn_success
|| found
)
512 return NS_ERROR_FAILURE
;
515 idn_err
= idn_nameprep_isvalidbidi(mNamePrepHandle
,
516 (const uint32_t *) ucs4Buf
, &found
);
517 if (idn_err
!= idn_success
|| found
)
518 return NS_ERROR_FAILURE
;
520 if (!allowUnassigned
) {
521 // check unassigned code points
522 idn_err
= idn_nameprep_isunassigned(mNamePrepHandle
,
523 (const uint32_t *) ucs4Buf
, &found
);
524 if (idn_err
!= idn_success
|| found
)
525 return NS_ERROR_FAILURE
;
528 // set the result string
529 out
.Assign(normlizedStr
);
534 nsresult
nsIDNService::encodeToACE(const nsAString
& in
, nsACString
& out
)
536 // RACE encode is supported for existing testing environment
537 if (!strcmp("bq--", mACEPrefix
))
538 return encodeToRACE(mACEPrefix
, in
, out
);
541 return punycode(mACEPrefix
, in
, out
);
// Converts one label to its ACE form. A label longer than kMaxDNSNodeLen is
// rejected up front, and so is a result longer than kMaxDNSNodeLen. The
// visible paths are: a direct lossy UTF-16 -> ASCII copy of |in|, or
// stringPrep() followed by either a direct copy (when the prepared label is
// ASCII) or encodeToACE().
// NOTE(review): the conditions selecting between those paths, the
// declaration of rv and the final return are not visible in this view.
544 nsresult
nsIDNService::stringPrepAndACE(const nsAString
& in
, nsACString
& out
,
545 bool allowUnassigned
)
551 if (in
.Length() > kMaxDNSNodeLen
) {
552 NS_WARNING("IDN node too large");
553 return NS_ERROR_FAILURE
;
557 LossyCopyUTF16toASCII(in
, out
);
559 nsAutoString strPrep
;
560 rv
= stringPrep(in
, strPrep
, allowUnassigned
);
561 if (NS_SUCCEEDED(rv
)) {
562 if (IsASCII(strPrep
))
563 LossyCopyUTF16toASCII(strPrep
, out
);
565 rv
= encodeToACE(strPrep
, out
);
// The encoded label must itself fit in a DNS node.
569 if (out
.Length() > kMaxDNSNodeLen
) {
570 NS_WARNING("IDN node too large");
571 return NS_ERROR_FAILURE
;
578 // 1) Whenever dots are used as label separators, the following characters
579 // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
580 // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
// Rewrites |s| in place, replacing single characters with an ASCII ".".
// NOTE(review): the test that selects which characters are replaced and the
// index bookkeeping are not visible in this view; per the comment preceding
// this function they are presumably U+3002, U+FF0E and U+FF61 — confirm.
583 void nsIDNService::normalizeFullStops(nsAString
& s
)
585 nsAString::const_iterator start
, end
;
586 s
.BeginReading(start
);
590 while (start
!= end
) {
595 s
.Replace(index
, 1, NS_LITERAL_STRING("."));
// Decodes one ACE label to UTF-8. The part after the ACE prefix is decoded
// with punycode_decode() into a UCS-4 buffer, converted to UTF-16 and
// rejected if it contains a character from mIDNBlacklist. The UTF-8 result
// is then re-encoded with UTF8toACE() and must equal the input
// (case-insensitively); otherwise NS_ERROR_FAILURE is returned.
// NOTE(review): |output| is allocated with new[] but no delete[] is visible
// in this view — confirm that every return path releases it.
// NOTE(review): the lines ahead of the allocation (presumably the non-ACE
// early return), the remaining punycode_decode() arguments, the declarations
// of utf16 / ace and the success return are not visible here.
605 nsresult
nsIDNService::decodeACE(const nsACString
& in
, nsACString
& out
,
606 bool allowUnassigned
)
615 // RFC 3490 - 4.2 ToUnicode
616 // The ToUnicode output never contains more code points than its input.
617 punycode_uint output_length
= in
.Length() - kACEPrefixLen
+ 1;
618 punycode_uint
*output
= new punycode_uint
[output_length
];
619 NS_ENSURE_TRUE(output
, NS_ERROR_OUT_OF_MEMORY
);
621 enum punycode_status status
= punycode_decode(in
.Length() - kACEPrefixLen
,
622 PromiseFlatCString(in
).get() + kACEPrefixLen
,
626 if (status
!= punycode_success
) {
628 return NS_ERROR_FAILURE
;
// Zero-terminate the decoded code points before handing them to
// ucs4toUtf16().
632 output
[output_length
] = 0;
634 ucs4toUtf16(output
, utf16
);
636 if (!isOnlySafeChars(utf16
, mIDNBlacklist
))
637 return NS_ERROR_FAILURE
;
638 CopyUTF16toUTF8(utf16
, out
);
640 // Validation: encode back to ACE and compare the strings
642 nsresult rv
= UTF8toACE(out
, ace
, allowUnassigned
);
643 NS_ENSURE_SUCCESS(rv
, rv
);
645 if (!ace
.Equals(in
, nsCaseInsensitiveCStringComparator()))
646 return NS_ERROR_FAILURE
;
651 bool nsIDNService::isInWhitelist(const nsACString
&host
)
653 if (mIDNWhitelistPrefBranch
) {
654 nsAutoCString
tld(host
);
655 // make sure the host is ACE for lookup and check that there are no
656 // unassigned codepoints
657 if (!IsASCII(tld
) && NS_FAILED(UTF8toACE(tld
, tld
, false))) {
661 // truncate trailing dots first
663 int32_t pos
= tld
.RFind(".");
664 if (pos
== kNotFound
)
670 if (NS_SUCCEEDED(mIDNWhitelistPrefBranch
->GetBoolPref(tld
.get(), &safe
)))