1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 2002
20 * the Initial Developer. All Rights Reserved.
23 * Naoki Hotta <nhotta@netscape.com> (original author)
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include "nsIDNService.h"
40 #include "nsReadableUtils.h"
42 #include "nsUnicharUtils.h"
43 #include "nsIServiceManager.h"
44 #include "nsIPrefService.h"
45 #include "nsIPrefBranch.h"
46 #include "nsIPrefBranch2.h"
47 #include "nsIObserverService.h"
48 #include "nsISupportsPrimitives.h"
51 #include "mozilla/FunctionTimer.h"
53 //-----------------------------------------------------------------------------
54 // RFC 1034 - 3.1. Name space specifications and terminology
55 static const PRUint32 kMaxDNSNodeLen
= 63;
57 //-----------------------------------------------------------------------------
// Names of the preferences consulted by nsIDNService.
#define NS_NET_PREF_IDNTESTBED   "network.IDN_testbed"          // bool, read into mMultilingualTestBed
#define NS_NET_PREF_IDNPREFIX    "network.IDN_prefix"           // string, copied into mACEPrefix
#define NS_NET_PREF_IDNBLACKLIST "network.IDN.blacklist_chars"  // string, read into mIDNBlacklist
#define NS_NET_PREF_SHOWPUNYCODE "network.IDN_show_punycode"    // bool
#define NS_NET_PREF_IDNWHITELIST "network.IDN.whitelist."       // root of the whitelist pref branch
65 inline PRBool
isOnlySafeChars(const nsAFlatString
& in
,
66 const nsAFlatString
& blacklist
)
68 return (blacklist
.IsEmpty() ||
69 in
.FindCharInSet(blacklist
) == kNotFound
);
72 //-----------------------------------------------------------------------------
74 //-----------------------------------------------------------------------------
76 /* Implementation file */
// nsISupports boilerplate for nsIDNService, generated by the three-interface
// form of the macro.
// NOTE(review): only the last interface argument (nsISupportsWeakReference)
// is present below; the two interface arguments that should precede it are
// missing from this text -- restore them from the class declaration.
77 NS_IMPL_THREADSAFE_ISUPPORTS3(nsIDNService
,
80 nsISupportsWeakReference
)
82 nsresult
nsIDNService::Init()
86 nsCOMPtr
<nsIPrefService
> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID
));
88 prefs
->GetBranch(NS_NET_PREF_IDNWHITELIST
, getter_AddRefs(mIDNWhitelistPrefBranch
));
90 nsCOMPtr
<nsIPrefBranch2
> prefInternal(do_QueryInterface(prefs
));
92 prefInternal
->AddObserver(NS_NET_PREF_IDNTESTBED
, this, PR_TRUE
);
93 prefInternal
->AddObserver(NS_NET_PREF_IDNPREFIX
, this, PR_TRUE
);
94 prefInternal
->AddObserver(NS_NET_PREF_IDNBLACKLIST
, this, PR_TRUE
);
95 prefInternal
->AddObserver(NS_NET_PREF_SHOWPUNYCODE
, this, PR_TRUE
);
96 prefsChanged(prefInternal
, nsnull
);
102 NS_IMETHODIMP
nsIDNService::Observe(nsISupports
*aSubject
,
104 const PRUnichar
*aData
)
106 if (!strcmp(aTopic
, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID
)) {
107 nsCOMPtr
<nsIPrefBranch
> prefBranch( do_QueryInterface(aSubject
) );
109 prefsChanged(prefBranch
, aData
);
114 void nsIDNService::prefsChanged(nsIPrefBranch
*prefBranch
, const PRUnichar
*pref
)
116 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_IDNTESTBED
).Equals(pref
)) {
118 if (NS_SUCCEEDED(prefBranch
->GetBoolPref(NS_NET_PREF_IDNTESTBED
, &val
)))
119 mMultilingualTestBed
= val
;
121 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_IDNPREFIX
).Equals(pref
)) {
122 nsXPIDLCString prefix
;
123 nsresult rv
= prefBranch
->GetCharPref(NS_NET_PREF_IDNPREFIX
, getter_Copies(prefix
));
124 if (NS_SUCCEEDED(rv
) && prefix
.Length() <= kACEPrefixLen
)
125 PL_strncpyz(nsIDNService::mACEPrefix
, prefix
.get(), kACEPrefixLen
+ 1);
127 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_IDNBLACKLIST
).Equals(pref
)) {
128 nsCOMPtr
<nsISupportsString
> blacklist
;
129 nsresult rv
= prefBranch
->GetComplexValue(NS_NET_PREF_IDNBLACKLIST
,
130 NS_GET_IID(nsISupportsString
),
131 getter_AddRefs(blacklist
));
132 if (NS_SUCCEEDED(rv
))
133 blacklist
->ToString(getter_Copies(mIDNBlacklist
));
135 mIDNBlacklist
.Truncate();
137 if (!pref
|| NS_LITERAL_STRING(NS_NET_PREF_SHOWPUNYCODE
).Equals(pref
)) {
139 if (NS_SUCCEEDED(prefBranch
->GetBoolPref(NS_NET_PREF_SHOWPUNYCODE
, &val
)))
144 nsIDNService::nsIDNService()
146 // initialize to the official prefix (RFC 3490 "5. ACE prefix")
147 const char kIDNSPrefix
[] = "xn--";
148 strcpy(mACEPrefix
, kIDNSPrefix
);
150 mMultilingualTestBed
= PR_FALSE
;
152 if (idn_success
!= idn_nameprep_create(NULL
, &mNamePrepHandle
))
153 mNamePrepHandle
= nsnull
;
155 mNormalizer
= do_GetService(NS_UNICODE_NORMALIZER_CONTRACTID
);
156 /* member initializers and constructor code */
159 nsIDNService::~nsIDNService()
161 idn_nameprep_destroy(mNamePrepHandle
);
164 /* ACString ConvertUTF8toACE (in AUTF8String input); */
165 NS_IMETHODIMP
nsIDNService::ConvertUTF8toACE(const nsACString
& input
, nsACString
& ace
)
167 return UTF8toACE(input
, ace
, PR_TRUE
);
// Converts a host name to its ACE form one dot-separated node at a time.
// The input is widened to UTF-16, full stops are normalized, and every node
// is run through stringPrepAndACE(); each encoded node is appended to |ace|.
// A failing node aborts the conversion with that node's error code.
//
// input            host name to encode
// ace              receives the encoded host name
// allowUnassigned  not referenced in the lines visible here
//
// NOTE(review): lines are missing from this text (braces, the declaration
// of rv, the initialisation and updating of len/offset, and the trailing
// argument of both stringPrepAndACE() calls). Restore them before relying
// on this function.
170 nsresult
nsIDNService::UTF8toACE(const nsACString
& input
, nsACString
& ace
, PRBool allowUnassigned
)
173 NS_ConvertUTF8toUTF16
ustr(input
);
175 // map ideographic period to ASCII period etc.
176 normalizeFullStops(ustr
);
// len / offset delimit the node currently being scanned inside ustr.
179 PRUint32 len
, offset
;
182 nsCAutoString encodedBuf
;
184 nsAString::const_iterator start
, end
;
185 ustr
.BeginReading(start
);
186 ustr
.EndReading(end
);
189 // encode nodes if non ASCII
190 while (start
!= end
) {
// A '.' closes the current node; len - 1 leaves the dot itself out.
192 if (*start
++ == (PRUnichar
)'.') {
193 rv
= stringPrepAndACE(Substring(ustr
, offset
, len
- 1), encodedBuf
,
195 NS_ENSURE_SUCCESS(rv
, rv
);
197 ace
.Append(encodedBuf
);
204 // add extra node for multilingual test bed
205 if (mMultilingualTestBed
)
206 ace
.AppendLiteral("mltbd.");
207 // encode the last node if non ASCII
209 rv
= stringPrepAndACE(Substring(ustr
, offset
, len
), encodedBuf
,
211 NS_ENSURE_SUCCESS(rv
, rv
);
213 ace
.Append(encodedBuf
);
219 /* AUTF8String convertACEtoUTF8(in ACString input); */
220 NS_IMETHODIMP
nsIDNService::ConvertACEtoUTF8(const nsACString
& input
, nsACString
& _retval
)
222 return ACEtoUTF8(input
, _retval
, PR_TRUE
);
// Decodes an ACE host name node by node with decodeACE() and appends each
// decoded node to |_retval|. Non-ASCII input is copied through unchanged,
// and whenever a node fails to decode the original input is assigned to
// |_retval| instead (see the RFC 3490 note below).
//
// input            host name to decode
// _retval          receives the decoded host name, or a copy of |input|
// allowUnassigned  not referenced in the lines visible here
//
// NOTE(review): lines are missing from this text (braces, the updating of
// len/offset, the trailing argument of both decodeACE() calls, the dot
// appended between nodes and the return statements). Restore them before
// relying on this function.
225 nsresult
nsIDNService::ACEtoUTF8(const nsACString
& input
, nsACString
& _retval
,
226 PRBool allowUnassigned
)
228 // RFC 3490 - 4.2 ToUnicode
229 // ToUnicode never fails. If any step fails, then the original input
230 // sequence is returned immediately in that step.
232 if (!IsASCII(input
)) {
233 _retval
.Assign(input
);
// len / offset delimit the node currently being scanned inside input.
237 PRUint32 len
= 0, offset
= 0;
238 nsCAutoString decodedBuf
;
240 nsACString::const_iterator start
, end
;
241 input
.BeginReading(start
);
242 input
.EndReading(end
);
245 // loop and decode nodes
246 while (start
!= end
) {
// A '.' closes the current node; len - 1 leaves the dot itself out.
248 if (*start
++ == '.') {
249 if (NS_FAILED(decodeACE(Substring(input
, offset
, len
- 1), decodedBuf
,
251 _retval
.Assign(input
);
255 _retval
.Append(decodedBuf
);
261 // decode the last node
263 if (NS_FAILED(decodeACE(Substring(input
, offset
, len
), decodedBuf
,
265 _retval
.Assign(input
);
267 _retval
.Append(decodedBuf
);
273 /* boolean isACE(in ACString input); */
274 NS_IMETHODIMP
nsIDNService::IsACE(const nsACString
& input
, PRBool
*_retval
)
276 nsACString::const_iterator begin
;
277 input
.BeginReading(begin
);
279 const char *data
= begin
.get();
280 PRUint32 dataLen
= begin
.size_forward();
282 // look for the ACE prefix in the input string. it may occur
283 // at the beginning of any segment in the domain name. for
284 // example: "www.xn--ENCODED.com"
286 const char *p
= PL_strncasestr(data
, mACEPrefix
, dataLen
);
288 *_retval
= p
&& (p
== data
|| *(p
- 1) == '.');
292 /* AUTF8String normalize(in AUTF8String input); */
// Normalizes a UTF-8 host name label by label: the input must be valid
// UTF-8 (otherwise NS_ERROR_UNEXPECTED), full stops are normalized, and each
// label is passed through stringPrep() with allowUnassigned = PR_TRUE. The
// labels are rejoined with '.' and written to |output| as UTF-8. If the
// normalized name contains a blacklisted character, the result of
// ConvertUTF8toACE() on |output| is returned instead.
//
// input   UTF-8 host name
// output  receives the normalized host name
//
// NOTE(review): lines are missing from this text (braces, the declaration
// of rv, the updating of len/offset and the final return). Restore them
// before relying on this function.
293 NS_IMETHODIMP
nsIDNService::Normalize(const nsACString
& input
, nsACString
& output
)
295 // protect against bogus input
296 NS_ENSURE_TRUE(IsUTF8(input
), NS_ERROR_UNEXPECTED
);
298 NS_ConvertUTF8toUTF16
inUTF16(input
);
299 normalizeFullStops(inUTF16
);
301 // pass the domain name to stringprep label by label
302 nsAutoString outUTF16
, outLabel
;
// len / offset delimit the label currently being scanned inside inUTF16.
304 PRUint32 len
= 0, offset
= 0;
306 nsAString::const_iterator start
, end
;
307 inUTF16
.BeginReading(start
);
308 inUTF16
.EndReading(end
);
310 while (start
!= end
) {
// A '.' closes the current label; len - 1 leaves the dot itself out.
312 if (*start
++ == PRUnichar('.')) {
313 rv
= stringPrep(Substring(inUTF16
, offset
, len
- 1), outLabel
, PR_TRUE
);
314 NS_ENSURE_SUCCESS(rv
, rv
);
316 outUTF16
.Append(outLabel
);
317 outUTF16
.Append(PRUnichar('.'));
// Last label (no trailing dot to strip).
323 rv
= stringPrep(Substring(inUTF16
, offset
, len
), outLabel
, PR_TRUE
);
324 NS_ENSURE_SUCCESS(rv
, rv
);
326 outUTF16
.Append(outLabel
);
329 CopyUTF16toUTF8(outUTF16
, output
);
// Names containing blacklisted characters are handed back ACE-encoded.
330 if (!isOnlySafeChars(outUTF16
, mIDNBlacklist
))
331 return ConvertUTF8toACE(output
, output
);
// Produces the form of a host name that should be shown to the user and
// reports through |_isASCII| whether that form is pure ASCII.
//
// ASCII input is lower-cased; if it is ACE, mShowPunycode is off and the
// host is whitelisted, it is decoded with ACEtoUTF8(). Non-ASCII input is
// normalized with Normalize(); it is then converted to ACE when
// mShowPunycode is set, or when the normalized name is still non-ASCII and
// the host is not whitelisted.
//
// input     host name (ACE or UTF-8)
// _isASCII  receives whether |_retval| is ASCII-only
// _retval   receives the display form
//
// NOTE(review): lines are missing from this text (braces, the declarations
// of rv and isACE, the assignment that seeds _retval before ToLowerCase(),
// the else branches and the return statements). Restore them before relying
// on this function.
336 NS_IMETHODIMP
nsIDNService::ConvertToDisplayIDN(const nsACString
& input
, PRBool
* _isASCII
, nsACString
& _retval
)
338 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
339 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
343 if (IsASCII(input
)) {
344 // first, canonicalize the host to lowercase, for whitelist lookup
346 ToLowerCase(_retval
);
349 IsACE(_retval
, &isACE
);
351 if (isACE
&& !mShowPunycode
&& isInWhitelist(_retval
)) {
352 // ACEtoUTF8() can't fail, but might return the original ACE string
353 nsCAutoString
temp(_retval
);
354 ACEtoUTF8(temp
, _retval
, PR_FALSE
);
355 *_isASCII
= IsASCII(_retval
);
360 // We have to normalize the hostname before testing against the domain
361 // whitelist (see bug 315411), and to ensure the entire string gets
363 rv
= Normalize(input
, _retval
);
364 if (NS_FAILED(rv
)) return rv
;
366 if (mShowPunycode
&& NS_SUCCEEDED(ConvertUTF8toACE(_retval
, _retval
))) {
371 // normalization could result in an ASCII-only hostname. alternatively, if
372 // the host is converted to ACE by the normalizer, then the host may contain
373 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694, and bug 309311.
374 *_isASCII
= IsASCII(_retval
);
375 if (!*_isASCII
&& !isInWhitelist(_retval
)) {
377 return ConvertUTF8toACE(_retval
, _retval
);
384 //-----------------------------------------------------------------------------
// Converts a UTF-16 string into a zero-terminated array of UCS-4 values,
// combining a high surrogate followed by a low surrogate into one value.
// When the output index reaches outBufLen the result is truncated: the last
// slot is zeroed and *outLen is set to outBufLen - 1.
//
// in         UTF-16 input
// out        destination array
// outBufLen  capacity limit applied to |out|
// outLen     receives the number of values written
//
// NOTE(review): lines are missing from this text (braces, the declarations
// of i and curChar, EndReading(), the non-surrogate branch, the index
// increment and the final store to *outLen). Restore them before relying
// on this function.
// NOTE(review): on the non-truncated path the terminator is stored at
// out[i] with i possibly equal to outBufLen - 1 or less; callers visible in
// this file pass a buffer one element larger than outBufLen -- confirm that
// every caller does.
386 static void utf16ToUcs4(const nsAString
& in
, PRUint32
*out
, PRUint32 outBufLen
, PRUint32
*outLen
)
389 nsAString::const_iterator start
, end
;
390 in
.BeginReading(start
);
393 while (start
!= end
) {
399 NS_IS_HIGH_SURROGATE(curChar
) &&
400 NS_IS_LOW_SURROGATE(*start
)) {
401 out
[i
] = SURROGATE_TO_UCS4(curChar
, *start
);
408 if (i
>= outBufLen
) {
409 NS_ERROR("input too big, the result truncated");
410 out
[outBufLen
-1] = (PRUint32
)'\0';
411 *outLen
= outBufLen
-1;
415 out
[i
] = (PRUint32
)'\0';
// Appends UCS-4 values to a UTF-16 string: a value outside the BMP is
// appended as a high/low surrogate pair, any other value as a single
// PRUnichar.
//
// in   UCS-4 input
// out  string the UTF-16 code units are appended to
//
// NOTE(review): the loop that walks |in| (and its termination condition),
// the else keyword and the braces are missing from this text. Callers in
// this file zero-terminate the array before calling, which suggests the
// loop stops at a zero value -- confirm.
419 static void ucs4toUtf16(const PRUint32
*in
, nsAString
& out
)
422 if (!IS_IN_BMP(*in
)) {
423 out
.Append((PRUnichar
) H_SURROGATE(*in
));
424 out
.Append((PRUnichar
) L_SURROGATE(*in
));
427 out
.Append((PRUnichar
) *in
);
// Punycode-encodes one node: the UTF-16 input is converted to UCS-4 (at
// most kMaxDNSNodeLen values), encoded with punycode_encode() into a local
// buffer, and |out| is assigned |prefix| followed by the encoded text.
// Returns NS_ERROR_FAILURE when the encoder fails or the encoded length
// does not leave room for the terminating NUL.
//
// prefix  ACE prefix placed in front of the encoded text
// in      node to encode
// out     receives prefix + encoded node
//
// NOTE(review): lines are missing from this text (braces, the declaration
// of ucs4Len, the remaining arguments of punycode_encode() and the success
// return). Restore them before relying on this function.
432 static nsresult
punycode(const char* prefix
, const nsAString
& in
, nsACString
& out
)
434 PRUint32 ucs4Buf
[kMaxDNSNodeLen
+ 1];
436 utf16ToUcs4(in
, ucs4Buf
, kMaxDNSNodeLen
, &ucs4Len
);
438 // need maximum 20 bits to encode 16 bit Unicode character
439 // (include null terminator)
440 const PRUint32 kEncodedBufSize
= kMaxDNSNodeLen
* 20 / 8 + 1 + 1;
441 char encodedBuf
[kEncodedBufSize
];
// In: capacity of encodedBuf.
442 punycode_uint encodedLength
= kEncodedBufSize
;
444 enum punycode_status status
= punycode_encode(ucs4Len
,
// encodedLength must stay below the buffer size so the NUL stored below fits.
450 if (punycode_success
!= status
||
451 encodedLength
>= kEncodedBufSize
)
452 return NS_ERROR_FAILURE
;
454 encodedBuf
[encodedLength
] = '\0';
455 out
.Assign(nsDependentCString(prefix
) + nsDependentCString(encodedBuf
));
// RACE-encodes one node: the input is copied into a work buffer behind a
// one-element placeholder, race_compress_encode() is run on that buffer
// with the mode chosen by get_compress_mode(), and the encoded text is
// appended to |out|. Returns NS_ERROR_FAILURE when the encoder fails.
//
// prefix  ACE prefix (not referenced in the lines visible here)
// in      node to encode
// out     string the encoded node is appended to
//
// NOTE(review): lines are missing from this text (braces, EndReading(),
// the body of the copy loop, whatever writes the prefix to |out|, and the
// success return). Restore them before relying on this function.
// NOTE(review): temp holds kMaxDNSNodeLen + 2 elements and is indexed with
// in.Length() + 1 without a check in the visible lines -- presumably the
// caller bounds the node length; verify.
460 static nsresult
encodeToRACE(const char* prefix
, const nsAString
& in
, nsACString
& out
)
462 // need maximum 20 bits to encode 16 bit Unicode character
463 // (include null terminator)
464 const PRUint32 kEncodedBufSize
= kMaxDNSNodeLen
* 20 / 8 + 1 + 1;
466 // set up a work buffer for RACE encoder
467 PRUnichar temp
[kMaxDNSNodeLen
+ 2];
468 temp
[0] = 0xFFFF; // set a place holder (to be filled by get_compress_mode)
469 temp
[in
.Length() + 1] = (PRUnichar
)'\0';
471 nsAString::const_iterator start
, end
;
472 in
.BeginReading(start
);
// Copy starts at index 1, after the placeholder.
475 for (PRUint32 i
= 1; start
!= end
; i
++)
478 // encode nodes if non ASCII
480 char encodedBuf
[kEncodedBufSize
];
481 idn_result_t result
= race_compress_encode((const unsigned short *) temp
,
482 get_compress_mode((unsigned short *) temp
+ 1),
483 encodedBuf
, kEncodedBufSize
);
484 if (idn_success
!= result
)
485 return NS_ERROR_FAILURE
;
488 out
.Append(encodedBuf
);
// RFC 3454 (stringprep) processing steps:
//
// 1) Map -- For each character in the input, check if it has a mapping
// and, if so, replace it with its mapping. This is described in section 3.
//
// 2) Normalize -- Possibly normalize the result of step 1 using Unicode
// normalization. This is described in section 4.
//
// 3) Prohibit -- Check for any characters that are not allowed in the
// output. If any are found, return an error. This is described in section
// 5.
//
// 4) Check bidi -- Possibly check for right-to-left characters, and if any
// are found, make sure that the whole string satisfies the requirements
// for bidirectional strings. If the string does not satisfy the requirements
// for bidirectional strings, return an error. This is described in section 6.
//
// 5) Check unassigned code points -- If allowUnassigned is false, check for
// any unassigned Unicode points and if any are found return an error.
// This is described in section 7.
// Runs the stringprep steps on one label: map (idn_nameprep_map), NFKC
// normalization (mNormalizer), then the prohibited-character, bidi and --
// only when allowUnassigned is false -- unassigned-code-point checks. On
// success |out| is assigned the normalized string.
//
// Returns NS_ERROR_FAILURE when the nameprep handle or normalizer is
// missing, when mapping fails, when the mapped or normalized string is
// kMaxDNSNodeLen characters or longer, or when any check fails or reports
// an offending code point.
//
// in               label to prepare
// out              receives the prepared label
// allowUnassigned  when false, unassigned code points are rejected
//
// NOTE(review): lines are missing from this text (braces, the declarations
// of rv and ucs4Len, and the success return). Restore them before relying
// on this function.
// NOTE(review): in the visible lines the result of NormalizeUnicodeNFKC()
// is stored in rv but never tested, and the three checks inspect ucs4Buf
// (the input as converted before mapping) rather than the mapped and
// normalized text -- confirm both are intended.
514 nsresult
nsIDNService::stringPrep(const nsAString
& in
, nsAString
& out
,
515 PRBool allowUnassigned
)
517 if (!mNamePrepHandle
|| !mNormalizer
)
518 return NS_ERROR_FAILURE
;
521 PRUint32 ucs4Buf
[kMaxDNSNodeLen
+ 1];
523 utf16ToUcs4(in
, ucs4Buf
, kMaxDNSNodeLen
, &ucs4Len
);
// Step 1: map.
526 idn_result_t idn_err
;
528 PRUint32 namePrepBuf
[kMaxDNSNodeLen
* 3]; // map up to three characters
529 idn_err
= idn_nameprep_map(mNamePrepHandle
, (const PRUint32
*) ucs4Buf
,
530 (PRUint32
*) namePrepBuf
, kMaxDNSNodeLen
* 3);
531 NS_ENSURE_TRUE(idn_err
== idn_success
, NS_ERROR_FAILURE
);
533 nsAutoString namePrepStr
;
534 ucs4toUtf16(namePrepBuf
, namePrepStr
);
535 if (namePrepStr
.Length() >= kMaxDNSNodeLen
)
536 return NS_ERROR_FAILURE
;
// Step 2: normalize (NFKC).
539 nsAutoString normlizedStr
;
540 rv
= mNormalizer
->NormalizeUnicodeNFKC(namePrepStr
, normlizedStr
);
541 if (normlizedStr
.Length() >= kMaxDNSNodeLen
)
542 return NS_ERROR_FAILURE
;
// Step 3: prohibited characters. |found| is set when one is present.
545 const PRUint32
*found
= nsnull
;
546 idn_err
= idn_nameprep_isprohibited(mNamePrepHandle
,
547 (const PRUint32
*) ucs4Buf
, &found
);
548 if (idn_err
!= idn_success
|| found
)
549 return NS_ERROR_FAILURE
;
// Step 4: bidi check.
552 idn_err
= idn_nameprep_isvalidbidi(mNamePrepHandle
,
553 (const PRUint32
*) ucs4Buf
, &found
);
554 if (idn_err
!= idn_success
|| found
)
555 return NS_ERROR_FAILURE
;
// Step 5: unassigned code points, only when the caller disallows them.
557 if (!allowUnassigned
) {
558 // check unassigned code points
559 idn_err
= idn_nameprep_isunassigned(mNamePrepHandle
,
560 (const PRUint32
*) ucs4Buf
, &found
);
561 if (idn_err
!= idn_success
|| found
)
562 return NS_ERROR_FAILURE
;
565 // set the result string
566 out
.Assign(normlizedStr
);
571 nsresult
nsIDNService::encodeToACE(const nsAString
& in
, nsACString
& out
)
573 // RACE encode is supported for existing testing environment
574 if (!strcmp("bq--", mACEPrefix
))
575 return encodeToRACE(mACEPrefix
, in
, out
);
578 return punycode(mACEPrefix
, in
, out
);
// Prepares and encodes a single node. Nodes longer than kMaxDNSNodeLen are
// rejected up front. The node is run through stringPrep(); an ASCII-only
// result is copied to |out| as is, anything else is passed to
// encodeToACE(). An encoded result longer than kMaxDNSNodeLen is rejected
// as well.
//
// in               node to process
// out              receives the ASCII or ACE form of the node
// allowUnassigned  forwarded to stringPrep()
//
// NOTE(review): lines are missing from this text (braces, the declaration
// of rv, the condition guarding the first LossyCopyUTF16toASCII() call, the
// else keywords and the final return). Restore them before relying on this
// function.
581 nsresult
nsIDNService::stringPrepAndACE(const nsAString
& in
, nsACString
& out
,
582 PRBool allowUnassigned
)
588 if (in
.Length() > kMaxDNSNodeLen
) {
589 NS_ERROR("IDN node too large");
590 return NS_ERROR_FAILURE
;
594 LossyCopyUTF16toASCII(in
, out
);
596 nsAutoString strPrep
;
597 rv
= stringPrep(in
, strPrep
, allowUnassigned
);
598 if (NS_SUCCEEDED(rv
)) {
// A prepared node that is pure ASCII needs no ACE encoding.
599 if (IsASCII(strPrep
))
600 LossyCopyUTF16toASCII(strPrep
, out
);
602 rv
= encodeToACE(strPrep
, out
);
// The encoded form is subject to the same length limit as the input.
606 if (out
.Length() > kMaxDNSNodeLen
) {
607 NS_ERROR("IDN node too large");
608 return NS_ERROR_FAILURE
;
// RFC 3490
// 1) Whenever dots are used as label separators, the following characters
//    MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
//    stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
//    stop).
// Walks |s| and replaces single characters, in place, with an ASCII ".".
//
// s  string to rewrite
//
// NOTE(review): most of this function is missing from this text (braces,
// EndReading(), the declaration and updating of index, and the test that
// selects which characters are replaced). Restore them before relying on
// this function.
620 void nsIDNService::normalizeFullStops(nsAString
& s
)
622 nsAString::const_iterator start
, end
;
623 s
.BeginReading(start
);
627 while (start
!= end
) {
632 s
.Replace(index
, 1, NS_LITERAL_STRING("."));
// Decodes one ACE node: the part after the ACE prefix is punycode-decoded
// into a heap buffer, converted to UTF-16, checked against the character
// blacklist and written to |out| as UTF-8. The result is then validated by
// encoding it back with UTF8toACE() and comparing it, case-insensitively,
// with the input.
//
// Returns NS_ERROR_OUT_OF_MEMORY when the buffer cannot be allocated, and
// NS_ERROR_FAILURE when decoding fails, a blacklisted character is found or
// the round trip does not reproduce the input; a failing UTF8toACE() call
// is propagated.
//
// in               node to decode
// out              receives the decoded node as UTF-8
// allowUnassigned  forwarded to UTF8toACE() during validation
//
// NOTE(review): lines are missing from this text (braces, whatever precedes
// the decode step, the remaining arguments of punycode_decode(), the
// declarations of utf16 and ace, and the success return). Restore them
// before relying on this function.
// NOTE(review): |output| is allocated with new[] but no matching delete[]
// appears in the visible lines -- verify that every return path releases it.
642 nsresult
nsIDNService::decodeACE(const nsACString
& in
, nsACString
& out
,
643 PRBool allowUnassigned
)
652 // RFC 3490 - 4.2 ToUnicode
653 // The ToUnicode output never contains more code points than its input.
// One extra element leaves room for the zero stored after decoding.
654 punycode_uint output_length
= in
.Length() - kACEPrefixLen
+ 1;
655 punycode_uint
*output
= new punycode_uint
[output_length
];
656 NS_ENSURE_TRUE(output
, NS_ERROR_OUT_OF_MEMORY
);
658 enum punycode_status status
= punycode_decode(in
.Length() - kACEPrefixLen
,
659 PromiseFlatCString(in
).get() + kACEPrefixLen
,
663 if (status
!= punycode_success
) {
665 return NS_ERROR_FAILURE
;
// Zero-terminate the decoded array before handing it to ucs4toUtf16().
669 output
[output_length
] = 0;
671 ucs4toUtf16(output
, utf16
);
673 if (!isOnlySafeChars(utf16
, mIDNBlacklist
))
674 return NS_ERROR_FAILURE
;
675 CopyUTF16toUTF8(utf16
, out
);
677 // Validation: encode back to ACE and compare the strings
679 nsresult rv
= UTF8toACE(out
, ace
, allowUnassigned
);
680 NS_ENSURE_SUCCESS(rv
, rv
);
682 if (!ace
.Equals(in
, nsCaseInsensitiveCStringComparator()))
683 return NS_ERROR_FAILURE
;
688 PRBool
nsIDNService::isInWhitelist(const nsACString
&host
)
690 if (mIDNWhitelistPrefBranch
) {
691 nsCAutoString
tld(host
);
692 // make sure the host is ACE for lookup and check that there are no
693 // unassigned codepoints
694 if (!IsASCII(tld
) && NS_FAILED(UTF8toACE(tld
, tld
, PR_FALSE
))) {
698 // truncate trailing dots first
700 PRInt32 pos
= tld
.RFind(".");
701 if (pos
== kNotFound
)
707 if (NS_SUCCEEDED(mIDNWhitelistPrefBranch
->GetBoolPref(tld
.get(), &safe
)))