1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsIUnicodeEncoder.h"
7 #include "nsIUnicodeDecoder.h"
8 #include "nsITextToSubURI.h"
10 #include "nsTextToSubURI.h"
12 #include "mozilla/dom/EncodingUtils.h"
14 using mozilla::dom::EncodingUtils
;
// Constructor and destructor of nsTextToSubURI.
16 nsTextToSubURI::nsTextToSubURI()
19 nsTextToSubURI::~nsTextToSubURI()
// Declares, via NS_IMPL_ISUPPORTS, that nsTextToSubURI implements the
// nsITextToSubURI interface.
23 NS_IMPL_ISUPPORTS(nsTextToSubURI
, nsITextToSubURI
)
// Encodes the UTF-16 string |text| into the encoding named by |charset| and
// passes the encoded bytes through nsEscape() with the url_XPAlphas mask,
// storing the result in |*_retval|.
//
// Returns NS_ERROR_UCONV_NOCONV when |charset| is not a recognised encoding
// label, NS_ERROR_OUT_OF_MEMORY when nsEscape() yields null, and otherwise
// the status produced by the encoder calls.
// NOTE(review): the two NS_ERROR_NULL_POINTER returns below are presumably
// argument null-checks; their conditions are not visible here -- confirm.
25 NS_IMETHODIMP
nsTextToSubURI::ConvertAndEscape(
26 const char *charset
, const char16_t
*text
, char **_retval
)
29 return NS_ERROR_NULL_POINTER
;
35 return NS_ERROR_NULL_POINTER
;
// Map the caller-supplied label to an encoding name; labels that cannot be
// resolved are rejected with NS_ERROR_UCONV_NOCONV.
38 nsDependentCString
label(charset
);
39 nsAutoCString encoding
;
40 if (!EncodingUtils::FindEncodingForLabelNoReplacement(label
, encoding
)) {
41 return NS_ERROR_UCONV_NOCONV
;
43 nsCOMPtr
<nsIUnicodeEncoder
> encoder
=
44 EncodingUtils::EncoderForEncoding(encoding
);
// Configure the encoder's error behaviour to kOnError_Replace with '?' as
// the replacement character.
45 rv
= encoder
->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace
, nullptr, (char16_t
)'?');
46 if (NS_SUCCEEDED(rv
) ) {
// A null |text| is treated as having length 0.
49 int32_t ulen
= text
? NS_strlen(text
) : 0;
// Ask the encoder how much output space it may need for the input.
51 if (NS_SUCCEEDED(rv
= encoder
->GetMaxLength(text
, ulen
, &outlen
))) {
// Allocate one byte more than the reported maximum (presumably room for a
// terminating NUL -- confirm).
53 pBuf
= (char*)NS_Alloc(outlen
+1);
55 if (nullptr == pBuf
) {
// Save the buffer capacity: &outlen is handed to Convert() below and the
// space remaining for Finish() is computed from both values afterwards.
59 int32_t bufLen
= outlen
;
60 if (NS_SUCCEEDED(rv
= encoder
->Convert(text
,&ulen
, pBuf
, &outlen
))) {
61 // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
// finLen is the space left in the buffer after the converted bytes.
62 int32_t finLen
= bufLen
- outlen
;
// Finish() writes directly after the converted bytes, at pBuf + outlen.
64 if (NS_SUCCEEDED(encoder
->Finish((char *)(pBuf
+outlen
), &finLen
))) {
// Escape the encoded bytes; a null result from nsEscape() is reported as
// an out-of-memory failure.
69 *_retval
= nsEscape(pBuf
, url_XPAlphas
);
70 if (nullptr == *_retval
) {
71 rv
= NS_ERROR_OUT_OF_MEMORY
;
// Unescapes a copy of |text| with nsUnescape() and decodes the resulting
// bytes from the encoding named by |charset| into a newly allocated UTF-16
// buffer.
//
// Returns NS_ERROR_NULL_POINTER when |_retval| is null,
// NS_ERROR_OUT_OF_MEMORY when a buffer cannot be allocated,
// NS_ERROR_UCONV_NOCONV when |charset| is not a recognised encoding label,
// and otherwise the status produced by the decoder calls.
83 NS_IMETHODIMP
nsTextToSubURI::UnEscapeAndConvert(
84 const char *charset
, const char *text
, char16_t
**_retval
)
86 if(nullptr == _retval
)
87 return NS_ERROR_NULL_POINTER
;
89 // Use an empty string instead of returning an error,
90 // for compatibility with older versions.
// NOTE(review): the condition guarding the next return is not visible here;
// presumably it is a null check on |charset| -- confirm.
97 return NS_ERROR_NULL_POINTER
;
101 // Unescape the string. nsUnescape modifies its input, so work on a copy.
102 char *unescaped
= NS_strdup(text
);
103 if (nullptr == unescaped
)
104 return NS_ERROR_OUT_OF_MEMORY
;
105 unescaped
= nsUnescape(unescaped
);
106 NS_ASSERTION(unescaped
, "nsUnescape returned null");
// Map the caller-supplied label to an encoding name.
// NOTE(review): |unescaped| was allocated by NS_strdup above and no release
// of it is visible before the early return below; this looks like a leak on
// the unknown-label path -- confirm and free it before returning if so.
108 nsDependentCString
label(charset
);
109 nsAutoCString encoding
;
110 if (!EncodingUtils::FindEncodingForLabelNoReplacement(label
, encoding
)) {
111 return NS_ERROR_UCONV_NOCONV
;
113 nsCOMPtr
<nsIUnicodeDecoder
> decoder
=
114 EncodingUtils::DecoderForEncoding(encoding
);
115 char16_t
*pBuf
= nullptr;
// Input length in bytes of the unescaped copy.
116 int32_t len
= strlen(unescaped
);
// Ask the decoder how many UTF-16 code units the output may need.
118 if (NS_SUCCEEDED(rv
= decoder
->GetMaxLength(unescaped
, len
, &outlen
))) {
// Allocate one code unit more than the reported maximum (presumably room
// for a terminating NUL -- confirm).
119 pBuf
= (char16_t
*) NS_Alloc((outlen
+1)*sizeof(char16_t
));
120 if (nullptr == pBuf
) {
121 rv
= NS_ERROR_OUT_OF_MEMORY
;
123 if (NS_SUCCEEDED(rv
= decoder
->Convert(unescaped
, &len
, pBuf
, &outlen
))) {
// File-local predicate over an encoding name. The visible condition matches,
// case-insensitively, any name that starts with "ISO-2022-" or that equals
// "UTF-7" or "HZ-GB-2312".
// NOTE(review): the return statements are not visible here; presumably the
// function returns true when the condition matches -- confirm.
136 static bool statefulCharset(const char *charset
)
// sizeof("ISO-2022-")-1 is the prefix length without the terminating NUL, so
// only the prefix is compared.
138 if (!nsCRT::strncasecmp(charset
, "ISO-2022-", sizeof("ISO-2022-")-1) ||
139 !nsCRT::strcasecmp(charset
, "UTF-7") ||
140 !nsCRT::strcasecmp(charset
, "HZ-GB-2312"))
// Converts the byte string |aURI| to UTF-16 in |_retval|, interpreting it
// according to |aCharset|.
//
// Visible fast paths: for a charset that statefulCharset() does not match,
// pure-ASCII input is widened directly, and when |aIRI| is set the input may
// be copied as UTF-8. Otherwise a decoder for |aCharset| is used.
//
// Returns NS_ERROR_INVALID_ARG when |aCharset| is empty at the point the
// decoder is needed, NS_ERROR_UCONV_NOCONV when |aCharset| is not a
// recognised encoding label, NS_ERROR_OUT_OF_MEMORY when the output buffer
// cannot be allocated, and otherwise the status of the decoder calls.
// NOTE(review): the remaining parameters (|aIRI|, |_retval|) are declared on
// lines not visible here.
146 nsresult
nsTextToSubURI::convertURItoUnicode(const nsAFlatCString
&aCharset
,
147 const nsAFlatCString
&aURI
,
153 // Check for a 7-bit encoding: the data may not be ASCII after we decode it.
154 bool isStatefulCharset
= statefulCharset(aCharset
.get());
// ASCII input under a charset not matched by statefulCharset() needs no
// decoder.
156 if (!isStatefulCharset
&& IsASCII(aURI
)) {
157 CopyASCIItoUTF16(aURI
, _retval
);
// NOTE(review): the UTF-8 copy below is presumably guarded by a further
// check that |aURI| is valid UTF-8 on a line not visible here -- confirm.
161 if (!isStatefulCharset
&& aIRI
) {
163 CopyUTF8toUTF16(aURI
, _retval
);
168 // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
169 NS_ENSURE_FALSE(aCharset
.IsEmpty(), NS_ERROR_INVALID_ARG
);
// Map the caller-supplied label to an encoding name.
171 nsAutoCString encoding
;
172 if (!EncodingUtils::FindEncodingForLabelNoReplacement(aCharset
, encoding
)) {
173 return NS_ERROR_UCONV_NOCONV
;
175 nsCOMPtr
<nsIUnicodeDecoder
> unicodeDecoder
=
176 EncodingUtils::DecoderForEncoding(encoding
);
178 NS_ENSURE_SUCCESS(rv
, rv
);
// Set the decoder's input error behaviour to kOnError_Signal.
179 unicodeDecoder
->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal
);
181 int32_t srcLen
= aURI
.Length();
// Ask the decoder how many UTF-16 code units the output may need.
183 rv
= unicodeDecoder
->GetMaxLength(aURI
.get(), srcLen
, &dstLen
);
184 NS_ENSURE_SUCCESS(rv
, rv
);
186 char16_t
*ustr
= (char16_t
*) NS_Alloc(dstLen
* sizeof(char16_t
));
187 NS_ENSURE_TRUE(ustr
, NS_ERROR_OUT_OF_MEMORY
);
189 rv
= unicodeDecoder
->Convert(aURI
.get(), &srcLen
, ustr
, &dstLen
);
// Only publish the decoded text when Convert() succeeded; Assign() is given
// the explicit length |dstLen|.
191 if (NS_SUCCEEDED(rv
))
192 _retval
.Assign(ustr
, dstLen
);
// Unescapes |aURIFragment| and converts the result to UTF-16 in |_retval|
// using |aCharset|. When that conversion does not succeed, |_retval|
// receives the original, still-escaped fragment interpreted as UTF-8.
// NOTE(review): the |_retval| parameter is declared on a line not visible
// here.
199 NS_IMETHODIMP
nsTextToSubURI::UnEscapeURIForUI(const nsACString
& aCharset
,
200 const nsACString
&aURIFragment
,
203 nsAutoCString unescapedSpec
;
204 // Skip control octets (0x00 - 0x1f and 0x7f) when unescaping.
205 NS_UnescapeURL(PromiseFlatCString(aURIFragment
),
206 esc_SkipControl
| esc_AlwaysCopy
, unescapedSpec
);
208 // In case of failure, return the escaped URI.
209 // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
210 // sequences are also considered failure in this context.
211 if (convertURItoUnicode(
212 PromiseFlatCString(aCharset
), unescapedSpec
, true, _retval
)
214 // Assume UTF-8 instead of ASCII because the hostname (IDN) may be in UTF-8.
215 CopyUTF8toUTF16(aURIFragment
, _retval
);
// Unescapes |aURIFragment| using the esc_OnlyNonASCII flag and converts the
// result to UTF-16 in |_retval| using |aCharset|. When the unescaped bytes
// are not UTF-8 and |aCharset| is one of the listed UTF-16 / UTF-7 variants,
// the original fragment is copied to |_retval| as ASCII instead.
// NOTE(review): the |_retval| parameter is declared on a line not visible
// here.
219 NS_IMETHODIMP
nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString
& aCharset
,
220 const nsACString
& aURIFragment
,
223 nsAutoCString unescapedSpec
;
224 NS_UnescapeURL(PromiseFlatCString(aURIFragment
),
225 esc_AlwaysCopy
| esc_OnlyNonASCII
, unescapedSpec
);
226 // Leave the URI as it is if it's not UTF-8 and aCharset is not an ASCII
227 // superset, since converting "http:" with such an encoding is always a bad
// idea.
229 if (!IsUTF8(unescapedSpec
) &&
230 (aCharset
.LowerCaseEqualsLiteral("utf-16") ||
231 aCharset
.LowerCaseEqualsLiteral("utf-16be") ||
232 aCharset
.LowerCaseEqualsLiteral("utf-16le") ||
233 aCharset
.LowerCaseEqualsLiteral("utf-7") ||
234 aCharset
.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
235 CopyASCIItoUTF16(aURIFragment
, _retval
);
// Otherwise decode through the shared helper, passing true for its third
// argument.
239 return convertURItoUnicode(PromiseFlatCString(aCharset
), unescapedSpec
, true, _retval
);
242 //----------------------------------------------------------------------