Bumping manifests a=b2g-bump
[gecko.git] / intl / uconv / nsTextToSubURI.cpp
blobf68ccaee8ce291a50b1c68129d69e3a400fbd3fc
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "nsString.h"
6 #include "nsIUnicodeEncoder.h"
7 #include "nsIUnicodeDecoder.h"
8 #include "nsITextToSubURI.h"
9 #include "nsEscape.h"
10 #include "nsTextToSubURI.h"
11 #include "nsCRT.h"
12 #include "mozilla/dom/EncodingUtils.h"
14 using mozilla::dom::EncodingUtils;
16 nsTextToSubURI::nsTextToSubURI()
19 nsTextToSubURI::~nsTextToSubURI()
23 NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
25 NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape(
26 const char *charset, const char16_t *text, char **_retval)
28 if (!_retval) {
29 return NS_ERROR_NULL_POINTER;
31 *_retval = nullptr;
32 nsresult rv = NS_OK;
34 if (!charset) {
35 return NS_ERROR_NULL_POINTER;
38 nsDependentCString label(charset);
39 nsAutoCString encoding;
40 if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
41 return NS_ERROR_UCONV_NOCONV;
43 nsCOMPtr<nsIUnicodeEncoder> encoder =
44 EncodingUtils::EncoderForEncoding(encoding);
45 rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (char16_t)'?');
46 if (NS_SUCCEEDED(rv) ) {
47 char buf[256];
48 char *pBuf = buf;
49 int32_t ulen = text ? NS_strlen(text) : 0;
50 int32_t outlen = 0;
51 if (NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen))) {
52 if (outlen >= 256) {
53 pBuf = (char*)NS_Alloc(outlen+1);
55 if (nullptr == pBuf) {
56 outlen = 255;
57 pBuf = buf;
59 int32_t bufLen = outlen;
60 if (NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) {
61 // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
62 int32_t finLen = bufLen - outlen;
63 if (finLen > 0) {
64 if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen))) {
65 outlen += finLen;
68 pBuf[outlen] = '\0';
69 *_retval = nsEscape(pBuf, url_XPAlphas);
70 if (nullptr == *_retval) {
71 rv = NS_ERROR_OUT_OF_MEMORY;
75 if (pBuf != buf) {
76 NS_Free(pBuf);
80 return rv;
83 NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert(
84 const char *charset, const char *text, char16_t **_retval)
86 if(nullptr == _retval)
87 return NS_ERROR_NULL_POINTER;
88 if(nullptr == text) {
89 // set empty string instead of returning error
90 // due to compatibility for old version
91 text = "";
93 *_retval = nullptr;
94 nsresult rv = NS_OK;
96 if (!charset) {
97 return NS_ERROR_NULL_POINTER;
101 // unescape the string, unescape changes the input
102 char *unescaped = NS_strdup(text);
103 if (nullptr == unescaped)
104 return NS_ERROR_OUT_OF_MEMORY;
105 unescaped = nsUnescape(unescaped);
106 NS_ASSERTION(unescaped, "nsUnescape returned null");
108 nsDependentCString label(charset);
109 nsAutoCString encoding;
110 if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
111 return NS_ERROR_UCONV_NOCONV;
113 nsCOMPtr<nsIUnicodeDecoder> decoder =
114 EncodingUtils::DecoderForEncoding(encoding);
115 char16_t *pBuf = nullptr;
116 int32_t len = strlen(unescaped);
117 int32_t outlen = 0;
118 if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) {
119 pBuf = (char16_t *) NS_Alloc((outlen+1)*sizeof(char16_t));
120 if (nullptr == pBuf) {
121 rv = NS_ERROR_OUT_OF_MEMORY;
122 } else {
123 if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) {
124 pBuf[outlen] = 0;
125 *_retval = pBuf;
126 } else {
127 NS_Free(pBuf);
131 NS_Free(unescaped);
133 return rv;
136 static bool statefulCharset(const char *charset)
138 if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
139 !nsCRT::strcasecmp(charset, "UTF-7") ||
140 !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
141 return true;
143 return false;
146 nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
147 const nsAFlatCString &aURI,
148 bool aIRI,
149 nsAString &_retval)
151 nsresult rv = NS_OK;
153 // check for 7bit encoding the data may not be ASCII after we decode
154 bool isStatefulCharset = statefulCharset(aCharset.get());
156 if (!isStatefulCharset && IsASCII(aURI)) {
157 CopyASCIItoUTF16(aURI, _retval);
158 return rv;
161 if (!isStatefulCharset && aIRI) {
162 if (IsUTF8(aURI)) {
163 CopyUTF8toUTF16(aURI, _retval);
164 return rv;
168 // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
169 NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
171 nsAutoCString encoding;
172 if (!EncodingUtils::FindEncodingForLabelNoReplacement(aCharset, encoding)) {
173 return NS_ERROR_UCONV_NOCONV;
175 nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder =
176 EncodingUtils::DecoderForEncoding(encoding);
178 NS_ENSURE_SUCCESS(rv, rv);
179 unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
181 int32_t srcLen = aURI.Length();
182 int32_t dstLen;
183 rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen);
184 NS_ENSURE_SUCCESS(rv, rv);
186 char16_t *ustr = (char16_t *) NS_Alloc(dstLen * sizeof(char16_t));
187 NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
189 rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen);
191 if (NS_SUCCEEDED(rv))
192 _retval.Assign(ustr, dstLen);
194 NS_Free(ustr);
196 return rv;
199 NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
200 const nsACString &aURIFragment,
201 nsAString &_retval)
203 nsAutoCString unescapedSpec;
204 // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
205 NS_UnescapeURL(PromiseFlatCString(aURIFragment),
206 esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
208 // in case of failure, return escaped URI
209 // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
210 // sequences are also considered failure in this context
211 if (convertURItoUnicode(
212 PromiseFlatCString(aCharset), unescapedSpec, true, _retval)
213 != NS_OK)
214 // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
215 CopyUTF8toUTF16(aURIFragment, _retval);
216 return NS_OK;
219 NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset,
220 const nsACString & aURIFragment,
221 nsAString &_retval)
223 nsAutoCString unescapedSpec;
224 NS_UnescapeURL(PromiseFlatCString(aURIFragment),
225 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
226 // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
227 // superset since converting "http:" with such an encoding is always a bad
228 // idea.
229 if (!IsUTF8(unescapedSpec) &&
230 (aCharset.LowerCaseEqualsLiteral("utf-16") ||
231 aCharset.LowerCaseEqualsLiteral("utf-16be") ||
232 aCharset.LowerCaseEqualsLiteral("utf-16le") ||
233 aCharset.LowerCaseEqualsLiteral("utf-7") ||
234 aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
235 CopyASCIItoUTF16(aURIFragment, _retval);
236 return NS_OK;
239 return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval);
242 //----------------------------------------------------------------------