Bug 1829068 test setSinkId after loop r=padenot
[gecko.git] / intl / uconv / nsTextToSubURI.cpp
blobe70d9ccbd8917d55d8c47f422c55422723a723a1
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "nsString.h"
6 #include "nsITextToSubURI.h"
7 #include "nsEscape.h"
8 #include "nsTextToSubURI.h"
9 #include "nsCRT.h"
10 #include "mozilla/ArrayUtils.h"
11 #include "mozilla/Encoding.h"
12 #include "mozilla/Preferences.h"
13 #include "mozilla/TextUtils.h"
14 #include "mozilla/Utf8.h"
// Pull the mozilla namespace into scope; the code below refers to Encoding,
// IsAscii and IsUtf8 without qualification.
16 using namespace mozilla;
// The destructor is explicitly defaulted: nothing is released by hand here.
18 nsTextToSubURI::~nsTextToSubURI() = default;
// NOTE(review): presumably expands to the nsISupports boilerplate for this
// class with nsITextToSubURI as its only exposed interface - confirm against
// the macro definition.
20 NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
22 NS_IMETHODIMP
23 nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset,
24 const nsAString& aText, nsACString& aOut) {
25 auto encoding = Encoding::ForLabelNoReplacement(aCharset);
26 if (!encoding) {
27 aOut.Truncate();
28 return NS_ERROR_UCONV_NOCONV;
30 nsresult rv;
31 nsAutoCString intermediate;
32 std::tie(rv, std::ignore) = encoding->Encode(aText, intermediate);
33 if (NS_FAILED(rv)) {
34 aOut.Truncate();
35 return rv;
37 bool ok = NS_Escape(intermediate, aOut, url_XPAlphas);
38 if (!ok) {
39 aOut.Truncate();
40 return NS_ERROR_OUT_OF_MEMORY;
42 return NS_OK;
45 NS_IMETHODIMP
46 nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset,
47 const nsACString& aText, nsAString& aOut) {
48 auto encoding = Encoding::ForLabelNoReplacement(aCharset);
49 if (!encoding) {
50 aOut.Truncate();
51 return NS_ERROR_UCONV_NOCONV;
53 nsAutoCString unescaped(aText);
54 NS_UnescapeURL(unescaped);
55 auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut);
56 if (NS_SUCCEEDED(rv)) {
57 return NS_OK;
59 return rv;
62 static bool statefulCharset(const char* charset) {
63 // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in
64 // mozilla-central but keeping them here just in case for the benefit of
65 // comm-central.
66 if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-") - 1) ||
67 !nsCRT::strcasecmp(charset, "UTF-7") ||
68 !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
69 return true;
71 return false;
74 // static
75 nsresult nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset,
76 const nsCString& aURI,
77 nsAString& aOut) {
78 // check for 7bit encoding the data may not be ASCII after we decode
79 bool isStatefulCharset = statefulCharset(aCharset.get());
81 if (!isStatefulCharset) {
82 if (IsAscii(aURI)) {
83 CopyASCIItoUTF16(aURI, aOut);
84 return NS_OK;
86 if (IsUtf8(aURI)) {
87 CopyUTF8toUTF16(aURI, aOut);
88 return NS_OK;
92 // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
93 NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
95 auto encoding = Encoding::ForLabelNoReplacement(aCharset);
96 if (!encoding) {
97 aOut.Truncate();
98 return NS_ERROR_UCONV_NOCONV;
100 return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut);
103 NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString& aURIFragment,
104 bool aDontEscape,
105 nsAString& _retval) {
106 nsAutoCString unescapedSpec;
107 // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
108 NS_UnescapeURL(PromiseFlatCString(aURIFragment),
109 esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
111 // in case of failure, return escaped URI
112 // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
113 // sequences are also considered failure in this context
114 if (convertURItoUnicode("UTF-8"_ns, unescapedSpec, _retval) != NS_OK) {
115 // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
116 CopyUTF8toUTF16(aURIFragment, _retval);
119 if (aDontEscape) {
120 return NS_OK;
123 // If there are any characters that are unsafe for URIs, reescape those.
124 if (mIDNBlocklist.IsEmpty()) {
125 mozilla::net::InitializeBlocklist(mIDNBlocklist);
126 // we allow SPACE and IDEOGRAPHIC SPACE in this method
127 mozilla::net::RemoveCharFromBlocklist(u' ', mIDNBlocklist);
128 mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist);
131 MOZ_ASSERT(!mIDNBlocklist.IsEmpty());
132 const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval);
133 nsString reescapedSpec;
134 _retval = NS_EscapeURL(
135 unescapedResult,
136 [&](char16_t aChar) -> bool {
137 return mozilla::net::CharInBlocklist(aChar, mIDNBlocklist);
139 reescapedSpec);
141 return NS_OK;
144 NS_IMETHODIMP
145 nsTextToSubURI::UnEscapeNonAsciiURIJS(const nsACString& aCharset,
146 const nsACString& aURIFragment,
147 nsAString& _retval) {
148 return UnEscapeNonAsciiURI(aCharset, aURIFragment, _retval);
151 // static
152 nsresult nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset,
153 const nsACString& aURIFragment,
154 nsAString& _retval) {
155 nsAutoCString unescapedSpec;
156 NS_UnescapeURL(PromiseFlatCString(aURIFragment),
157 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
158 // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
159 // superset since converting "http:" with such an encoding is always a bad
160 // idea.
161 if (!IsUtf8(unescapedSpec) &&
162 (aCharset.LowerCaseEqualsLiteral("utf-16") ||
163 aCharset.LowerCaseEqualsLiteral("utf-16be") ||
164 aCharset.LowerCaseEqualsLiteral("utf-16le") ||
165 aCharset.LowerCaseEqualsLiteral("utf-7") ||
166 aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))) {
167 CopyASCIItoUTF16(aURIFragment, _retval);
168 return NS_OK;
171 nsresult rv =
172 convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, _retval);
173 // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error
174 // if the string ends with a valid (but incomplete) sequence.
175 return rv == NS_OK_UDEC_MOREINPUT ? NS_ERROR_UDEC_ILLEGALINPUT : rv;
178 //----------------------------------------------------------------------