1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsITextToSubURI.h"
8 #include "nsTextToSubURI.h"
10 #include "mozilla/ArrayUtils.h"
11 #include "mozilla/Encoding.h"
12 #include "mozilla/Preferences.h"
13 #include "mozilla/TextUtils.h"
14 #include "mozilla/Utf8.h"
16 using namespace mozilla
;
// The destructor is explicitly defaulted.
18 nsTextToSubURI::~nsTextToSubURI() = default;
// Invokes NS_IMPL_ISUPPORTS for nsTextToSubURI, naming nsITextToSubURI as the
// implemented interface.
// NOTE(review): the macro definition is not visible here -- presumably it
// expands to the nsISupports boilerplate; confirm against its header.
20 NS_IMPL_ISUPPORTS(nsTextToSubURI
, nsITextToSubURI
)
// Encodes aText with the encoding looked up from the aCharset label and then
// runs the encoded bytes through NS_Escape (url_XPAlphas mask) into aOut.
//
// Parameters:
//   aCharset - encoding label passed to Encoding::ForLabelNoReplacement.
//   aText    - text handed to the encoder.
//   aOut     - receives the output of NS_Escape.
//
// Visible return values: NS_ERROR_UCONV_NOCONV and NS_ERROR_OUT_OF_MEMORY.
// NOTE(review): this listing appears to be missing lines (the return-type
// line, the conditions guarding both error returns, the declaration of `rv`,
// any success return and the closing brace). Presumably the first error is
// for an unknown label and the second for a failed NS_Escape -- confirm
// against the canonical file before relying on the control flow.
23 nsTextToSubURI::ConvertAndEscape(const nsACString
& aCharset
,
24 const nsAString
& aText
, nsACString
& aOut
) {
// Resolve the charset label to an encoding object.
25 auto encoding
= Encoding::ForLabelNoReplacement(aCharset
);
28 return NS_ERROR_UCONV_NOCONV
;
// Encode into a temporary byte string; only the first element of the value
// returned by Encode() is kept (in `rv`), the second goes to std::ignore.
31 nsAutoCString intermediate
;
32 std::tie(rv
, std::ignore
) = encoding
->Encode(aText
, intermediate
);
// Escape the encoded bytes into the caller's output string.
37 bool ok
= NS_Escape(intermediate
, aOut
, url_XPAlphas
);
40 return NS_ERROR_OUT_OF_MEMORY
;
// Copies aText, unescapes the copy in place with NS_UnescapeURL, and decodes
// it into aOut with the encoding looked up from the aCharset label
// (DecodeWithoutBOMHandling).
//
// Parameters:
//   aCharset - encoding label passed to Encoding::ForLabelNoReplacement.
//   aText    - escaped input; it is copied before unescaping, so the
//              argument itself is not modified.
//   aOut     - receives the decoded text.
//
// Visible return value: NS_ERROR_UCONV_NOCONV.
// NOTE(review): this listing appears to be missing lines (the return-type
// line, the condition guarding the error return, the body of the
// NS_SUCCEEDED branch, the final return and the closing braces). Confirm
// against the canonical file.
46 nsTextToSubURI::UnEscapeAndConvert(const nsACString
& aCharset
,
47 const nsACString
& aText
, nsAString
& aOut
) {
// Resolve the charset label to an encoding object.
48 auto encoding
= Encoding::ForLabelNoReplacement(aCharset
);
51 return NS_ERROR_UCONV_NOCONV
;
// Work on a local copy so the unescape can happen in place.
53 nsAutoCString
unescaped(aText
);
54 NS_UnescapeURL(unescaped
);
// Decode the unescaped bytes straight into the output string.
55 auto rv
= encoding
->DecodeWithoutBOMHandling(unescaped
, aOut
);
56 if (NS_SUCCEEDED(rv
)) {
// File-local predicate over a charset name. The visible condition matches
// names that start with "ISO-2022-" (nsCRT::strncasecmp over the length of
// that prefix) or that equal "UTF-7" or "HZ-GB-2312" (nsCRT::strcasecmp).
// NOTE(review): the comparison helpers are presumably case-insensitive, as
// their names suggest -- confirm in nsCRT. The return statements and closing
// brace are missing from this listing; presumably a match yields true.
62 static bool statefulCharset(const char* charset
) {
63 // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in
64 // mozilla-central but keeping them here just in case for the benefit of
// NOTE(review): the end of the sentence above is missing from this listing.
66 if (!nsCRT::strncasecmp(charset
, "ISO-2022-", sizeof("ISO-2022-") - 1) ||
67 !nsCRT::strcasecmp(charset
, "UTF-7") ||
68 !nsCRT::strcasecmp(charset
, "HZ-GB-2312"))
// Converts the bytes in aURI to UTF-16 in aOut, using aCharset when a
// decoder is needed.
//
// Visible steps:
//   1. Ask statefulCharset() whether aCharset is one of the stateful names.
//   2. When it is not, CopyASCIItoUTF16 / CopyUTF8toUTF16 are used on aURI.
//   3. An empty aCharset is rejected with NS_ERROR_INVALID_ARG.
//   4. Otherwise the label is resolved with Encoding::ForLabelNoReplacement
//      and the result of DecodeWithoutBOMHandlingAndWithoutReplacement is
//      returned; NS_ERROR_UCONV_NOCONV is also a visible return value.
//
// NOTE(review): this listing appears to be missing lines (the declaration of
// the `aOut` parameter, the conditions selecting each copy call and their
// returns, the condition guarding NS_ERROR_UCONV_NOCONV, closing braces).
// Presumably the copies are taken only when aURI is ASCII / valid UTF-8 --
// confirm against the canonical file.
75 nsresult
nsTextToSubURI::convertURItoUnicode(const nsCString
& aCharset
,
76 const nsCString
& aURI
,
78 // check for 7bit encoding the data may not be ASCII after we decode
79 bool isStatefulCharset
= statefulCharset(aCharset
.get());
// Direct copies are only attempted for charsets that are not stateful.
81 if (!isStatefulCharset
) {
83 CopyASCIItoUTF16(aURI
, aOut
);
87 CopyUTF8toUTF16(aURI
, aOut
);
92 // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
93 NS_ENSURE_FALSE(aCharset
.IsEmpty(), NS_ERROR_INVALID_ARG
);
// Resolve the charset label to an encoding object for the real decode.
95 auto encoding
= Encoding::ForLabelNoReplacement(aCharset
);
98 return NS_ERROR_UCONV_NOCONV
;
// The decoder's own result code is passed back to the caller unchanged.
100 return encoding
->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI
, aOut
);
// Produces a UTF-16 form of aURIFragment in _retval.
//
// Visible steps:
//   1. Unescape aURIFragment into a local string with the flags
//      esc_SkipControl | esc_AlwaysCopy.
//   2. Convert that string with convertURItoUnicode using the "UTF-8" label;
//      any result other than NS_OK falls back to CopyUTF8toUTF16 of the
//      original (still escaped) aURIFragment.
//   3. Fill mIDNBlocklist on first use (when it is empty), then remove U+0020
//      and U+3000 from it.
//   4. Re-escape via NS_EscapeURL with a predicate that tests each char16_t
//      against mIDNBlocklist, assigning the result to _retval.
//
// NOTE(review): this listing appears to be missing lines (closing braces,
// the first argument(s) of NS_EscapeURL, the rest of that call and the
// function's return). Confirm against the canonical file.
103 NS_IMETHODIMP
nsTextToSubURI::UnEscapeURIForUI(const nsACString
& aURIFragment
,
105 nsAString
& _retval
) {
106 nsAutoCString unescapedSpec
;
107 // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
108 NS_UnescapeURL(PromiseFlatCString(aURIFragment
),
109 esc_SkipControl
| esc_AlwaysCopy
, unescapedSpec
);
111 // in case of failure, return escaped URI
112 // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
113 // sequences are also considered failure in this context
114 if (convertURItoUnicode("UTF-8"_ns
, unescapedSpec
, _retval
) != NS_OK
) {
115 // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
116 CopyUTF8toUTF16(aURIFragment
, _retval
);
123 // If there are any characters that are unsafe for URIs, reescape those.
// The blocklist member is populated only while it is still empty.
124 if (mIDNBlocklist
.IsEmpty()) {
125 mozilla::net::InitializeBlocklist(mIDNBlocklist
);
126 // we allow SPACE and IDEOGRAPHIC SPACE in this method
127 mozilla::net::RemoveCharFromBlocklist(u
' ', mIDNBlocklist
);
128 mozilla::net::RemoveCharFromBlocklist(0x3000, mIDNBlocklist
);
131 MOZ_ASSERT(!mIDNBlocklist
.IsEmpty());
132 const nsPromiseFlatString
& unescapedResult
= PromiseFlatString(_retval
);
133 nsString reescapedSpec
;
134 _retval
= NS_EscapeURL(
// Predicate handed to NS_EscapeURL: true for characters found in the
// blocklist.
136 [&](char16_t aChar
) -> bool {
137 return mozilla::net::CharInBlocklist(aChar
, mIDNBlocklist
);
// Thin forwarder: passes all three arguments unchanged to
// UnEscapeNonAsciiURI and returns its result.
// NOTE(review): the return-type line and the closing brace are missing from
// this listing; the "JS" suffix presumably marks the script-facing entry
// point -- confirm against the interface definition.
145 nsTextToSubURI::UnEscapeNonAsciiURIJS(const nsACString
& aCharset
,
146 const nsACString
& aURIFragment
,
147 nsAString
& _retval
) {
148 return UnEscapeNonAsciiURI(aCharset
, aURIFragment
, _retval
);
// Unescapes aURIFragment (flags esc_AlwaysCopy | esc_OnlyNonASCII) and
// converts the result to UTF-16 in _retval using aCharset.
//
// Visible steps:
//   1. Unescape into a local string.
//   2. If that string is not UTF-8 and aCharset is one of "utf-16",
//      "utf-16be", "utf-16le", "utf-7" or "x-imap4-modified-utf7" (compared
//      via LowerCaseEqualsLiteral), copy aURIFragment to _retval with
//      CopyASCIItoUTF16.
//   3. Otherwise call convertURItoUnicode with aCharset.
//   4. Return `rv`, except that NS_OK_UDEC_MOREINPUT is mapped to
//      NS_ERROR_UDEC_ILLEGALINPUT.
//
// NOTE(review): this listing appears to be missing lines (the return inside
// the early branch, the declaration/assignment of `rv` -- presumably from the
// convertURItoUnicode call -- and the closing braces). Confirm against the
// canonical file.
152 nsresult
nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString
& aCharset
,
153 const nsACString
& aURIFragment
,
154 nsAString
& _retval
) {
155 nsAutoCString unescapedSpec
;
156 NS_UnescapeURL(PromiseFlatCString(aURIFragment
),
157 esc_AlwaysCopy
| esc_OnlyNonASCII
, unescapedSpec
);
158 // leave the URI as it is if it's not UTF-8 and aCharset is not an ASCII
159 // superset since converting "http:" with such an encoding is always a bad
// NOTE(review): the end of the sentence above is missing from this listing.
161 if (!IsUtf8(unescapedSpec
) &&
162 (aCharset
.LowerCaseEqualsLiteral("utf-16") ||
163 aCharset
.LowerCaseEqualsLiteral("utf-16be") ||
164 aCharset
.LowerCaseEqualsLiteral("utf-16le") ||
165 aCharset
.LowerCaseEqualsLiteral("utf-7") ||
166 aCharset
.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))) {
167 CopyASCIItoUTF16(aURIFragment
, _retval
);
172 convertURItoUnicode(PromiseFlatCString(aCharset
), unescapedSpec
, _retval
);
173 // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error
174 // if the string ends with a valid (but incomplete) sequence.
175 return rv
== NS_OK_UDEC_MOREINPUT
? NS_ERROR_UDEC_ILLEGALINPUT
: rv
;
178 //----------------------------------------------------------------------