1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "mozilla/Maybe.h" // mozilla::Maybe
6 #include "mozilla/Utf8.h" // mozilla::IsTrailingUnit, mozilla::Utf8Unit, mozilla::DecodeOneUtf8CodePoint
8 #include <inttypes.h> // UINT8_MAX
9 #include <stdint.h> // uint16_t
11 #include "js/Exception.h" // JS_IsExceptionPending, JS_ClearPendingException
12 #include "js/RootingAPI.h" // JS::Rooted, JS::MutableHandle
13 #include "jsapi-tests/tests.h" // BEGIN_TEST, END_TEST, CHECK
14 #include "vm/JSAtomUtils.h" // js::AtomizeChars, js::AtomizeUTF8Chars
15 #include "vm/StringType.h" // JSAtom
17 using mozilla::DecodeOneUtf8CodePoint
;
18 using mozilla::IsAscii
;
19 using mozilla::IsTrailingUnit
;
21 using mozilla::Utf8Unit
;
24 using JS::MutableHandle
;
27 BEGIN_TEST(testAtomizeTwoByteUTF8
) {
28 Rooted
<JSAtom
*> atom16(cx
);
29 Rooted
<JSAtom
*> atom8(cx
);
31 for (uint16_t i
= 0; i
<= UINT8_MAX
; i
++) {
32 // Test cases where the first unit is ASCII.
33 if (IsAscii(char16_t(i
))) {
34 for (uint16_t j
= 0; j
<= UINT8_MAX
; j
++) {
35 if (IsAscii(char16_t(j
))) {
36 // If both units are ASCII, the sequence encodes a two-code point
38 if (!shouldBeTwoCodePoints(i
, j
, &atom16
, &atom8
)) {
42 // ASCII followed by non-ASCII should be invalid.
43 if (!shouldBeInvalid(i
, j
)) {
52 // Test remaining cases where the first unit isn't a two-byte lead.
53 if ((i
& 0b1110'0000) != 0b1100'0000) {
54 for (uint16_t j
= 0; j
<= UINT8_MAX
; j
++) {
55 // If the first unit isn't a two-byte lead, the sequence is invalid no
56 // matter what the second unit is.
57 if (!shouldBeInvalid(i
, j
)) {
65 // Test remaining cases where the first unit is the two-byte lead of a
66 // non-Latin-1 code point.
67 if (i
>= 0b1100'0100) {
68 for (uint16_t j
= 0; j
<= UINT8_MAX
; j
++) {
69 if (IsTrailingUnit(Utf8Unit(static_cast<uint8_t>(j
)))) {
70 if (!shouldBeSingleNonLatin1(i
, j
, &atom16
, &atom8
)) {
74 if (!shouldBeInvalid(i
, j
)) {
83 // Test remaining cases where the first unit is the two-byte lead of an
84 // overlong ASCII code point.
85 if (i
< 0b1100'0010) {
86 for (uint16_t j
= 0; j
<= UINT8_MAX
; j
++) {
87 if (!shouldBeInvalid(i
, j
)) {
95 // Finally, test remaining cases where the first unit is the two-byte lead
96 // of a Latin-1 code point.
97 for (uint16_t j
= 0; j
<= UINT8_MAX
; j
++) {
98 if (IsTrailingUnit(Utf8Unit(static_cast<uint8_t>(j
)))) {
99 if (!shouldBeSingleLatin1(i
, j
, &atom16
, &atom8
)) {
103 if (!shouldBeInvalid(i
, j
)) {
113 bool shouldBeTwoCodePoints(uint16_t first
, uint16_t second
,
114 MutableHandle
<JSAtom
*> atom16
,
115 MutableHandle
<JSAtom
*> atom8
) {
116 CHECK(first
<= UINT8_MAX
);
117 CHECK(second
<= UINT8_MAX
);
118 CHECK(IsAscii(char16_t(first
)));
119 CHECK(IsAscii(char16_t(second
)));
121 const char16_t utf16
[] = {static_cast<char16_t
>(first
),
122 static_cast<char16_t
>(second
)};
123 atom16
.set(js::AtomizeChars(cx
, utf16
, 2));
125 CHECK(atom16
->length() == 2);
126 CHECK(atom16
->latin1OrTwoByteChar(0) == first
);
127 CHECK(atom16
->latin1OrTwoByteChar(1) == second
);
129 const char utf8
[] = {static_cast<char>(first
), static_cast<char>(second
)};
130 atom8
.set(js::AtomizeUTF8Chars(cx
, utf8
, 2));
132 CHECK(atom8
->length() == 2);
133 CHECK(atom8
->latin1OrTwoByteChar(0) == first
);
134 CHECK(atom8
->latin1OrTwoByteChar(1) == second
);
136 CHECK(atom16
== atom8
);
141 bool shouldBeOneCodePoint(uint16_t first
, uint16_t second
, char32_t v
,
142 MutableHandle
<JSAtom
*> atom16
,
143 MutableHandle
<JSAtom
*> atom8
) {
144 CHECK(first
<= UINT8_MAX
);
145 CHECK(second
<= UINT8_MAX
);
146 CHECK(v
<= UINT16_MAX
);
148 const char16_t utf16
[] = {static_cast<char16_t
>(v
)};
149 atom16
.set(js::AtomizeChars(cx
, utf16
, 1));
151 CHECK(atom16
->length() == 1);
152 CHECK(atom16
->latin1OrTwoByteChar(0) == v
);
154 const char utf8
[] = {static_cast<char>(first
), static_cast<char>(second
)};
155 atom8
.set(js::AtomizeUTF8Chars(cx
, utf8
, 2));
157 CHECK(atom8
->length() == 1);
158 CHECK(atom8
->latin1OrTwoByteChar(0) == v
);
160 CHECK(atom16
== atom8
);
165 bool shouldBeSingleNonLatin1(uint16_t first
, uint16_t second
,
166 MutableHandle
<JSAtom
*> atom16
,
167 MutableHandle
<JSAtom
*> atom8
) {
168 CHECK(first
<= UINT8_MAX
);
169 CHECK(second
<= UINT8_MAX
);
171 const char bytes
[] = {static_cast<char>(first
), static_cast<char>(second
)};
172 const char* iter
= &bytes
[1];
174 DecodeOneUtf8CodePoint(Utf8Unit(bytes
[0]), &iter
, bytes
+ 2);
177 char32_t v
= cp
.value();
178 CHECK(v
> UINT8_MAX
);
180 return shouldBeOneCodePoint(first
, second
, v
, atom16
, atom8
);
183 bool shouldBeSingleLatin1(uint16_t first
, uint16_t second
,
184 MutableHandle
<JSAtom
*> atom16
,
185 MutableHandle
<JSAtom
*> atom8
) {
186 CHECK(first
<= UINT8_MAX
);
187 CHECK(second
<= UINT8_MAX
);
189 const char bytes
[] = {static_cast<char>(first
), static_cast<char>(second
)};
190 const char* iter
= &bytes
[1];
192 DecodeOneUtf8CodePoint(Utf8Unit(bytes
[0]), &iter
, bytes
+ 2);
195 char32_t v
= cp
.value();
196 CHECK(v
<= UINT8_MAX
);
198 return shouldBeOneCodePoint(first
, second
, v
, atom16
, atom8
);
201 bool shouldBeInvalid(uint16_t first
, uint16_t second
) {
202 CHECK(first
<= UINT8_MAX
);
203 CHECK(second
<= UINT8_MAX
);
205 const char invalid
[] = {static_cast<char>(first
), static_cast<char>(second
)};
206 CHECK(!js::AtomizeUTF8Chars(cx
, invalid
, 2));
207 CHECK(JS_IsExceptionPending(cx
));
208 JS_ClearPendingException(cx
);
212 END_TEST(testAtomizeTwoByteUTF8
)