1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
9 #include "mozilla/Attributes.h"
10 #include "mozilla/Casting.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/FloatingPoint.h"
13 #include "mozilla/PodOperations.h"
14 #include "mozilla/Range.h"
15 #include "mozilla/TypeTraits.h"
16 #include "mozilla/UniquePtr.h"
33 #include "builtin/Intl.h"
34 #include "builtin/RegExp.h"
35 #include "js/Conversions.h"
37 #include "unicode/unorm.h"
39 #include "vm/GlobalObject.h"
40 #include "vm/Interpreter.h"
41 #include "vm/Opcodes.h"
42 #include "vm/RegExpObject.h"
43 #include "vm/RegExpStatics.h"
44 #include "vm/ScopeObject.h"
45 #include "vm/StringBuffer.h"
47 #include "jsinferinlines.h"
49 #include "vm/Interpreter-inl.h"
50 #include "vm/String-inl.h"
51 #include "vm/StringObject-inl.h"
54 using namespace js::gc
;
55 using namespace js::types
;
56 using namespace js::unicode
;
63 using mozilla::AssertedCast
;
64 using mozilla::CheckedInt
;
66 using mozilla::IsNegativeZero
;
67 using mozilla::IsSame
;
69 using mozilla::PodCopy
;
70 using mozilla::PodEqual
;
71 using mozilla::RangedPtr
;
72 using mozilla::UniquePtr
;
74 using JS::AutoCheckCannotGC
;
// Converts argument |argno| of |args| to a linear string.
//
// When |argno| is at or beyond args.length() the runtime's "undefined" name
// (cx->names().undefined) is returned. Otherwise the argument is converted
// with ToString<CanGC>, the converted string is stored back into args[argno]
// (presumably so that it stays rooted through the call args -- confirm), and
// the result of str->ensureLinear(cx) is returned.
//
// NOTE(review): the handling of a failed ToString is on lines that are not
// visible in this view.
76 static JSLinearString
*
77 ArgToRootedString(JSContext
* cx
, CallArgs
& args
, unsigned argno
)
79 if (argno
>= args
.length())
80 return cx
->names().undefined
;
82 JSString
* str
= ToString
<CanGC
>(cx
, args
[argno
]);
86 args
[argno
].setString(str
);
87 return str
->ensureLinear(cx
);
91 * Forward declarations for URI encode/decode and helper routines
// Prototypes for the four URI natives (decodeURI, decodeURIComponent,
// encodeURI, encodeURIComponent). They are referenced by the
// string_functions table; their definitions are not in this part of the file.
// All four take the (cx, argc, vp) native-call parameters.
94 str_decodeURI(JSContext
* cx
, unsigned argc
, Value
* vp
);
97 str_decodeURI_Component(JSContext
* cx
, unsigned argc
, Value
* vp
);
100 str_encodeURI(JSContext
* cx
, unsigned argc
, Value
* vp
);
103 str_encodeURI_Component(JSContext
* cx
, unsigned argc
, Value
* vp
);
106 * Global string methods
// Produces the escape() encoding of chars[0..length) in a newly allocated,
// NUL-terminated Latin1 buffer and stores the encoded length in
// |*newLengthOut|.
//
// shouldPassThrough flags the ASCII characters that are not escaped: letters,
// digits and the punctuation @ * _ + - . / (the statement that copies such a
// character is on a line not visible here). Any other character below 256 is
// written as "%XX"; everything else is written as "%uXXXX". Hex digits are
// upper case.
//
// The work is done in two passes: the first only computes the output length
// (+2 for a %XX escape, +5 for a %uXXXX escape), so that a single buffer of
// newLength + 1 characters can be obtained from cx->pod_malloc; the second
// fills it. The static_assert documents why that length cannot overflow
// uint32_t.
//
// NOTE(review): the allocation-failure check and the return statement are not
// visible in this view.
111 template <typename CharT
>
113 Escape(JSContext
* cx
, const CharT
* chars
, uint32_t length
, uint32_t* newLengthOut
)
115 static const uint8_t shouldPassThrough
[128] = {
116 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
117 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
118 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
119 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
120 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
121 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
122 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
123 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
126 /* Take a first pass and see how big the result string will need to be. */
127 uint32_t newLength
= length
;
128 for (size_t i
= 0; i
< length
; i
++) {
129 char16_t ch
= chars
[i
];
130 if (ch
< 128 && shouldPassThrough
[ch
])
133 /* The character will be encoded as %XX or %uXXXX. */
134 newLength
+= (ch
< 256) ? 2 : 5;
137 * newlength is incremented by at most 5 on each iteration, so worst
138 * case newlength == length * 6. This can't overflow.
140 static_assert(JSString::MAX_LENGTH
< UINT32_MAX
/ 6,
141 "newlength must not overflow");
144 Latin1Char
* newChars
= cx
->pod_malloc
<Latin1Char
>(newLength
+ 1);
148 static const char digits
[] = "0123456789ABCDEF";
151 for (i
= 0, ni
= 0; i
< length
; i
++) {
152 char16_t ch
= chars
[i
];
153 if (ch
< 128 && shouldPassThrough
[ch
]) {
155 } else if (ch
< 256) {
156 newChars
[ni
++] = '%';
157 newChars
[ni
++] = digits
[ch
>> 4];
158 newChars
[ni
++] = digits
[ch
& 0xF];
160 newChars
[ni
++] = '%';
161 newChars
[ni
++] = 'u';
162 newChars
[ni
++] = digits
[ch
>> 12];
163 newChars
[ni
++] = digits
[(ch
& 0xF00) >> 8];
164 newChars
[ni
++] = digits
[(ch
& 0xF0) >> 4];
165 newChars
[ni
++] = digits
[ch
& 0xF];
168 MOZ_ASSERT(ni
== newLength
);
169 newChars
[newLength
] = 0;
171 *newLengthOut
= newLength
;
// Native behind the global escape() function (see string_functions).
//
// Converts argument 0 to a linear string with ArgToRootedString, runs Escape
// over its characters -- the Latin1 or the two-byte instantiation, depending
// on how the string is stored -- and builds the result string from the
// returned buffer with NewString<CanGC>. The result is handed back through
// args.rval().
//
// Each character pointer is taken under its own AutoCheckCannotGC, which is
// declared inside the branch that uses it.
//
// NOTE(review): the null checks and the release of |newChars| after the
// string has been created are on lines not visible in this view.
176 str_escape(JSContext
* cx
, unsigned argc
, Value
* vp
)
178 CallArgs args
= CallArgsFromVp(argc
, vp
);
180 JSLinearString
* str
= ArgToRootedString(cx
, args
, 0);
184 ScopedJSFreePtr
<Latin1Char
> newChars
;
186 if (str
->hasLatin1Chars()) {
187 AutoCheckCannotGC nogc
;
188 newChars
= Escape(cx
, str
->latin1Chars(nogc
), str
->length(), &newLength
);
190 AutoCheckCannotGC nogc
;
191 newChars
= Escape(cx
, str
->twoByteChars(nogc
), str
->length(), &newLength
);
197 JSString
* res
= NewString
<CanGC
>(cx
, newChars
.get(), newLength
);
202 args
.rval().setString(res
);
// Decodes four hexadecimal digits starting at |chars| into |*result|.
//
// All four characters are validated with JS7_ISHEX before anything is
// written; the value is then assembled most-significant digit first, four
// bits per digit. Presumably the function returns false when a character is
// not a hex digit and true otherwise -- the return statements are not visible
// in this view, but callers test the result as a condition.
206 template <typename CharT
>
208 Unhex4(const RangedPtr
<const CharT
> chars
, char16_t
* result
)
210 char16_t a
= chars
[0],
215 if (!(JS7_ISHEX(a
) && JS7_ISHEX(b
) && JS7_ISHEX(c
) && JS7_ISHEX(d
)))
218 *result
= (((((JS7_UNHEX(a
) << 4) + JS7_UNHEX(b
)) << 4) + JS7_UNHEX(c
)) << 4) + JS7_UNHEX(d
);
// Decodes two hexadecimal digits starting at |chars| into |*result|.
//
// Both characters are validated with JS7_ISHEX first; the value is the first
// digit shifted left by four bits plus the second digit. Presumably returns
// false for a non-hex character and true otherwise -- the return statements
// are not visible in this view.
222 template <typename CharT
>
224 Unhex2(const RangedPtr
<const CharT
> chars
, char16_t
* result
)
226 char16_t a
= chars
[0],
229 if (!(JS7_ISHEX(a
) && JS7_ISHEX(b
)))
232 *result
= (JS7_UNHEX(a
) << 4) + JS7_UNHEX(b
);
// Core of unescape(): walks |chars| and decodes escape sequences into |sb|.
//
// A 'u' at position k + 1 selects the four-digit form, decoded by Unhex4 from
// k + 2; the two-digit form is decoded by Unhex2 from k + 1.
//
// Nothing is written to |sb| until the first escape has been decoded. The
// |building| flag tracks this: ENSURE_BUILDING reserves |length| characters
// in |sb| and copies the first k input characters into it, and from then on
// every character |c| is appended. When |building| is still false at the end
// the input contained no escapes (see the optimisation note in the body).
//
// Lengths and indices are held in signed ints, which the static_assert shows
// to be wide enough, so length comparisons cannot wrap around.
//
// NOTE(review): the loop header, the '%' test and the return statements are
// on lines not visible in this view.
236 template <typename CharT
>
238 Unescape(StringBuffer
& sb
, const mozilla::Range
<const CharT
> chars
)
241 * NB: use signed integers for length/index to allow simple length
242 * comparisons without unsigned-underflow hazards.
244 static_assert(JSString::MAX_LENGTH
<= INT_MAX
, "String length must fit in a signed integer");
245 int length
= AssertedCast
<int>(chars
.length());
248 * Note that the spec algorithm has been optimized to avoid building
249 * a string in the case where no escapes are present.
254 bool building
= false;
259 char16_t c
= chars
[k
];
270 if (chars
[k
+ 1] != 'u')
273 #define ENSURE_BUILDING \
277 if (!sb.reserve(length)) \
279 sb.infallibleAppend(chars.start().get(), k); \
284 if (Unhex4(chars
.start() + k
+ 2, &c
)) {
296 if (Unhex2(chars
.start() + k
+ 1, &c
)) {
302 if (building
&& !sb
.append(c
))
310 #undef ENSURE_BUILDING
// Native behind the global unescape() function (see string_functions).
//
// Converts argument 0 to a rooted linear string and runs Unescape over its
// Latin1 or two-byte character range. When the input is two-byte the string
// buffer |sb| is switched to two-byte storage up front with
// ensureTwoByteChars(). The result is produced with sb.finishString() and
// returned through args.rval().
//
// NOTE(review): the declaration of |sb| and the condition that guards
// finishString() are not visible here -- presumably the input string itself
// is returned when Unescape wrote nothing to |sb|; confirm.
315 str_unescape(JSContext
* cx
, unsigned argc
, Value
* vp
)
317 CallArgs args
= CallArgsFromVp(argc
, vp
);
320 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
326 if (str
->hasTwoByteChars() && !sb
.ensureTwoByteChars())
329 if (str
->hasLatin1Chars()) {
330 AutoCheckCannotGC nogc
;
331 if (!Unescape(sb
, str
->latin1Range(nogc
)))
334 AutoCheckCannotGC nogc
;
335 if (!Unescape(sb
, str
->twoByteRange(nogc
)))
339 JSLinearString
* result
;
341 result
= sb
.finishString();
348 args
.rval().setString(result
);
// Native behind the global uneval() function (see string_functions): returns
// ValueToSource(cx, args.get(0)) through args.rval(). args.get(0) is used, so
// a call without arguments is handled as well.
// NOTE(review): the failure check on |str| is on lines not visible here.
354 str_uneval(JSContext
* cx
, unsigned argc
, Value
* vp
)
356 CallArgs args
= CallArgsFromVp(argc
, vp
);
357 JSString
* str
= ValueToSource(cx
, args
.get(0));
361 args
.rval().setString(str
);
// Function table that binds the global string natives to their names:
// escape, unescape, uneval and the four URI encode/decode functions.
// Every entry passes 1 and 0 as the last two JS_FN arguments -- presumably
// the declared argument count and the flags; confirm against the JS_FN macro.
// NOTE(review): the gaps in the original line numbering around the uneval
// entry suggest a preprocessor guard that is not visible in this view.
366 static const JSFunctionSpec string_functions
[] = {
367 JS_FN(js_escape_str
, str_escape
, 1,0),
368 JS_FN(js_unescape_str
, str_unescape
, 1,0),
370 JS_FN(js_uneval_str
, str_uneval
, 1,0),
372 JS_FN(js_decodeURI_str
, str_decodeURI
, 1,0),
373 JS_FN(js_encodeURI_str
, str_encodeURI
, 1,0),
374 JS_FN(js_decodeURIComponent_str
, str_decodeURI_Component
, 1,0),
375 JS_FN(js_encodeURIComponent_str
, str_encodeURI_Component
, 1,0),
// Attributes given to the indexed character properties that str_enumerate
// and str_resolve define on String objects: enumerable, read-only and
// permanent.
380 static const unsigned STRING_ELEMENT_ATTRS
= JSPROP_ENUMERATE
| JSPROP_READONLY
| JSPROP_PERMANENT
;
// Defines every indexed character property of the String object |obj|.
//
// For each index i of the wrapped (unboxed) string, a dependent string of
// length 1 starting at i is created and defined as element i of |obj| with
// STRING_ELEMENT_ATTRS. |str| and |value| are rooted because both
// NewDependentString and defineElement take |cx|.
//
// NOTE(review): presumably installed as the enumerate hook of
// StringObject::class_; the hook slot and the failure returns are not visible
// in this view.
383 str_enumerate(JSContext
* cx
, HandleObject obj
)
385 RootedString
str(cx
, obj
->as
<StringObject
>().unbox());
386 RootedValue
value(cx
);
387 for (size_t i
= 0, length
= str
->length(); i
< length
; i
++) {
388 JSString
* str1
= NewDependentString(cx
, str
, i
, 1);
391 value
.setString(str1
);
392 if (!JSObject::defineElement(cx
, obj
, i
, value
, nullptr, nullptr, STRING_ELEMENT_ATTRS
))
// Lazily defines a single indexed character property on the String object
// |obj|.
//
// Ids that are not integers are not handled here. For an integer id inside
// the wrapped string's length, the one-character string for that position is
// fetched with staticStrings().getUnitStringForElement and defined as an
// element of |obj| with STRING_ELEMENT_ATTRS.
//
// |slot| is cast to size_t before the length comparison, so a negative id
// becomes a very large value and fails the bounds test.
//
// NOTE(review): the writes to |*resolvedp| and the return statements are on
// lines not visible in this view.
400 js::str_resolve(JSContext
* cx
, HandleObject obj
, HandleId id
, bool* resolvedp
)
402 if (!JSID_IS_INT(id
))
405 RootedString
str(cx
, obj
->as
<StringObject
>().unbox());
407 int32_t slot
= JSID_TO_INT(id
);
408 if ((size_t)slot
< str
->length()) {
409 JSString
* str1
= cx
->staticStrings().getUnitStringForElement(cx
, str
, size_t(slot
));
412 RootedValue
value(cx
, StringValue(str1
));
413 if (!JSObject::defineElement(cx
, obj
, uint32_t(slot
), value
, nullptr, nullptr,
414 STRING_ELEMENT_ATTRS
))
// Class descriptor for String wrapper objects.
// The flags reserve StringObject::RESERVED_SLOTS slots and cache the
// JSProto_String prototype; the add/del/get/setProperty hooks are null.
// NOTE(review): the class name string and the remaining hooks are on lines
// not visible in this view.
423 const Class
StringObject::class_
= {
425 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS
) |
426 JSCLASS_HAS_CACHED_PROTO(JSProto_String
),
427 nullptr, /* addProperty */
428 nullptr, /* delProperty */
429 nullptr, /* getProperty */
430 nullptr, /* setProperty */
436 * Returns a JSString * for the |this| value associated with 'call', or throws
437 * a TypeError if |this| is null or undefined. This algorithm is the same as
438 * calling CheckObjectCoercible(this), then returning ToString(this), as all
439 * String.prototype.* methods do (other than toString and valueOf).
// Implementation notes -- the cases are tried in this order:
//   1. |this| is a primitive string: returned as is.
//   2. |this| is a StringObject whose toString is still the native
//      js_str_toString (checked with ClassMethodIsNative): the wrapped string
//      is unboxed directly and stored back with call.setThis.
//   3. |this| is null or undefined: JSMSG_CANT_CONVERT_TO is reported, naming
//      "null" or "undefined".
//   4. Anything else: ToStringSlow<CanGC> is used and the result is stored
//      back with call.setThis.
// JS_CHECK_RECURSION makes the function return nullptr when the recursion
// check fails.
// NOTE(review): the return statements of cases 2-4 are on lines not visible
// in this view.
441 static MOZ_ALWAYS_INLINE JSString
*
442 ThisToStringForStringProto(JSContext
* cx
, CallReceiver call
)
444 JS_CHECK_RECURSION(cx
, return nullptr);
446 if (call
.thisv().isString())
447 return call
.thisv().toString();
449 if (call
.thisv().isObject()) {
450 RootedObject
obj(cx
, &call
.thisv().toObject());
451 if (obj
->is
<StringObject
>()) {
452 StringObject
* nobj
= &obj
->as
<StringObject
>();
453 Rooted
<jsid
> id(cx
, NameToId(cx
->names().toString
));
454 if (ClassMethodIsNative(cx
, nobj
, &StringObject::class_
, id
, js_str_toString
)) {
455 JSString
* str
= nobj
->unbox();
456 call
.setThis(StringValue(str
));
460 } else if (call
.thisv().isNullOrUndefined()) {
461 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_CANT_CONVERT_TO
,
462 call
.thisv().isNull() ? "null" : "undefined", "object");
466 JSString
* str
= ToStringSlow
<CanGC
>(cx
, call
.thisv());
470 call
.setThis(StringValue(str
));
// Type test used with CallNonGenericMethod by str_toSource and
// js_str_toString: true when |v| is a primitive string or an object of class
// StringObject.
474 MOZ_ALWAYS_INLINE
bool
475 IsString(HandleValue v
)
477 return v
.isString() || (v
.isObject() && v
.toObject().is
<StringObject
>());
// Implementation of String.prototype.toSource for a |this| value that has
// already passed IsString (asserted on entry).
//
// The value is converted to a string, quoted by js_QuoteString with a double
// quote as the quote character, and wrapped so that the result reads
// (new String(<quoted string>)). The text is assembled in |sb| and returned
// through args.rval().
//
// NOTE(review): the declaration of |sb| and the failure returns are on lines
// not visible in this view.
482 MOZ_ALWAYS_INLINE
bool
483 str_toSource_impl(JSContext
* cx
, CallArgs args
)
485 MOZ_ASSERT(IsString(args
.thisv()));
487 Rooted
<JSString
*> str(cx
, ToString
<CanGC
>(cx
, args
.thisv()));
491 str
= js_QuoteString(cx
, str
, '"');
496 if (!sb
.append("(new String(") || !sb
.append(str
) || !sb
.append("))"))
499 str
= sb
.finishString();
502 args
.rval().setString(str
);
// Native entry point for String.prototype.toSource: dispatches to
// str_toSource_impl through CallNonGenericMethod with IsString as the test
// for an acceptable |this| value.
507 str_toSource(JSContext
* cx
, unsigned argc
, Value
* vp
)
509 CallArgs args
= CallArgsFromVp(argc
, vp
);
510 return CallNonGenericMethod
<IsString
, str_toSource_impl
>(cx
, args
);
513 #endif /* JS_HAS_TOSOURCE */
// Implementation of String.prototype.toString for a |this| value that has
// already passed IsString (asserted on entry): returns the primitive string
// itself, or the string wrapped by the StringObject, through args.rval().
515 MOZ_ALWAYS_INLINE
bool
516 str_toString_impl(JSContext
* cx
, CallArgs args
)
518 MOZ_ASSERT(IsString(args
.thisv()));
520 args
.rval().setString(args
.thisv().isString()
521 ? args
.thisv().toString()
522 : args
.thisv().toObject().as
<StringObject
>().unbox());
// Native entry point for String.prototype.toString: dispatches to
// str_toString_impl through CallNonGenericMethod with IsString as the test
// for an acceptable |this| value. ThisToStringForStringProto compares against
// this function to detect an unmodified toString.
527 js_str_toString(JSContext
* cx
, unsigned argc
, Value
* vp
)
529 CallArgs args
= CallArgsFromVp(argc
, vp
);
530 return CallNonGenericMethod
<IsString
, str_toString_impl
>(cx
, args
);
534 * Java-like string native methods.
// Returns the substring of |str| that starts at |beginInt| and is |lengthInt|
// characters long.
//
// Preconditions (asserted): both values are non-negative, beginInt does not
// exceed str->length(), and lengthInt does not exceed the remaining length.
//
// Rope handling -- avoids flattening a one-level rope:
//   * range entirely inside the left child:  dependent string of that child;
//   * range entirely inside the right child: dependent string of that child,
//     with |begin| rebased by the left child's length;
//   * range straddling both children: one dependent string per side, joined
//     into a new rope of length |len|. The rope is rooted first (|ropeRoot|)
//     because NewDependentString takes |cx|.
// The final statement returns a dependent string of |str| itself.
//
// NOTE(review): the test that selects the rope path and the failure checks
// after the NewDependentString calls are on lines not visible in this view.
538 js::SubstringKernel(JSContext
* cx
, HandleString str
, int32_t beginInt
, int32_t lengthInt
)
540 MOZ_ASSERT(0 <= beginInt
);
541 MOZ_ASSERT(0 <= lengthInt
);
542 MOZ_ASSERT(uint32_t(beginInt
) <= str
->length());
543 MOZ_ASSERT(uint32_t(lengthInt
) <= str
->length() - beginInt
);
545 uint32_t begin
= beginInt
;
546 uint32_t len
= lengthInt
;
549 * Optimization for one level deep ropes.
550 * This is common for the following pattern:
553 * text = text.substr(0, x) + "bla" + text.substr(x)
554 * test.charCodeAt(x + 1)
558 JSRope
* rope
= &str
->asRope();
560 /* Substring is totally in leftChild of rope. */
561 if (begin
+ len
<= rope
->leftChild()->length())
562 return NewDependentString(cx
, rope
->leftChild(), begin
, len
);
564 /* Substring is totally in rightChild of rope. */
565 if (begin
>= rope
->leftChild()->length()) {
566 begin
-= rope
->leftChild()->length();
567 return NewDependentString(cx
, rope
->rightChild(), begin
, len
);
571 * Requested substring is partly in the left and partly in right child.
572 * Create a rope of substrings for both childs.
574 MOZ_ASSERT(begin
< rope
->leftChild()->length() &&
575 begin
+ len
> rope
->leftChild()->length());
577 size_t lhsLength
= rope
->leftChild()->length() - begin
;
578 size_t rhsLength
= begin
+ len
- rope
->leftChild()->length();
580 Rooted
<JSRope
*> ropeRoot(cx
, rope
);
581 RootedString
lhs(cx
, NewDependentString(cx
, ropeRoot
->leftChild(), begin
, lhsLength
));
585 RootedString
rhs(cx
, NewDependentString(cx
, ropeRoot
->rightChild(), 0, rhsLength
));
589 return JSRope::new_
<CanGC
>(cx
, lhs
, rhs
, len
);
592 return NewDependentString(cx
, str
, begin
, len
);
// Lower-cases the linear string |str|, whose characters are stored as CharT.
//
// Steps:
//   1. Scan for the first character that unicode::ToLowerCase would change.
//      Per the comment in the body, the input string itself is returned when
//      there is none.
//   2. Allocate length + 1 characters, copy the unchanged prefix [0, i) with
//      PodCopy, lower-case the rest and NUL-terminate.
//   3. Create the result with NewStringDontDeflate from the new buffer.
// The output uses the same character type as the input; for Latin1 input the
// assertion checks that every lower-cased character still fits in Latin1.
//
// NOTE(review): the scope of |nogc|, the store into newChars inside the
// second loop and the release of the buffer after the string is created are
// on lines not visible in this view.
595 template <typename CharT
>
597 ToLowerCase(JSContext
* cx
, JSLinearString
* str
)
599 // Unlike toUpperCase, toLowerCase has the nice invariant that if the input
600 // is a Latin1 string, the output is also a Latin1 string.
601 UniquePtr
<CharT
[], JS::FreePolicy
> newChars
;
602 size_t length
= str
->length();
604 AutoCheckCannotGC nogc
;
605 const CharT
* chars
= str
->chars
<CharT
>(nogc
);
607 // Look for the first upper case character.
609 for (; i
< length
; i
++) {
610 char16_t c
= chars
[i
];
611 if (unicode::ToLowerCase(c
) != c
)
615 // If all characters are lower case, return the input string.
619 newChars
= cx
->make_pod_array
<CharT
>(length
+ 1);
623 PodCopy(newChars
.get(), chars
, i
);
625 for (; i
< length
; i
++) {
626 char16_t c
= unicode::ToLowerCase(chars
[i
]);
627 MOZ_ASSERT_IF((IsSame
<CharT
, Latin1Char
>::value
), c
<= JSString::MAX_LATIN1_CHAR
);
631 newChars
[length
] = 0;
634 JSString
* res
= NewStringDontDeflate
<CanGC
>(cx
, newChars
.get(), length
);
// Shared body of toLowerCase and of the toLocaleLowerCase fallback.
// Converts |this| to a string with ThisToStringForStringProto, linearizes it,
// calls the ToLowerCase instantiation that matches the string's character
// storage (Latin1 or two-byte) and returns the result through call.rval().
// NOTE(review): the null checks after each step are on lines not visible
// here.
643 ToLowerCaseHelper(JSContext
* cx
, CallReceiver call
)
645 RootedString
str(cx
, ThisToStringForStringProto(cx
, call
));
649 JSLinearString
* linear
= str
->ensureLinear(cx
);
653 if (linear
->hasLatin1Chars())
654 str
= ToLowerCase
<Latin1Char
>(cx
, linear
);
656 str
= ToLowerCase
<char16_t
>(cx
, linear
);
660 call
.rval().setString(str
);
// Native entry point for String.prototype.toLowerCase: forwards the call
// arguments to ToLowerCaseHelper.
665 js::str_toLowerCase(JSContext
* cx
, unsigned argc
, Value
* vp
)
667 return ToLowerCaseHelper(cx
, CallArgsFromVp(argc
, vp
));
// Native entry point for String.prototype.toLocaleLowerCase.
//
// When the runtime has locale callbacks with a localeToLowerCase entry, the
// |this| value is converted with ThisToStringForStringProto and the callback
// produces the result value. Otherwise the call behaves like toLowerCase
// (ToLowerCaseHelper). Call arguments are not consulted in either path.
//
// NOTE(review): the null check on |str| and the return statements of the
// callback path are on lines not visible in this view.
671 str_toLocaleLowerCase(JSContext
* cx
, unsigned argc
, Value
* vp
)
673 CallArgs args
= CallArgsFromVp(argc
, vp
);
676 * Forcefully ignore the first (or any) argument and return toLowerCase(),
677 * ECMA has reserved that argument, presumably for defining the locale.
679 if (cx
->runtime()->localeCallbacks
&& cx
->runtime()->localeCallbacks
->localeToLowerCase
) {
680 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
684 RootedValue
result(cx
);
685 if (!cx
->runtime()->localeCallbacks
->localeToLowerCase(cx
, str
, &result
))
688 args
.rval().set(result
);
692 return ToLowerCaseHelper(cx
, args
);
// Writes the upper-cased form of srcChars[0..length) into |destChars|.
//
// Characters before |firstLowerCase| are copied unchanged (the caller has
// already established that upper-casing does not alter them); the remaining
// characters go through unicode::ToUpperCase. destChars[length] receives a
// terminating NUL, so the destination must have room for length + 1
// characters.
//
// Precondition (asserted): firstLowerCase < length. When DestChar is
// Latin1Char, every upper-cased character is asserted to fit in Latin1.
//
// NOTE(review): the store of |c| into destChars in the second loop is on a
// line not visible in this view.
695 template <typename DestChar
, typename SrcChar
>
697 ToUpperCaseImpl(DestChar
* destChars
, const SrcChar
* srcChars
, size_t firstLowerCase
, size_t length
)
699 MOZ_ASSERT(firstLowerCase
< length
);
701 for (size_t i
= 0; i
< firstLowerCase
; i
++)
702 destChars
[i
] = srcChars
[i
];
704 for (size_t i
= firstLowerCase
; i
< length
; i
++) {
705 char16_t c
= unicode::ToUpperCase(srcChars
[i
]);
706 MOZ_ASSERT_IF((IsSame
<DestChar
, Latin1Char
>::value
), c
<= JSString::MAX_LATIN1_CHAR
);
710 destChars
[length
] = '\0';
// Upper-cases the linear string |str|, whose characters are stored as CharT.
//
// Unlike lower-casing, the result may need wider characters than the input:
// the upper-case forms of U+00B5 and U+00FF are outside Latin1. |newChars|
// can therefore hold either a Latin1 or a two-byte buffer.
//
// Steps:
//   1. Scan for the first character that unicode::ToUpperCase would change;
//      per the comment in the body the input string is returned when there is
//      none.
//   2. For Latin1 input, look from that position onwards for 0xb5 or 0xff to
//      decide whether the result still fits in Latin1. Two-byte input always
//      produces a two-byte result.
//   3. Allocate length + 1 characters of the chosen width and fill them with
//      ToUpperCaseImpl.
//   4. Create the string with NewStringDontDeflate, then release() the
//      UniquePtr -- presumably because the new string now owns the buffer;
//      the check between the two statements is not visible here.
//
// NOTE(review): the scope of |nogc|, the allocation-failure checks and the
// return statements are on lines not visible in this view.
713 template <typename CharT
>
715 ToUpperCase(JSContext
* cx
, JSLinearString
* str
)
717 typedef UniquePtr
<Latin1Char
[], JS::FreePolicy
> Latin1CharPtr
;
718 typedef UniquePtr
<char16_t
[], JS::FreePolicy
> TwoByteCharPtr
;
720 mozilla::MaybeOneOf
<Latin1CharPtr
, TwoByteCharPtr
> newChars
;
721 size_t length
= str
->length();
723 AutoCheckCannotGC nogc
;
724 const CharT
* chars
= str
->chars
<CharT
>(nogc
);
726 // Look for the first lower case character.
728 for (; i
< length
; i
++) {
729 char16_t c
= chars
[i
];
730 if (unicode::ToUpperCase(c
) != c
)
734 // If all characters are upper case, return the input string.
738 // If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
739 // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
740 // upper case characters are not in the Latin1 range.
742 if (IsSame
<CharT
, Latin1Char
>::value
) {
743 resultIsLatin1
= true;
744 for (size_t j
= i
; j
< length
; j
++) {
745 Latin1Char c
= chars
[j
];
746 if (c
== 0xb5 || c
== 0xff) {
747 MOZ_ASSERT(unicode::ToUpperCase(c
) > JSString::MAX_LATIN1_CHAR
);
748 resultIsLatin1
= false;
751 MOZ_ASSERT(unicode::ToUpperCase(c
) <= JSString::MAX_LATIN1_CHAR
);
755 resultIsLatin1
= false;
758 if (resultIsLatin1
) {
759 Latin1CharPtr buf
= cx
->make_pod_array
<Latin1Char
>(length
+ 1);
763 ToUpperCaseImpl(buf
.get(), chars
, i
, length
);
764 newChars
.construct
<Latin1CharPtr
>(buf
);
766 TwoByteCharPtr buf
= cx
->make_pod_array
<char16_t
>(length
+ 1);
770 ToUpperCaseImpl(buf
.get(), chars
, i
, length
);
771 newChars
.construct
<TwoByteCharPtr
>(buf
);
776 if (newChars
.constructed
<Latin1CharPtr
>()) {
777 res
= NewStringDontDeflate
<CanGC
>(cx
, newChars
.ref
<Latin1CharPtr
>().get(), length
);
781 newChars
.ref
<Latin1CharPtr
>().release();
783 res
= NewStringDontDeflate
<CanGC
>(cx
, newChars
.ref
<TwoByteCharPtr
>().get(), length
);
787 newChars
.ref
<TwoByteCharPtr
>().release();
// Shared body of toUpperCase and of the toLocaleUpperCase fallback.
// Converts |this| to a string with ThisToStringForStringProto, linearizes it,
// calls the ToUpperCase instantiation that matches the string's character
// storage (Latin1 or two-byte) and returns the result through call.rval().
// NOTE(review): the null checks after each step are on lines not visible
// here.
794 ToUpperCaseHelper(JSContext
* cx
, CallReceiver call
)
796 RootedString
str(cx
, ThisToStringForStringProto(cx
, call
));
800 JSLinearString
* linear
= str
->ensureLinear(cx
);
804 if (linear
->hasLatin1Chars())
805 str
= ToUpperCase
<Latin1Char
>(cx
, linear
);
807 str
= ToUpperCase
<char16_t
>(cx
, linear
);
811 call
.rval().setString(str
);
// Native entry point for String.prototype.toUpperCase: forwards the call
// arguments to ToUpperCaseHelper.
816 js::str_toUpperCase(JSContext
* cx
, unsigned argc
, Value
* vp
)
818 return ToUpperCaseHelper(cx
, CallArgsFromVp(argc
, vp
));
// Native entry point for String.prototype.toLocaleUpperCase.
//
// When the runtime has locale callbacks with a localeToUpperCase entry, the
// |this| value is converted with ThisToStringForStringProto and the callback
// produces the result value. Otherwise the call behaves like toUpperCase
// (ToUpperCaseHelper). Call arguments are not consulted in either path.
//
// NOTE(review): the null check on |str| and the return statements of the
// callback path are on lines not visible in this view.
822 str_toLocaleUpperCase(JSContext
* cx
, unsigned argc
, Value
* vp
)
824 CallArgs args
= CallArgsFromVp(argc
, vp
);
827 * Forcefully ignore the first (or any) argument and return toUpperCase(),
828 * ECMA has reserved that argument, presumably for defining the locale.
830 if (cx
->runtime()->localeCallbacks
&& cx
->runtime()->localeCallbacks
->localeToUpperCase
) {
831 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
835 RootedValue
result(cx
);
836 if (!cx
->runtime()->localeCallbacks
->localeToUpperCase(cx
, str
, &result
))
839 args
.rval().set(result
);
843 return ToUpperCaseHelper(cx
, args
);
// Native entry point for String.prototype.localeCompare.
//
// |this| is converted with ThisToStringForStringProto and argument 0 with
// ToString<CanGC> (args.get(0), so a missing argument is handled). When the
// runtime has a localeCompare callback, it computes the result value;
// otherwise CompareStrings is used and its result is returned as an int32.
//
// NOTE(review): the null checks, the declaration of the int |result| used by
// the CompareStrings path and the return statements are on lines not visible
// in this view.
848 str_localeCompare(JSContext
* cx
, unsigned argc
, Value
* vp
)
850 CallArgs args
= CallArgsFromVp(argc
, vp
);
851 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
855 RootedString
thatStr(cx
, ToString
<CanGC
>(cx
, args
.get(0)));
859 if (cx
->runtime()->localeCallbacks
&& cx
->runtime()->localeCallbacks
->localeCompare
) {
860 RootedValue
result(cx
);
861 if (!cx
->runtime()->localeCallbacks
->localeCompare(cx
, str
, thatStr
, &result
))
864 args
.rval().set(result
);
869 if (!CompareStrings(cx
, str
, thatStr
, &result
))
872 args
.rval().setInt32(result
);
878 /* ES6 20140210 draft 21.1.3.12. */
// Native entry point for String.prototype.normalize, implemented with ICU's
// unorm_normalize.
//
// Form selection: when argument 0 is defined it is converted to a string and
// compared against the NFC, NFD, NFKC and NFKD names; any other value reports
// JSMSG_INVALID_NORMALIZE_FORM. The form used when the argument is undefined
// is chosen on a line not visible here (presumably NFC -- confirm).
//
// Normalization: the input is flattened and exposed as stable two-byte
// characters. The first unorm_normalize call writes into a Vector with
// INLINE_CAPACITY (32) inline characters. When ICU reports
// U_BUFFER_OVERFLOW_ERROR the vector is resized to the size returned by that
// call, the status is reset and the call is repeated; the assertion checks
// that the second call reports the same size. The result is copied into a new
// string with NewStringCopyN.
//
// NOTE(review): the assignments to |form|, the declaration of |finalSize| and
// the failure returns are on lines not visible in this view.
880 str_normalize(JSContext
* cx
, unsigned argc
, Value
* vp
)
882 CallArgs args
= CallArgsFromVp(argc
, vp
);
885 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
890 UNormalizationMode form
;
891 if (!args
.hasDefined(0)) {
895 RootedLinearString
formStr(cx
, ArgToRootedString(cx
, args
, 0));
900 if (formStr
== cx
->names().NFC
) {
902 } else if (formStr
== cx
->names().NFD
) {
904 } else if (formStr
== cx
->names().NFKC
) {
906 } else if (formStr
== cx
->names().NFKD
) {
909 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr,
910 JSMSG_INVALID_NORMALIZE_FORM
);
916 AutoStableStringChars
stableChars(cx
);
917 if (!str
->ensureFlat(cx
) || !stableChars
.initTwoByte(cx
, str
))
920 static const size_t INLINE_CAPACITY
= 32;
922 const UChar
* srcChars
= Char16ToUChar(stableChars
.twoByteRange().start().get());
923 int32_t srcLen
= AssertedCast
<int32_t>(str
->length());
924 Vector
<char16_t
, INLINE_CAPACITY
> chars(cx
);
925 if (!chars
.resize(INLINE_CAPACITY
))
928 UErrorCode status
= U_ZERO_ERROR
;
929 int32_t size
= unorm_normalize(srcChars
, srcLen
, form
, 0,
930 Char16ToUChar(chars
.begin()), INLINE_CAPACITY
,
932 if (status
== U_BUFFER_OVERFLOW_ERROR
) {
933 if (!chars
.resize(size
))
935 status
= U_ZERO_ERROR
;
939 unorm_normalize(srcChars
, srcLen
, form
, 0,
940 Char16ToUChar(chars
.begin()), size
,
942 MOZ_ASSERT(size
== finalSize
|| U_FAILURE(status
), "unorm_normalize behaved inconsistently");
944 if (U_FAILURE(status
))
947 JSString
* ns
= NewStringCopyN
<CanGC
>(cx
, chars
.begin(), size
);
952 args
.rval().setString(ns
);
// Native entry point for String.prototype.charAt.
//
// Fast path: |this| is a primitive string and argument 0 is an int32. The
// index is cast to size_t, so a negative value becomes very large and fails
// the bounds test like any other out-of-range index.
// General path: |this| goes through ThisToStringForStringProto and the index
// through ToInteger; negative values and values at or beyond the length are
// out of range.
//
// An in-range index yields the one-character string from
// staticStrings().getUnitStringForElement. The final statement returns the
// runtime's empty string -- presumably the target of the out-of-range
// branches, whose jumps are on lines not visible in this view.
958 js_str_charAt(JSContext
* cx
, unsigned argc
, Value
* vp
)
960 CallArgs args
= CallArgsFromVp(argc
, vp
);
962 RootedString
str(cx
);
964 if (args
.thisv().isString() && args
.length() != 0 && args
[0].isInt32()) {
965 str
= args
.thisv().toString();
966 i
= size_t(args
[0].toInt32());
967 if (i
>= str
->length())
970 str
= ThisToStringForStringProto(cx
, args
);
975 if (args
.length() > 0 && !ToInteger(cx
, args
[0], &d
))
978 if (d
< 0 || str
->length() <= d
)
983 str
= cx
->staticStrings().getUnitStringForElement(cx
, str
, i
);
986 args
.rval().setString(str
);
990 args
.rval().setString(cx
->runtime()->emptyString
);
// Shared implementation of charCodeAt: looks up the code unit of |string| at
// |index|.
//
// An int32 index is used directly; any other value is converted with
// ToInteger. In both cases the index is checked against string->length(), and
// in the ToInteger path negative values are rejected explicitly because the
// index variable is unsigned (see the comment in the body). The code unit is
// read with string->getChar.
//
// NOTE(review): the declarations of |i|, |d| and |c|, the stores into |res|
// and the out-of-range result are on lines not visible in this view.
995 js::str_charCodeAt_impl(JSContext
* cx
, HandleString string
, HandleValue index
, MutableHandleValue res
)
997 RootedString
str(cx
);
999 if (index
.isInt32()) {
1000 i
= index
.toInt32();
1001 if (i
>= string
->length())
1005 if (!ToInteger(cx
, index
, &d
))
1007 // check whether d is negative as size_t is unsigned
1008 if (d
< 0 || string
->length() <= d
)
1013 if (!string
->getChar(cx
, i
, &c
))
// Native entry point for String.prototype.charCodeAt.
// Obtains the receiver string -- directly when |this| is a primitive string,
// through ThisToStringForStringProto otherwise -- picks the index value
// depending on whether any argument was passed, and delegates to
// js::str_charCodeAt_impl.
// NOTE(review): the assignments to |index| are on lines not visible here.
1024 js_str_charCodeAt(JSContext
* cx
, unsigned argc
, Value
* vp
)
1026 CallArgs args
= CallArgsFromVp(argc
, vp
);
1027 RootedString
str(cx
);
1028 RootedValue
index(cx
);
1029 if (args
.thisv().isString()) {
1030 str
= args
.thisv().toString();
1032 str
= ThisToStringForStringProto(cx
, args
);
1036 if (args
.length() != 0)
1041 return js::str_charCodeAt_impl(cx
, str
, index
, args
.rval());
1045 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1046 * The patlen argument must be positive and no greater than sBMHPatLenMax.
1048 * Return the index of pat in text, or -1 if not found.
// Tuning constants for BoyerMooreHorspool. The skip table has one uint8_t
// entry per Latin-1 code point, which is why patterns longer than 255
// characters cannot be handled. sBMHBadPattern is distinct from the -1 "not
// found" result.
1050 static const uint32_t sBMHCharSetSize
= 256; /* ISO-Latin-1 */
1051 static const uint32_t sBMHPatLenMax
= 255; /* skip table element is uint8_t */
1052 static const int sBMHBadPattern
= -2; /* return value if pat is not ISO-Latin-1 */
// Boyer-Moore-Horspool search for pat[0..patLen) in text[0..textLen).
//
// Setup: every skip-table entry starts at patLen; then, for each pattern
// character except the last, the entry is set to its distance from the last
// pattern position. If one of those characters is >= sBMHCharSetSize the
// table cannot represent it and sBMHBadPattern is returned.
//
// Search: |k| is the text position aligned with the last pattern character.
// The inner loop compares backwards from there. After a mismatch, |k|
// advances by skip[text[k]], or by the full pattern length when text[k] is
// outside the table.
//
// Precondition (asserted): 0 < patLen <= sBMHPatLenMax.
//
// NOTE(review): the test that ends the backwards comparison on a full match
// and the final "not found" return are on lines not visible in this view.
1054 template <typename TextChar
, typename PatChar
>
1056 BoyerMooreHorspool(const TextChar
* text
, uint32_t textLen
, const PatChar
* pat
, uint32_t patLen
)
1058 MOZ_ASSERT(0 < patLen
&& patLen
<= sBMHPatLenMax
);
1060 uint8_t skip
[sBMHCharSetSize
];
1061 for (uint32_t i
= 0; i
< sBMHCharSetSize
; i
++)
1062 skip
[i
] = uint8_t(patLen
);
1064 uint32_t patLast
= patLen
- 1;
1065 for (uint32_t i
= 0; i
< patLast
; i
++) {
1066 char16_t c
= pat
[i
];
1067 if (c
>= sBMHCharSetSize
)
1068 return sBMHBadPattern
;
1069 skip
[c
] = uint8_t(patLast
- i
);
1072 for (uint32_t k
= patLast
; k
< textLen
; ) {
1073 for (uint32_t i
= k
, j
= patLast
; ; i
--, j
--) {
1074 if (text
[i
] != pat
[j
])
1077 return static_cast<int>(i
); /* safe: max string size */
1080 char16_t c
= text
[k
];
1081 k
+= (c
>= sBMHCharSetSize
) ? patLen
: skip
[c
];
// Comparison policy for Matcher that compares the pattern tail with memcmp.
// NOTE(review): the struct header line is not visible in this view; this is
// presumably the MemCmp policy that StringMatch names.
//
// Extent is a byte count: (patLen - 1) * sizeof(PatChar), i.e. the pattern
// without its first character, which Matcher has already compared. match()
// asserts that text and pattern characters have the same size, since a
// bytewise comparison is only meaningful then.
1086 template <typename TextChar
, typename PatChar
>
1088 typedef uint32_t Extent
;
1089 static MOZ_ALWAYS_INLINE Extent
computeExtent(const PatChar
*, uint32_t patLen
) {
1090 return (patLen
- 1) * sizeof(PatChar
);
1092 static MOZ_ALWAYS_INLINE
bool match(const PatChar
* p
, const TextChar
* t
, Extent extent
) {
1093 MOZ_ASSERT(sizeof(TextChar
) == sizeof(PatChar
));
1094 return memcmp(p
, t
, extent
) == 0;
// Comparison policy for Matcher that compares the pattern tail with an
// explicit loop.
// NOTE(review): the struct header line is not visible in this view; this is
// presumably the ManualCmp policy that StringMatch names.
//
// Extent is the one-past-the-end pointer of the pattern (pat + patLen).
// match() advances |p| and |t| together until |p| reaches that pointer; the
// per-character comparison inside the loop is on lines not visible here.
1098 template <typename TextChar
, typename PatChar
>
1100 typedef const PatChar
* Extent
;
1101 static MOZ_ALWAYS_INLINE Extent
computeExtent(const PatChar
* pat
, uint32_t patLen
) {
1102 return pat
+ patLen
;
1104 static MOZ_ALWAYS_INLINE
bool match(const PatChar
* p
, const TextChar
* t
, Extent extent
) {
1105 for (; p
!= extent
; ++p
, ++t
) {
// Returns a pointer to the first element of text[0..n) that equals |pat|.
//
// The scan is unrolled eight ways. The switch first consumes
// (textend - t) & 7 elements by falling through its cases, which leaves a
// multiple of eight elements for the while loop to test in groups of eight.
//
// NOTE(review): when n is 0 the switch selects case 0 and dereferences |t|
// before any bounds check, unless a guard exists on a line not visible in
// this view. The visible callers pass n - i with i < n -- confirm that n > 0
// holds for every caller.
// NOTE(review): the advance of |t| inside the while loop and the "not found"
// return are on lines not visible in this view.
1113 template <typename TextChar
, typename PatChar
>
1114 static const TextChar
*
1115 FirstCharMatcherUnrolled(const TextChar
* text
, uint32_t n
, const PatChar pat
)
1117 const TextChar
* textend
= text
+ n
;
1118 const TextChar
* t
= text
;
1120 switch ((textend
- t
) & 7) {
1121 case 0: if (*t
++ == pat
) return t
- 1;
1122 case 7: if (*t
++ == pat
) return t
- 1;
1123 case 6: if (*t
++ == pat
) return t
- 1;
1124 case 5: if (*t
++ == pat
) return t
- 1;
1125 case 4: if (*t
++ == pat
) return t
- 1;
1126 case 3: if (*t
++ == pat
) return t
- 1;
1127 case 2: if (*t
++ == pat
) return t
- 1;
1128 case 1: if (*t
++ == pat
) return t
- 1;
1130 while (textend
!= t
) {
1131 if (t
[0] == pat
) return t
;
1132 if (t
[1] == pat
) return t
+ 1;
1133 if (t
[2] == pat
) return t
+ 2;
1134 if (t
[3] == pat
) return t
+ 3;
1135 if (t
[4] == pat
) return t
+ 4;
1136 if (t
[5] == pat
) return t
+ 5;
1137 if (t
[6] == pat
) return t
+ 6;
1138 if (t
[7] == pat
) return t
+ 7;
// Finds the first byte equal to |pat| in text[0..n).
// Clang builds use the hand-unrolled scan; the other visible branch calls
// memchr, which returns a null pointer when the byte is absent.
// NOTE(review): the #else / #endif lines are not visible in this view.
1145 FirstCharMatcher8bit(const char* text
, uint32_t n
, const char pat
)
1147 #if defined(__clang__)
1148 return FirstCharMatcherUnrolled
<char, char>(text
, n
, pat
);
1150 return reinterpret_cast<const char*>(memchr(text
, pat
, n
));
// Finds the first char16_t equal to |pat| in text[0..n).
//
// On Mac OS X and Windows this is the hand-unrolled scan. Elsewhere the text
// is viewed as bytes: FirstCharMatcher8bit looks for the first byte of |pat|,
// a hit that lands on the second byte of a character is rejected (see the
// comment in the body), and the second byte is then compared directly. The
// byte offset |i| is halved to turn it back into a character index.
//
// The assertion n < UINT32_MAX/2 guards the conversion of |n| from characters
// to bytes, which happens on a line not visible here.
//
// NOTE(review): the loop header, the alignment test and the "not found"
// return are on lines not visible in this view.
1154 static const char16_t
*
1155 FirstCharMatcher16bit(const char16_t
* text
, uint32_t n
, const char16_t pat
)
1157 #if defined(XP_MACOSX) || defined(XP_WIN)
1159 * Performance of memchr is horrible in OSX. Windows is better,
1160 * but it is still better to use UnrolledMatcher.
1162 return FirstCharMatcherUnrolled
<char16_t
, char16_t
>(text
, n
, pat
);
1165 * For linux the best performance is obtained by slightly hacking memchr.
1166 * memchr works only on 8bit char but char16_t is 16bit. So we treat char16_t
1167 * in blocks of 8bit and use memchr.
1170 const char* text8
= (const char*) text
;
1171 const char* pat8
= reinterpret_cast<const char*>(&pat
);
1173 MOZ_ASSERT(n
< UINT32_MAX
/2);
1178 /* Find the first 8 bits of 16bit character in text. */
1179 const char* pos8
= FirstCharMatcher8bit(text8
+ i
, n
- i
, pat8
[0]);
1180 if (pos8
== nullptr)
1182 i
= static_cast<uint32_t>(pos8
- text8
);
1184 /* Incorrect match if it matches the last 8 bits of 16bit char. */
1190 /* Test if last 8 bits match last 8 bits of 16bit char. */
1191 if (pat8
[1] == text8
[i
+ 1])
1192 return (text
+ (i
/2));
// Generic substring search: finds candidate positions by scanning for the
// first pattern character, then verifies the rest of the pattern with
// InnerMatch::match.
//
// n = textlen - patlen + 1 is the number of positions at which the pattern
// can start, so the caller must guarantee textlen >= patlen (the StringMatch
// template returns before calling this when textLen < patLen).
//
// The first-character scan is chosen by character width: the 16-bit matcher
// when both types are two bytes wide, the 8-bit matcher when both are one
// byte wide, and the unrolled generic matcher for mixed widths. After a hit,
// match() is given pat + 1 and text + i + 1, because the first character has
// already been compared.
//
// NOTE(review): the loop header, the handling of a null |pos| and the return
// statements are on lines not visible in this view.
1200 template <class InnerMatch
, typename TextChar
, typename PatChar
>
1202 Matcher(const TextChar
* text
, uint32_t textlen
, const PatChar
* pat
, uint32_t patlen
)
1204 const typename
InnerMatch::Extent extent
= InnerMatch::computeExtent(pat
, patlen
);
1207 uint32_t n
= textlen
- patlen
+ 1;
1209 const TextChar
* pos
;
1211 if (sizeof(TextChar
) == 2 && sizeof(PatChar
) == 2)
1212 pos
= (TextChar
*) FirstCharMatcher16bit((char16_t
*)text
+ i
, n
- i
, pat
[0]);
1213 else if (sizeof(TextChar
) == 1 && sizeof(PatChar
) == 1)
1214 pos
= (TextChar
*) FirstCharMatcher8bit((char*) text
+ i
, n
- i
, pat
[0]);
1216 pos
= (TextChar
*) FirstCharMatcherUnrolled
<TextChar
, PatChar
>(text
+ i
, n
- i
, pat
[0]);
1221 i
= static_cast<uint32_t>(pos
- text
);
1222 if (InnerMatch::match(pat
+ 1, text
+ i
+ 1, extent
))
// Searches text[0..textLen) for pat[0..patLen) and picks a strategy by size.
// Callers in this file compare the result against -1 for "not found".
//
//   * textLen < patLen is handled before any search.
//   * On 32-bit x86 a simple pointer loop over the text is used (see the
//     comment in the body about register pressure).
//   * For textLen >= 512 and 11 <= patLen <= sBMHPatLenMax,
//     BoyerMooreHorspool is tried; its result is used unless it reports
//     sBMHBadPattern.
//   * Otherwise Matcher is used -- with the memcmp policy for patterns longer
//     than 128 characters whose type equals the text type (not on Linux), and
//     with the manual-loop policy in all other cases.
//
// NOTE(review): the handling of an empty pattern, the body of the x86 loop
// and the return statements are on lines not visible in this view.
1231 template <typename TextChar
, typename PatChar
>
1232 static MOZ_ALWAYS_INLINE
int
1233 StringMatch(const TextChar
* text
, uint32_t textLen
, const PatChar
* pat
, uint32_t patLen
)
1237 if (textLen
< patLen
)
1240 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1242 * Given enough registers, the unrolled loop below is faster than the
1243 * following loop. 32-bit x86 does not have enough registers.
1246 const PatChar p0
= *pat
;
1247 for (const TextChar
* c
= text
, *end
= text
+ textLen
; c
!= end
; ++c
) {
1256 * If the text or pattern string is short, BMH will be more expensive than
1257 * the basic linear scan due to initialization cost and a more complex loop
1258 * body. While the correct threshold is input-dependent, we can make a few
1259 * conservative observations:
1260 * - When |textLen| is "big enough", the initialization time will be
1261 * proportionally small, so the worst-case slowdown is minimized.
1262 * - When |patLen| is "too small", even the best case for BMH will be
1263 * slower than a simple scan for large |textLen| due to the more complex
1265 * From this, the values for "big enough" and "too small" are determined
1266 * empirically. See bug 526348.
1268 if (textLen
>= 512 && patLen
>= 11 && patLen
<= sBMHPatLenMax
) {
1269 int index
= BoyerMooreHorspool(text
, textLen
, pat
, patLen
);
1270 if (index
!= sBMHBadPattern
)
1275 * For big patterns with large potential overlap we want the SIMD-optimized
1276 * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1277 * use memcmp if one of the strings is TwoByte and the other is Latin1.
1279 * FIXME: Linux memcmp performance is sad and the manual loop is faster.
1282 #if !defined(__linux__)
1283 (patLen
> 128 && IsSame
<TextChar
, PatChar
>::value
)
1284 ? Matcher
<MemCmp
<TextChar
, PatChar
>, TextChar
, PatChar
>(text
, textLen
, pat
, patLen
)
1287 Matcher
<ManualCmp
<TextChar
, PatChar
>, TextChar
, PatChar
>(text
, textLen
, pat
, patLen
);
// Searches the linear string |text| for |pat|, starting at character |start|.
// Returns the index of the match relative to the beginning of |text|, or -1
// when there is none.
//
// The four combinations of Latin1 / two-byte storage for text and pattern are
// dispatched to the matching instantiation of the character-level StringMatch
// template. That search runs on the text beginning at |start|, so |start| is
// added back to a successful result.
//
// Precondition (asserted): start <= text->length().
1291 StringMatch(JSLinearString
* text
, JSLinearString
* pat
, uint32_t start
= 0)
1293 MOZ_ASSERT(start
<= text
->length());
1294 uint32_t textLen
= text
->length() - start
;
1295 uint32_t patLen
= pat
->length();
1298 AutoCheckCannotGC nogc
;
1299 if (text
->hasLatin1Chars()) {
1300 const Latin1Char
* textChars
= text
->latin1Chars(nogc
) + start
;
1301 if (pat
->hasLatin1Chars())
1302 match
= StringMatch(textChars
, textLen
, pat
->latin1Chars(nogc
), patLen
);
1304 match
= StringMatch(textChars
, textLen
, pat
->twoByteChars(nogc
), patLen
);
1306 const char16_t
* textChars
= text
->twoByteChars(nogc
) + start
;
1307 if (pat
->hasLatin1Chars())
1308 match
= StringMatch(textChars
, textLen
, pat
->latin1Chars(nogc
), patLen
);
1310 match
= StringMatch(textChars
, textLen
, pat
->twoByteChars(nogc
), patLen
);
1313 return (match
== -1) ? -1 : start
+ match
;
// Base-2 logarithm of a threshold ratio, judging by the name.
// NOTE(review): its use is not visible in this view -- presumably consulted
// by RopeMatch when deciding whether to flatten the rope; confirm.
1316 static const size_t sRopeMatchThresholdRatioLog2
= 5;
// Reports whether the two-byte pattern pat[0..patLen) occurs anywhere in the
// linear string |text|. The search runs on the Latin1 or the two-byte
// characters of |text|, whichever representation it uses, and the result is
// a comparison of the StringMatch index against -1.
1319 js::StringHasPattern(JSLinearString
* text
, const char16_t
* pat
, uint32_t patLen
)
1321 AutoCheckCannotGC nogc
;
1322 return text
->hasLatin1Chars()
1323 ? StringMatch(text
->latin1Chars(nogc
), text
->length(), pat
, patLen
) != -1
1324 : StringMatch(text
->twoByteChars(nogc
), text
->length(), pat
, patLen
) != -1;
// Public wrapper around the file-local StringMatch for linear strings:
// returns the index of |pat| in |text| at or after |start|, or -1 when it is
// not found. Note that |start| is a size_t here, whereas StringMatch takes a
// uint32_t.
1328 js::StringFindPattern(JSLinearString
* text
, JSLinearString
* pat
, size_t start
)
1330 return StringMatch(text
, pat
, start
);
1333 // When an algorithm does not need a string represented as a single linear
1334 // array of characters, this range utility may be used to traverse the string as a
1335 // sequence of linear arrays of characters. This avoids flattening ropes.
// Range over the linear segments of a string, visited left to right.
//
// |cur| is the current linear segment (null once the range is exhausted, see
// empty()); |stack| holds right children that still have to be visited.
// settle() descends along left children of a rope, pushing each right child,
// until it reaches a linear string. popFront() then continues with the most
// recently pushed right child.
//
// init() and popFront() return bool and are marked MOZ_WARN_UNUSED_RESULT;
// the only failure visible here is a failed stack.append in settle().
//
// NOTE(review): the access specifiers, the return statements and the bodies
// of init() and front() are only partly visible in this view.
1336 class StringSegmentRange
1338 // If malloc() shows up in any profiles from this vector, we can add a new
1339 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1340 AutoStringVector stack
;
1341 RootedLinearString cur
;
1343 bool settle(JSString
* str
) {
1344 while (str
->isRope()) {
1345 JSRope
& rope
= str
->asRope();
1346 if (!stack
.append(rope
.rightChild()))
1348 str
= rope
.leftChild();
1350 cur
= &str
->asLinear();
1355 explicit StringSegmentRange(JSContext
* cx
)
1356 : stack(cx
), cur(cx
)
1359 MOZ_WARN_UNUSED_RESULT
bool init(JSString
* str
) {
1360 MOZ_ASSERT(stack
.empty());
1364 bool empty() const {
1365 return cur
== nullptr;
1368 JSLinearString
* front() const {
1369 MOZ_ASSERT(!cur
->isRope());
1373 MOZ_WARN_UNUSED_RESULT
bool popFront() {
1374 MOZ_ASSERT(!empty());
1375 if (stack
.empty()) {
1379 return settle(stack
.popCopy());
1383 typedef Vector
<JSLinearString
*, 16, SystemAllocPolicy
> LinearStringVector
;
1385 template <typename TextChar
, typename PatChar
>
1387 RopeMatchImpl(const AutoCheckCannotGC
& nogc
, LinearStringVector
& strings
,
1388 const PatChar
* pat
, size_t patLen
)
1390 /* Absolute offset from the beginning of the logical text string. */
1393 for (JSLinearString
** outerp
= strings
.begin(); outerp
!= strings
.end(); ++outerp
) {
1394 /* Try to find a match within 'outer'. */
1395 JSLinearString
* outer
= *outerp
;
1396 const TextChar
* chars
= outer
->chars
<TextChar
>(nogc
);
1397 size_t len
= outer
->length();
1398 int matchResult
= StringMatch(chars
, len
, pat
, patLen
);
1399 if (matchResult
!= -1) {
1401 return pos
+ matchResult
;
1404 /* Try to find a match starting in 'outer' and running into other nodes. */
1405 const TextChar
* const text
= chars
+ (patLen
> len
? 0 : len
- patLen
+ 1);
1406 const TextChar
* const textend
= chars
+ len
;
1407 const PatChar p0
= *pat
;
1408 const PatChar
* const p1
= pat
+ 1;
1409 const PatChar
* const patend
= pat
+ patLen
;
1410 for (const TextChar
* t
= text
; t
!= textend
; ) {
1414 JSLinearString
** innerp
= outerp
;
1415 const TextChar
* ttend
= textend
;
1416 const TextChar
* tt
= t
;
1417 for (const PatChar
* pp
= p1
; pp
!= patend
; ++pp
, ++tt
) {
1418 while (tt
== ttend
) {
1419 if (++innerp
== strings
.end())
1422 JSLinearString
* inner
= *innerp
;
1423 tt
= inner
->chars
<TextChar
>(nogc
);
1424 ttend
= tt
+ inner
->length();
1427 goto break_continue
;
1431 return pos
+ (t
- chars
) - 1; /* -1 because of *t++ above */
1443 * RopeMatch takes the text to search and the pattern to search for in the text.
1444 * RopeMatch returns false on OOM and otherwise returns the match index through
1445 * the 'match' outparam (-1 for not found).
1448 RopeMatch(JSContext
* cx
, JSRope
* text
, JSLinearString
* pat
, int* match
)
1450 uint32_t patLen
= pat
->length();
1455 if (text
->length() < patLen
) {
1461 * List of leaf nodes in the rope. If we run out of memory when trying to
1462 * append to this list, we can still fall back to StringMatch, so use the
1463 * system allocator so we don't report OOM in that case.
1465 LinearStringVector strings
;
1468 * We don't want to do rope matching if there is a poor node-to-char ratio,
1469 * since this means spending a lot of time in the match loop below. We also
1470 * need to build the list of leaf nodes. Do both here: iterate over the
1471 * nodes so long as there are not too many.
1473 * We also don't use rope matching if the rope contains both Latin1 and
1474 * TwoByte nodes, to simplify the match algorithm.
1477 size_t threshold
= text
->length() >> sRopeMatchThresholdRatioLog2
;
1478 StringSegmentRange
r(cx
);
1482 bool textIsLatin1
= text
->hasLatin1Chars();
1483 while (!r
.empty()) {
1484 if (threshold
-- == 0 ||
1485 r
.front()->hasLatin1Chars() != textIsLatin1
||
1486 !strings
.append(r
.front()))
1488 JSLinearString
* linear
= text
->ensureLinear(cx
);
1492 *match
= StringMatch(linear
, pat
);
1500 AutoCheckCannotGC nogc
;
1501 if (text
->hasLatin1Chars()) {
1502 if (pat
->hasLatin1Chars())
1503 *match
= RopeMatchImpl
<Latin1Char
>(nogc
, strings
, pat
->latin1Chars(nogc
), patLen
);
1505 *match
= RopeMatchImpl
<Latin1Char
>(nogc
, strings
, pat
->twoByteChars(nogc
), patLen
);
1507 if (pat
->hasLatin1Chars())
1508 *match
= RopeMatchImpl
<char16_t
>(nogc
, strings
, pat
->latin1Chars(nogc
), patLen
);
1510 *match
= RopeMatchImpl
<char16_t
>(nogc
, strings
, pat
->twoByteChars(nogc
), patLen
);
1516 /* ES6 20121026 draft 15.5.4.24. */
1518 str_contains(JSContext
* cx
, unsigned argc
, Value
* vp
)
1520 CallArgs args
= CallArgsFromVp(argc
, vp
);
1522 // Steps 1, 2, and 3
1523 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
1528 RootedLinearString
searchStr(cx
, ArgToRootedString(cx
, args
, 0));
1534 if (args
.hasDefined(1)) {
1535 if (args
[1].isInt32()) {
1536 int i
= args
[1].toInt32();
1537 pos
= (i
< 0) ? 0U : uint32_t(i
);
1540 if (!ToInteger(cx
, args
[1], &d
))
1542 pos
= uint32_t(Min(Max(d
, 0.0), double(UINT32_MAX
)));
1547 uint32_t textLen
= str
->length();
1550 uint32_t start
= Min(Max(pos
, 0U), textLen
);
1553 JSLinearString
* text
= str
->ensureLinear(cx
);
1557 args
.rval().setBoolean(StringMatch(text
, searchStr
, start
) != -1);
1561 /* ES6 20120927 draft 15.5.4.7. */
1563 js::str_indexOf(JSContext
* cx
, unsigned argc
, Value
* vp
)
1565 CallArgs args
= CallArgsFromVp(argc
, vp
);
1567 // Steps 1, 2, and 3
1568 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
1573 RootedLinearString
searchStr(cx
, ArgToRootedString(cx
, args
, 0));
1579 if (args
.hasDefined(1)) {
1580 if (args
[1].isInt32()) {
1581 int i
= args
[1].toInt32();
1582 pos
= (i
< 0) ? 0U : uint32_t(i
);
1585 if (!ToInteger(cx
, args
[1], &d
))
1587 pos
= uint32_t(Min(Max(d
, 0.0), double(UINT32_MAX
)));
1592 uint32_t textLen
= str
->length();
1595 uint32_t start
= Min(Max(pos
, 0U), textLen
);
1598 JSLinearString
* text
= str
->ensureLinear(cx
);
1602 args
.rval().setInt32(StringMatch(text
, searchStr
, start
));
/*
 * Find the last occurrence of |pat| in |text| that begins at or before offset
 * |start|.
 *
 * Preconditions (asserted): the pattern is non-empty, no longer than the
 * text, and |start| leaves room for a whole pattern, i.e.
 * start <= textLen - patLen.
 *
 * Returns the offset of the match, or -1 when there is none.
 */
template <typename TextChar, typename PatChar>
static int32_t
LastIndexOfImpl(const TextChar* text, size_t textLen, const PatChar* pat, size_t patLen,
                size_t start)
{
    MOZ_ASSERT(patLen > 0);
    MOZ_ASSERT(patLen <= textLen);
    MOZ_ASSERT(start <= textLen - patLen);

    const PatChar p0 = *pat;

    /*
     * Count candidate offsets down from |start| to 0 with an index rather than
     * a pointer. A pointer-based countdown has to step to |text - 1| in order
     * to terminate, and forming a pointer before the start of an array is
     * undefined behaviour.
     */
    for (size_t offset = start + 1; offset-- > 0; ) {
        const TextChar* candidate = text + offset;
        if (*candidate != p0)
            continue;

        /* The first character matches; compare the rest of the pattern. */
        size_t matched = 1;
        while (matched < patLen && candidate[matched] == pat[matched])
            ++matched;

        if (matched == patLen)
            return static_cast<int32_t>(offset);
    }

    return -1;
}
1636 js::str_lastIndexOf(JSContext
* cx
, unsigned argc
, Value
* vp
)
1638 CallArgs args
= CallArgsFromVp(argc
, vp
);
1639 RootedString
textstr(cx
, ThisToStringForStringProto(cx
, args
));
1643 RootedLinearString
pat(cx
, ArgToRootedString(cx
, args
, 0));
1647 size_t textLen
= textstr
->length();
1648 size_t patLen
= pat
->length();
1649 int start
= textLen
- patLen
; // Start searching here
1651 args
.rval().setInt32(-1);
1655 if (args
.hasDefined(1)) {
1656 if (args
[1].isInt32()) {
1657 int i
= args
[1].toInt32();
1664 if (!ToNumber(cx
, args
[1], &d
))
1667 d
= JS::ToInteger(d
);
1677 args
.rval().setInt32(start
);
1681 JSLinearString
* text
= textstr
->ensureLinear(cx
);
1686 AutoCheckCannotGC nogc
;
1687 if (text
->hasLatin1Chars()) {
1688 const Latin1Char
* textChars
= text
->latin1Chars(nogc
);
1689 if (pat
->hasLatin1Chars())
1690 res
= LastIndexOfImpl(textChars
, textLen
, pat
->latin1Chars(nogc
), patLen
, start
);
1692 res
= LastIndexOfImpl(textChars
, textLen
, pat
->twoByteChars(nogc
), patLen
, start
);
1694 const char16_t
* textChars
= text
->twoByteChars(nogc
);
1695 if (pat
->hasLatin1Chars())
1696 res
= LastIndexOfImpl(textChars
, textLen
, pat
->latin1Chars(nogc
), patLen
, start
);
1698 res
= LastIndexOfImpl(textChars
, textLen
, pat
->twoByteChars(nogc
), patLen
, start
);
1701 args
.rval().setInt32(res
);
1706 HasSubstringAt(JSLinearString
* text
, JSLinearString
* pat
, size_t start
)
1708 MOZ_ASSERT(start
+ pat
->length() <= text
->length());
1710 size_t patLen
= pat
->length();
1712 AutoCheckCannotGC nogc
;
1713 if (text
->hasLatin1Chars()) {
1714 const Latin1Char
* textChars
= text
->latin1Chars(nogc
) + start
;
1715 if (pat
->hasLatin1Chars())
1716 return PodEqual(textChars
, pat
->latin1Chars(nogc
), patLen
);
1718 return EqualChars(textChars
, pat
->twoByteChars(nogc
), patLen
);
1721 const char16_t
* textChars
= text
->twoByteChars(nogc
) + start
;
1722 if (pat
->hasTwoByteChars())
1723 return PodEqual(textChars
, pat
->twoByteChars(nogc
), patLen
);
1725 return EqualChars(pat
->latin1Chars(nogc
), textChars
, patLen
);
1728 /* ES6 20131108 draft 21.1.3.18. */
1730 js::str_startsWith(JSContext
* cx
, unsigned argc
, Value
* vp
)
1732 CallArgs args
= CallArgsFromVp(argc
, vp
);
1734 // Steps 1, 2, and 3
1735 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
1740 if (args
.get(0).isObject() && IsObjectWithClass(args
[0], ESClass_RegExp
, cx
)) {
1741 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_INVALID_ARG_TYPE
,
1742 "first", "", "Regular Expression");
1747 RootedLinearString
searchStr(cx
, ArgToRootedString(cx
, args
, 0));
1753 if (args
.hasDefined(1)) {
1754 if (args
[1].isInt32()) {
1755 int i
= args
[1].toInt32();
1756 pos
= (i
< 0) ? 0U : uint32_t(i
);
1759 if (!ToInteger(cx
, args
[1], &d
))
1761 pos
= uint32_t(Min(Max(d
, 0.0), double(UINT32_MAX
)));
1766 uint32_t textLen
= str
->length();
1769 uint32_t start
= Min(Max(pos
, 0U), textLen
);
1772 uint32_t searchLen
= searchStr
->length();
1775 if (searchLen
+ start
< searchLen
|| searchLen
+ start
> textLen
) {
1776 args
.rval().setBoolean(false);
1781 JSLinearString
* text
= str
->ensureLinear(cx
);
1785 args
.rval().setBoolean(HasSubstringAt(text
, searchStr
, start
));
1789 /* ES6 20131108 draft 21.1.3.7. */
1791 str_endsWith(JSContext
* cx
, unsigned argc
, Value
* vp
)
1793 CallArgs args
= CallArgsFromVp(argc
, vp
);
1795 // Steps 1, 2, and 3
1796 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
1801 if (args
.get(0).isObject() && IsObjectWithClass(args
[0], ESClass_RegExp
, cx
)) {
1802 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_INVALID_ARG_TYPE
,
1803 "first", "", "Regular Expression");
1808 RootedLinearString
searchStr(cx
, ArgToRootedString(cx
, args
, 0));
1813 uint32_t textLen
= str
->length();
1816 uint32_t pos
= textLen
;
1817 if (args
.hasDefined(1)) {
1818 if (args
[1].isInt32()) {
1819 int i
= args
[1].toInt32();
1820 pos
= (i
< 0) ? 0U : uint32_t(i
);
1823 if (!ToInteger(cx
, args
[1], &d
))
1825 pos
= uint32_t(Min(Max(d
, 0.0), double(UINT32_MAX
)));
1830 uint32_t end
= Min(Max(pos
, 0U), textLen
);
1833 uint32_t searchLen
= searchStr
->length();
1835 // Step 13 (reordered)
1836 if (searchLen
> end
) {
1837 args
.rval().setBoolean(false);
1842 uint32_t start
= end
- searchLen
;
1845 JSLinearString
* text
= str
->ensureLinear(cx
);
1849 args
.rval().setBoolean(HasSubstringAt(text
, searchStr
, start
));
1853 template <typename CharT
>
1855 TrimString(const CharT
* chars
, bool trimLeft
, bool trimRight
, size_t length
,
1856 size_t* pBegin
, size_t* pEnd
)
1858 size_t begin
= 0, end
= length
;
1861 while (begin
< length
&& unicode::IsSpace(chars
[begin
]))
1866 while (end
> begin
&& unicode::IsSpace(chars
[end
- 1]))
1875 TrimString(JSContext
* cx
, Value
* vp
, bool trimLeft
, bool trimRight
)
1877 CallReceiver call
= CallReceiverFromVp(vp
);
1878 RootedString
str(cx
, ThisToStringForStringProto(cx
, call
));
1882 JSLinearString
* linear
= str
->ensureLinear(cx
);
1886 size_t length
= linear
->length();
1888 if (linear
->hasLatin1Chars()) {
1889 AutoCheckCannotGC nogc
;
1890 TrimString(linear
->latin1Chars(nogc
), trimLeft
, trimRight
, length
, &begin
, &end
);
1892 AutoCheckCannotGC nogc
;
1893 TrimString(linear
->twoByteChars(nogc
), trimLeft
, trimRight
, length
, &begin
, &end
);
1896 str
= NewDependentString(cx
, str
, begin
, end
- begin
);
1900 call
.rval().setString(str
);
1905 str_trim(JSContext
* cx
, unsigned argc
, Value
* vp
)
1907 return TrimString(cx
, vp
, true, true);
1911 str_trimLeft(JSContext
* cx
, unsigned argc
, Value
* vp
)
1913 return TrimString(cx
, vp
, true, false);
1917 str_trimRight(JSContext
* cx
, unsigned argc
, Value
* vp
)
1919 return TrimString(cx
, vp
, false, true);
1923 * Perl-inspired string functions.
1928 /* Result of a successfully performed flat match. */
1934 friend class StringRegExpGuard
;
1937 explicit FlatMatch(JSContext
* cx
) : pat_(cx
) {}
1938 JSLinearString
* pattern() const { return pat_
; }
1939 size_t patternLength() const { return pat_
->length(); }
1942 * Note: The match is -1 when the match is performed successfully,
1943 * but no match is found.
1945 int32_t match() const { return match_
; }
1948 } /* anonymous namespace */
/*
 * Report whether |c| has a special meaning in a regular expression, i.e. is
 * one of the characters listed with the PatternCharacter production in
 * 15.10.1.
 */
static inline bool
IsRegExpMetaChar(char16_t c)
{
    switch (c) {
      case '^':
      case '$':
      case '\\':
      case '.':
      case '*':
      case '+':
      case '?':
      case '(':
      case ')':
      case '[':
      case ']':
      case '{':
      case '}':
      case '|':
        return true;
    }
    return false;
}
1964 template <typename CharT
>
1966 js::HasRegExpMetaChars(const CharT
* chars
, size_t length
)
1968 for (size_t i
= 0; i
< length
; ++i
) {
1969 if (IsRegExpMetaChar(chars
[i
]))
1976 js::HasRegExpMetaChars
<Latin1Char
>(const Latin1Char
* chars
, size_t length
);
1979 js::HasRegExpMetaChars
<char16_t
>(const char16_t
* chars
, size_t length
);
1982 js::StringHasRegExpMetaChars(JSLinearString
* str
)
1984 AutoCheckCannotGC nogc
;
1985 if (str
->hasLatin1Chars())
1986 return HasRegExpMetaChars(str
->latin1Chars(nogc
), str
->length());
1988 return HasRegExpMetaChars(str
->twoByteChars(nogc
), str
->length());
1994 * StringRegExpGuard factors logic out of String regexp operations.
1996 * |optarg| indicates in which argument position RegExp flags will be found, if
1997 * present. This is a Mozilla extension and not part of any ECMA spec.
1999 class MOZ_STACK_CLASS StringRegExpGuard
2006 * Upper bound on the number of characters we are willing to potentially
2007 * waste on searching for RegExp meta-characters.
2009 static const size_t MAX_FLAT_PAT_LEN
= 256;
2011 template <typename CharT
>
2013 flattenPattern(StringBuffer
& sb
, const CharT
* chars
, size_t len
)
2015 static const char ESCAPE_CHAR
= '\\';
2016 for (const CharT
* it
= chars
; it
< chars
+ len
; ++it
) {
2017 if (IsRegExpMetaChar(*it
)) {
2018 if (!sb
.append(ESCAPE_CHAR
) || !sb
.append(*it
))
2021 if (!sb
.append(*it
))
2029 flattenPattern(JSContext
* cx
, JSAtom
* pat
)
2031 StringBuffer
sb(cx
);
2032 if (!sb
.reserve(pat
->length()))
2035 if (pat
->hasLatin1Chars()) {
2036 AutoCheckCannotGC nogc
;
2037 if (!flattenPattern(sb
, pat
->latin1Chars(nogc
), pat
->length()))
2040 AutoCheckCannotGC nogc
;
2041 if (!flattenPattern(sb
, pat
->twoByteChars(nogc
), pat
->length()))
2045 return sb
.finishAtom();
2049 explicit StringRegExpGuard(JSContext
* cx
)
2050 : re_(cx
), fm(cx
), obj_(cx
)
2053 /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
2054 bool init(JSContext
* cx
, CallArgs args
, bool convertVoid
= false)
2056 if (args
.length() != 0 && IsObjectWithClass(args
[0], ESClass_RegExp
, cx
))
2057 return init(cx
, &args
[0].toObject());
2059 if (convertVoid
&& !args
.hasDefined(0)) {
2060 fm
.pat_
= cx
->runtime()->emptyString
;
2064 JSString
* arg
= ArgToRootedString(cx
, args
, 0);
2068 fm
.pat_
= AtomizeString(cx
, arg
);
2075 bool init(JSContext
* cx
, JSObject
* regexp
) {
2078 MOZ_ASSERT(ObjectClassIs(obj_
, ESClass_RegExp
, cx
));
2080 if (!RegExpToShared(cx
, obj_
, &re_
))
2085 bool init(JSContext
* cx
, HandleString pattern
) {
2086 fm
.pat_
= AtomizeString(cx
, pattern
);
2093 * Attempt to match |patstr| to |textstr|. A flags argument, metachars in
2094 * the pattern string, or a lengthy pattern string can thwart this process.
2096 * |checkMetaChars| looks for regexp metachars in the pattern string.
2098 * Return whether flat matching could be used.
2100 * N.B. tryFlatMatch returns nullptr on OOM, so the caller must check
2101 * cx->isExceptionPending().
2104 tryFlatMatch(JSContext
* cx
, JSString
* text
, unsigned optarg
, unsigned argc
,
2105 bool checkMetaChars
= true)
2107 if (re_
.initialized())
2113 size_t patLen
= fm
.pat_
->length();
2114 if (checkMetaChars
&& (patLen
> MAX_FLAT_PAT_LEN
|| StringHasRegExpMetaChars(fm
.pat_
)))
2118 * |text| could be a rope, so we want to avoid flattening it for as
2121 if (text
->isRope()) {
2122 if (!RopeMatch(cx
, &text
->asRope(), fm
.pat_
, &fm
.match_
))
2125 fm
.match_
= StringMatch(&text
->asLinear(), fm
.pat_
, 0);
2131 /* If the pattern is not already a regular expression, make it so. */
2132 bool normalizeRegExp(JSContext
* cx
, bool flat
, unsigned optarg
, CallArgs args
)
2134 if (re_
.initialized())
2137 /* Build RegExp from pattern string. */
2138 RootedString
opt(cx
);
2139 if (optarg
< args
.length()) {
2140 opt
= ToString
<CanGC
>(cx
, args
[optarg
]);
2147 Rooted
<JSAtom
*> pat(cx
);
2149 pat
= flattenPattern(cx
, fm
.pat_
);
2157 return cx
->compartment()->regExps
.get(cx
, pat
, opt
, &re_
);
2160 bool zeroLastIndex(JSContext
* cx
) {
2161 if (!regExpIsObject())
2164 // Use a fast path for same-global RegExp objects with writable
2166 if (obj_
->is
<RegExpObject
>()) {
2167 RegExpObject
* nobj
= &obj_
->as
<RegExpObject
>();
2168 if (nobj
->lookup(cx
, cx
->names().lastIndex
)->writable()) {
2169 nobj
->zeroLastIndex();
2174 // Handle everything else generically (including throwing if .lastIndex is non-writable).
2175 RootedValue
zero(cx
, Int32Value(0));
2176 return JSObject::setProperty(cx
, obj_
, obj_
, cx
->names().lastIndex
, &zero
, true);
2179 RegExpShared
& regExp() { return *re_
; }
2181 bool regExpIsObject() { return obj_
!= nullptr; }
2182 HandleObject
regExpObject() {
2183 MOZ_ASSERT(regExpIsObject());
2188 StringRegExpGuard(const StringRegExpGuard
&) = delete;
2189 void operator=(const StringRegExpGuard
&) = delete;
2192 } /* anonymous namespace */
2195 DoMatchLocal(JSContext
* cx
, CallArgs args
, RegExpStatics
* res
, HandleLinearString input
,
2198 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2199 RegExpRunStatus status
= re
.execute(cx
, input
, 0, &matches
);
2200 if (status
== RegExpRunStatus_Error
)
2203 if (status
== RegExpRunStatus_Success_NotFound
) {
2204 args
.rval().setNull();
2208 if (!res
->updateFromMatchPairs(cx
, input
, matches
))
2211 RootedValue
rval(cx
);
2212 if (!CreateRegExpMatchResult(cx
, input
, matches
, &rval
))
2215 args
.rval().set(rval
);
2219 /* ES5 15.5.4.10 step 8. */
2221 DoMatchGlobal(JSContext
* cx
, CallArgs args
, RegExpStatics
* res
, HandleLinearString input
,
2222 StringRegExpGuard
& g
)
2226 // This single zeroing of "lastIndex" covers all "lastIndex" changes in the
2227 // rest of String.prototype.match, particularly in steps 8f(i) and
2228 // 8f(iii)(2)(a). Here's why.
2230 // The inputs to the calls to RegExp.prototype.exec are a RegExp object
2231 // whose .global is true and a string. The only side effect of a call in
2232 // these circumstances is that the RegExp's .lastIndex will be modified to
2233 // the next starting index after the discovered match (or to 0 if there's
2234 // no remaining match). Because .lastIndex is a non-configurable data
2235 // property and no script-controllable code executes after step 8a, passing
2236 // step 8a implies *every* .lastIndex set succeeds. String.prototype.match
2237 // calls RegExp.prototype.exec repeatedly, and the last call doesn't match,
2238 // so the final value of .lastIndex is 0: exactly the state after step 8a
2239 // succeeds. No spec step lets script observe intermediate .lastIndex
2242 // The arrays returned by RegExp.prototype.exec always have a string at
2243 // index 0, for which [[Get]]s have no side effects.
2245 // Filling in a new array using [[DefineOwnProperty]] is unobservable.
2247 // This is a tricky point, because after this set, our implementation *can*
2248 // fail. The key is that script can't distinguish these failure modes from
2249 // one where, in spec terms, we fail immediately after step 8a. That *in
2250 // reality* we might have done extra matching work, or created a partial
2251 // results array to return, or hit an interrupt, is irrelevant. The
2252 // script can't tell we did any of those things but didn't update
2253 // .lastIndex. Thus we can optimize steps 8b onward however we want,
2254 // including eliminating intermediate .lastIndex sets, as long as we don't
2255 // add ways for script to observe the intermediate states.
2257 // In short: it's okay to cheat (by setting .lastIndex to 0, once) because
2258 // we can't get caught.
2259 if (!g
.zeroLastIndex(cx
))
2263 AutoValueVector
elements(cx
);
2265 size_t lastSuccessfulStart
= 0;
2267 // The loop variables from steps 8c-e aren't needed, as we use different
2268 // techniques from the spec to implement step 8f's loop.
2271 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2272 size_t charsLen
= input
->length();
2273 RegExpShared
& re
= g
.regExp();
2274 for (size_t searchIndex
= 0; searchIndex
<= charsLen
; ) {
2275 if (!CheckForInterrupt(cx
))
2278 // Steps 8f(i-ii), minus "lastIndex" updates (see above).
2279 RegExpRunStatus status
= re
.execute(cx
, input
, searchIndex
, &matches
);
2280 if (status
== RegExpRunStatus_Error
)
2284 if (status
== RegExpRunStatus_Success_NotFound
)
2287 lastSuccessfulStart
= searchIndex
;
2288 MatchPair
& match
= matches
[0];
2290 // Steps 8f(iii)(1-3).
2291 searchIndex
= match
.isEmpty() ? match
.limit
+ 1 : match
.limit
;
2293 // Step 8f(iii)(4-5).
2294 JSLinearString
* str
= NewDependentString(cx
, input
, match
.start
, match
.length());
2297 if (!elements
.append(StringValue(str
)))
2302 if (elements
.empty()) {
2303 args
.rval().setNull();
2307 // The last *successful* match updates the RegExpStatics. (Interestingly,
2308 // this implies that String.prototype.match's semantics aren't those
2309 // implied by the RegExp.prototype.exec calls in the ES5 algorithm.)
2310 res
->updateLazily(cx
, input
, &re
, lastSuccessfulStart
);
2312 // Steps 8b, 8f(iii)(5-6), 8h.
2313 JSObject
* array
= NewDenseCopiedArray(cx
, elements
.length(), elements
.begin());
2317 args
.rval().setObject(*array
);
2322 BuildFlatMatchArray(JSContext
* cx
, HandleString textstr
, const FlatMatch
& fm
, CallArgs
* args
)
2324 if (fm
.match() < 0) {
2325 args
->rval().setNull();
2329 /* For this non-global match, produce a RegExp.exec-style array. */
2330 RootedObject
obj(cx
, NewDenseEmptyArray(cx
));
2334 RootedValue
patternVal(cx
, StringValue(fm
.pattern()));
2335 RootedValue
matchVal(cx
, Int32Value(fm
.match()));
2336 RootedValue
textVal(cx
, StringValue(textstr
));
2338 if (!JSObject::defineElement(cx
, obj
, 0, patternVal
) ||
2339 !JSObject::defineProperty(cx
, obj
, cx
->names().index
, matchVal
) ||
2340 !JSObject::defineProperty(cx
, obj
, cx
->names().input
, textVal
))
2345 args
->rval().setObject(*obj
);
2349 /* ES5 15.5.4.10. */
2351 js::str_match(JSContext
* cx
, unsigned argc
, Value
* vp
)
2353 CallArgs args
= CallArgsFromVp(argc
, vp
);
2356 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
2360 /* Steps 3-4, plus the trailing-argument "flags" extension. */
2361 StringRegExpGuard
g(cx
);
2362 if (!g
.init(cx
, args
, true))
2365 /* Fast path when the search pattern can be searched for as a string. */
2366 if (const FlatMatch
* fm
= g
.tryFlatMatch(cx
, str
, 1, args
.length()))
2367 return BuildFlatMatchArray(cx
, str
, *fm
, &args
);
2369 /* Return if there was an error in tryFlatMatch. */
2370 if (cx
->isExceptionPending())
2373 /* Create regular-expression internals as needed to perform the match. */
2374 if (!g
.normalizeRegExp(cx
, false, 1, args
))
2377 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
2381 RootedLinearString
linearStr(cx
, str
->ensureLinear(cx
));
2386 if (!g
.regExp().global())
2387 return DoMatchLocal(cx
, args
, res
, linearStr
, g
.regExp());
2390 return DoMatchGlobal(cx
, args
, res
, linearStr
, g
);
2394 js::str_search(JSContext
* cx
, unsigned argc
, Value
* vp
)
2396 CallArgs args
= CallArgsFromVp(argc
, vp
);
2397 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
2401 StringRegExpGuard
g(cx
);
2402 if (!g
.init(cx
, args
, true))
2404 if (const FlatMatch
* fm
= g
.tryFlatMatch(cx
, str
, 1, args
.length())) {
2405 args
.rval().setInt32(fm
->match());
2409 if (cx
->isExceptionPending()) /* from tryFlatMatch */
2412 if (!g
.normalizeRegExp(cx
, false, 1, args
))
2415 RootedLinearString
linearStr(cx
, str
->ensureLinear(cx
));
2419 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
2423 /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
2424 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2425 RegExpRunStatus status
= g
.regExp().execute(cx
, linearStr
, 0, &matches
);
2426 if (status
== RegExpRunStatus_Error
)
2429 if (status
== RegExpRunStatus_Success
)
2430 res
->updateLazily(cx
, linearStr
, &g
.regExp(), 0);
2432 args
.rval().setInt32(status
== RegExpRunStatus_Success_NotFound
? -1 : matches
[0].start
);
2436 // Utility for building a rope (lazy concatenation) of strings.
2441 RopeBuilder(const RopeBuilder
& other
) = delete;
2442 void operator=(const RopeBuilder
& other
) = delete;
2445 explicit RopeBuilder(JSContext
* cx
)
2446 : cx(cx
), res(cx
, cx
->runtime()->emptyString
)
2449 inline bool append(HandleString str
) {
2450 res
= ConcatStrings
<CanGC
>(cx
, res
, str
);
2454 inline JSString
* result() {
2461 template <typename CharT
>
2463 FindDollarIndex(const CharT
* chars
, size_t length
)
2465 if (const CharT
* p
= js_strchr_limit(chars
, '$', chars
+ length
)) {
2466 uint32_t dollarIndex
= p
- chars
;
2467 MOZ_ASSERT(dollarIndex
< length
);
2475 explicit ReplaceData(JSContext
* cx
)
2476 : str(cx
), g(cx
), lambda(cx
), elembase(cx
), repstr(cx
),
2477 fig(cx
, NullValue()), sb(cx
)
2480 inline void setReplacementString(JSLinearString
* string
) {
2486 AutoCheckCannotGC nogc
;
2487 dollarIndex
= string
->hasLatin1Chars()
2488 ? FindDollarIndex(string
->latin1Chars(nogc
), string
->length())
2489 : FindDollarIndex(string
->twoByteChars(nogc
), string
->length());
2492 inline void setReplacementFunction(JSObject
* func
) {
2497 dollarIndex
= UINT32_MAX
;
2500 RootedString str
; /* 'this' parameter object as a string */
2501 StringRegExpGuard g
; /* regexp parameter object and private data */
2502 RootedObject lambda
; /* replacement function object or null */
2503 RootedNativeObject elembase
; /* object for function(a){return b[a]} replace */
2504 RootedLinearString repstr
; /* replacement string */
2505 uint32_t dollarIndex
; /* index of first $ in repstr, or UINT32_MAX */
2506 int leftIndex
; /* left context index in str->chars */
2507 bool calledBack
; /* record whether callback has been called */
2508 FastInvokeGuard fig
; /* used for lambda calls, also holds arguments */
2509 StringBuffer sb
; /* buffer built during DoMatch */
2512 } /* anonymous namespace */
2515 ReplaceRegExp(JSContext
* cx
, RegExpStatics
* res
, ReplaceData
& rdata
);
2518 DoMatchForReplaceLocal(JSContext
* cx
, RegExpStatics
* res
, HandleLinearString linearStr
,
2519 RegExpShared
& re
, ReplaceData
& rdata
)
2521 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2522 RegExpRunStatus status
= re
.execute(cx
, linearStr
, 0, &matches
);
2523 if (status
== RegExpRunStatus_Error
)
2526 if (status
== RegExpRunStatus_Success_NotFound
)
2529 if (!res
->updateFromMatchPairs(cx
, linearStr
, matches
))
2532 return ReplaceRegExp(cx
, res
, rdata
);
2536 DoMatchForReplaceGlobal(JSContext
* cx
, RegExpStatics
* res
, HandleLinearString linearStr
,
2537 RegExpShared
& re
, ReplaceData
& rdata
)
2539 size_t charsLen
= linearStr
->length();
2540 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2541 for (size_t count
= 0, searchIndex
= 0; searchIndex
<= charsLen
; ++count
) {
2542 if (!CheckForInterrupt(cx
))
2545 RegExpRunStatus status
= re
.execute(cx
, linearStr
, searchIndex
, &matches
);
2546 if (status
== RegExpRunStatus_Error
)
2549 if (status
== RegExpRunStatus_Success_NotFound
)
2552 MatchPair
& match
= matches
[0];
2553 searchIndex
= match
.isEmpty() ? match
.limit
+ 1 : match
.limit
;
2555 if (!res
->updateFromMatchPairs(cx
, linearStr
, matches
))
2558 if (!ReplaceRegExp(cx
, res
, rdata
))
2565 template <typename CharT
>
2567 InterpretDollar(RegExpStatics
* res
, const CharT
* bp
, const CharT
* dp
, const CharT
* ep
,
2568 ReplaceData
& rdata
, JSSubString
* out
, size_t* skip
)
2570 MOZ_ASSERT(*dp
== '$');
2572 /* If there is only a dollar, bail now */
2576 /* Interpret all Perl match-induced dollar variables. */
2577 char16_t dc
= dp
[1];
2578 if (JS7_ISDEC(dc
)) {
2579 /* ECMA-262 Edition 3: 1-9 or 01-99 */
2580 unsigned num
= JS7_UNDEC(dc
);
2581 if (num
> res
->getMatches().parenCount())
2584 const CharT
* cp
= dp
+ 2;
2585 if (cp
< ep
&& (dc
= *cp
, JS7_ISDEC(dc
))) {
2586 unsigned tmp
= 10 * num
+ JS7_UNDEC(dc
);
2587 if (tmp
<= res
->getMatches().parenCount()) {
2597 MOZ_ASSERT(num
<= res
->getMatches().parenCount());
2600 * Note: we index to get the paren with the (1-indexed) pair
2601 * number, as opposed to a (0-indexed) paren number.
2603 res
->getParen(num
, out
);
2610 out
->init(rdata
.repstr
, dp
- bp
, 1);
2613 res
->getLastMatch(out
);
2616 res
->getLastParen(out
);
2619 res
->getLeftContext(out
);
2622 res
->getRightContext(out
);
2628 template <typename CharT
>
2630 FindReplaceLengthString(JSContext
* cx
, RegExpStatics
* res
, ReplaceData
& rdata
, size_t* sizep
)
2632 JSLinearString
* repstr
= rdata
.repstr
;
2633 CheckedInt
<uint32_t> replen
= repstr
->length();
2635 if (rdata
.dollarIndex
!= UINT32_MAX
) {
2636 AutoCheckCannotGC nogc
;
2637 MOZ_ASSERT(rdata
.dollarIndex
< repstr
->length());
2638 const CharT
* bp
= repstr
->chars
<CharT
>(nogc
);
2639 const CharT
* dp
= bp
+ rdata
.dollarIndex
;
2640 const CharT
* ep
= bp
+ repstr
->length();
2644 if (InterpretDollar(res
, bp
, dp
, ep
, rdata
, &sub
, &skip
)) {
2645 if (sub
.length
> skip
)
2646 replen
+= sub
.length
- skip
;
2648 replen
-= skip
- sub
.length
;
2654 dp
= js_strchr_limit(dp
, '$', ep
);
2658 if (!replen
.isValid()) {
2659 js_ReportAllocationOverflow(cx
);
2663 *sizep
= replen
.value();
2668 FindReplaceLength(JSContext
* cx
, RegExpStatics
* res
, ReplaceData
& rdata
, size_t* sizep
)
2670 if (rdata
.elembase
) {
2672 * The base object is used when replace was passed a lambda which looks like
2673 * 'function(a) { return b[a]; }' for the base object b. b will not change
2674 * in the course of the replace unless we end up making a scripted call due
2675 * to accessing a scripted getter or a value with a scripted toString.
2677 MOZ_ASSERT(rdata
.lambda
);
2678 MOZ_ASSERT(!rdata
.elembase
->getOps()->lookupProperty
);
2679 MOZ_ASSERT(!rdata
.elembase
->getOps()->getProperty
);
2681 RootedValue
match(cx
);
2682 if (!res
->createLastMatch(cx
, &match
))
2684 JSAtom
* atom
= ToAtom
<CanGC
>(cx
, match
);
2689 if (HasDataProperty(cx
, rdata
.elembase
, AtomToId(atom
), v
.address()) && v
.isString()) {
2690 rdata
.repstr
= v
.toString()->ensureLinear(cx
);
2693 *sizep
= rdata
.repstr
->length();
2698 * Couldn't handle this property, fall through and despecialize to the
2699 * general lambda case.
2701 rdata
.elembase
= nullptr;
2705 RootedObject
lambda(cx
, rdata
.lambda
);
2706 PreserveRegExpStatics
staticsGuard(cx
, res
);
2707 if (!staticsGuard
.init(cx
))
2711 * In the lambda case, not only do we find the replacement string's
2712 * length, we compute repstr and return it via rdata for use within
2713 * DoReplace. The lambda is called with arguments ($&, $1, $2, ...,
2714 * index, input), i.e., all the properties of a regexp match array.
2715 * For $&, etc., we must create string jsvals from cx->regExpStatics.
2716 * We grab up stack space to keep the newborn strings GC-rooted.
2718 unsigned p
= res
->getMatches().parenCount();
2719 unsigned argc
= 1 + p
+ 2;
2721 InvokeArgs
& args
= rdata
.fig
.args();
2722 if (!args
.init(argc
))
2725 args
.setCallee(ObjectValue(*lambda
));
2726 args
.setThis(UndefinedValue());
2728 /* Push $&, $1, $2, ... */
2730 if (!res
->createLastMatch(cx
, args
[argi
++]))
2733 for (size_t i
= 0; i
< res
->getMatches().parenCount(); ++i
) {
2734 if (!res
->createParen(cx
, i
+ 1, args
[argi
++]))
2738 /* Push match index and input string. */
2739 args
[argi
++].setInt32(res
->getMatches()[0].start
);
2740 args
[argi
].setString(rdata
.str
);
2742 if (!rdata
.fig
.invoke(cx
))
2745 /* root repstr: rdata is on the stack, so scanned by conservative gc. */
2746 JSString
* repstr
= ToString
<CanGC
>(cx
, args
.rval());
2749 rdata
.repstr
= repstr
->ensureLinear(cx
);
2752 *sizep
= rdata
.repstr
->length();
2756 return rdata
.repstr
->hasLatin1Chars()
2757 ? FindReplaceLengthString
<Latin1Char
>(cx
, res
, rdata
, sizep
)
2758 : FindReplaceLengthString
<char16_t
>(cx
, res
, rdata
, sizep
);
2762 * Precondition: |rdata.sb| already has necessary growth space reserved (as
2763 * derived from FindReplaceLength), and has been inflated to TwoByte if
2766 template <typename CharT
>
2768 DoReplace(RegExpStatics
* res
, ReplaceData
& rdata
)
2770 AutoCheckCannotGC nogc
;
2771 JSLinearString
* repstr
= rdata
.repstr
;
2772 const CharT
* bp
= repstr
->chars
<CharT
>(nogc
);
2773 const CharT
* cp
= bp
;
2775 if (rdata
.dollarIndex
!= UINT32_MAX
) {
2776 MOZ_ASSERT(rdata
.dollarIndex
< repstr
->length());
2777 const CharT
* dp
= bp
+ rdata
.dollarIndex
;
2778 const CharT
* ep
= bp
+ repstr
->length();
2780 /* Move one of the constant portions of the replacement value. */
2781 size_t len
= dp
- cp
;
2782 rdata
.sb
.infallibleAppend(cp
, len
);
2787 if (InterpretDollar(res
, bp
, dp
, ep
, rdata
, &sub
, &skip
)) {
2788 rdata
.sb
.infallibleAppendSubstring(sub
.base
, sub
.offset
, sub
.length
);
2795 dp
= js_strchr_limit(dp
, '$', ep
);
2798 rdata
.sb
.infallibleAppend(cp
, repstr
->length() - (cp
- bp
));
2802 ReplaceRegExp(JSContext
* cx
, RegExpStatics
* res
, ReplaceData
& rdata
)
2805 const MatchPair
& match
= res
->getMatches()[0];
2806 MOZ_ASSERT(!match
.isUndefined());
2807 MOZ_ASSERT(match
.limit
>= match
.start
&& match
.limit
>= 0);
2809 rdata
.calledBack
= true;
2810 size_t leftoff
= rdata
.leftIndex
;
2811 size_t leftlen
= match
.start
- leftoff
;
2812 rdata
.leftIndex
= match
.limit
;
2814 size_t replen
= 0; /* silence 'unused' warning */
2815 if (!FindReplaceLength(cx
, res
, rdata
, &replen
))
2818 CheckedInt
<uint32_t> newlen(rdata
.sb
.length());
2821 if (!newlen
.isValid()) {
2822 js_ReportAllocationOverflow(cx
);
2827 * Inflate the buffer now if needed, to avoid (fallible) Latin1 to TwoByte
2828 * inflation later on.
2830 JSLinearString
& str
= rdata
.str
->asLinear(); /* flattened for regexp */
2831 if (str
.hasTwoByteChars() || rdata
.repstr
->hasTwoByteChars()) {
2832 if (!rdata
.sb
.ensureTwoByteChars())
2836 if (!rdata
.sb
.reserve(newlen
.value()))
2839 /* Append skipped-over portion of the search value. */
2840 rdata
.sb
.infallibleAppendSubstring(&str
, leftoff
, leftlen
);
2842 if (rdata
.repstr
->hasLatin1Chars())
2843 DoReplace
<Latin1Char
>(res
, rdata
);
2845 DoReplace
<char16_t
>(res
, rdata
);
2850 BuildFlatReplacement(JSContext
* cx
, HandleString textstr
, HandleString repstr
,
2851 const FlatMatch
& fm
, MutableHandleValue rval
)
2853 RopeBuilder
builder(cx
);
2854 size_t match
= fm
.match();
2855 size_t matchEnd
= match
+ fm
.patternLength();
2857 if (textstr
->isRope()) {
2859 * If we are replacing over a rope, avoid flattening it by iterating
2860 * through it, building a new rope.
2862 StringSegmentRange
r(cx
);
2863 if (!r
.init(textstr
))
2866 while (!r
.empty()) {
2867 RootedString
str(cx
, r
.front());
2868 size_t len
= str
->length();
2869 size_t strEnd
= pos
+ len
;
2870 if (pos
< matchEnd
&& strEnd
> match
) {
2872 * We need to special-case any part of the rope that overlaps
2873 * with the replacement string.
2877 * If this part of the rope overlaps with the left side of
2878 * the pattern, then it must be the only one to overlap with
2879 * the first character in the pattern, so we include the
2880 * replacement string here.
2882 RootedString
leftSide(cx
, NewDependentString(cx
, str
, 0, match
- pos
));
2884 !builder
.append(leftSide
) ||
2885 !builder
.append(repstr
)) {
2891 * If str runs off the end of the matched string, append the
2894 if (strEnd
> matchEnd
) {
2895 RootedString
rightSide(cx
, NewDependentString(cx
, str
, matchEnd
- pos
,
2896 strEnd
- matchEnd
));
2897 if (!rightSide
|| !builder
.append(rightSide
))
2901 if (!builder
.append(str
))
2904 pos
+= str
->length();
2909 RootedString
leftSide(cx
, NewDependentString(cx
, textstr
, 0, match
));
2912 RootedString
rightSide(cx
);
2913 rightSide
= NewDependentString(cx
, textstr
, match
+ fm
.patternLength(),
2914 textstr
->length() - match
- fm
.patternLength());
2916 !builder
.append(leftSide
) ||
2917 !builder
.append(repstr
) ||
2918 !builder
.append(rightSide
)) {
2923 rval
.setString(builder
.result());
2927 template <typename CharT
>
2929 AppendDollarReplacement(StringBuffer
& newReplaceChars
, size_t firstDollarIndex
,
2930 const FlatMatch
& fm
, JSLinearString
* text
,
2931 const CharT
* repChars
, size_t repLength
)
2933 MOZ_ASSERT(firstDollarIndex
< repLength
);
2935 size_t matchStart
= fm
.match();
2936 size_t matchLimit
= matchStart
+ fm
.patternLength();
2938 /* Move the pre-dollar chunk in bulk. */
2939 newReplaceChars
.infallibleAppend(repChars
, firstDollarIndex
);
2941 /* Move the rest char-by-char, interpreting dollars as we encounter them. */
2942 const CharT
* repLimit
= repChars
+ repLength
;
2943 for (const CharT
* it
= repChars
+ firstDollarIndex
; it
< repLimit
; ++it
) {
2944 if (*it
!= '$' || it
== repLimit
- 1) {
2945 if (!newReplaceChars
.append(*it
))
2950 switch (*(it
+ 1)) {
2951 case '$': /* Eat one of the dollars. */
2952 if (!newReplaceChars
.append(*it
))
2956 if (!newReplaceChars
.appendSubstring(text
, matchStart
, matchLimit
- matchStart
))
2960 if (!newReplaceChars
.appendSubstring(text
, 0, matchStart
))
2964 if (!newReplaceChars
.appendSubstring(text
, matchLimit
, text
->length() - matchLimit
))
2967 default: /* The dollar we saw was not special (no matter what its mother told it). */
2968 if (!newReplaceChars
.append(*it
))
2972 ++it
; /* We always eat an extra char in the above switch. */
2979 * Perform a linear-scan dollar substitution on the replacement text,
2980 * constructing a result string that looks like:
2982 * newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:]
2985 BuildDollarReplacement(JSContext
* cx
, JSString
* textstrArg
, JSLinearString
* repstr
,
2986 uint32_t firstDollarIndex
, const FlatMatch
& fm
, MutableHandleValue rval
)
2988 RootedLinearString
textstr(cx
, textstrArg
->ensureLinear(cx
));
2992 size_t matchStart
= fm
.match();
2993 size_t matchLimit
= matchStart
+ fm
.patternLength();
2998 * len(newstr) >= len(orig) - len(match) + len(replacement)
3000 * Note that dollar vars _could_ make the resulting text smaller than this.
3002 StringBuffer
newReplaceChars(cx
);
3003 if (repstr
->hasTwoByteChars() && !newReplaceChars
.ensureTwoByteChars())
3006 if (!newReplaceChars
.reserve(textstr
->length() - fm
.patternLength() + repstr
->length()))
3010 if (repstr
->hasLatin1Chars()) {
3011 AutoCheckCannotGC nogc
;
3012 res
= AppendDollarReplacement(newReplaceChars
, firstDollarIndex
, fm
, textstr
,
3013 repstr
->latin1Chars(nogc
), repstr
->length());
3015 AutoCheckCannotGC nogc
;
3016 res
= AppendDollarReplacement(newReplaceChars
, firstDollarIndex
, fm
, textstr
,
3017 repstr
->twoByteChars(nogc
), repstr
->length());
3022 RootedString
leftSide(cx
, NewDependentString(cx
, textstr
, 0, matchStart
));
3026 RootedString
newReplace(cx
, newReplaceChars
.finishString());
3030 MOZ_ASSERT(textstr
->length() >= matchLimit
);
3031 RootedString
rightSide(cx
, NewDependentString(cx
, textstr
, matchLimit
,
3032 textstr
->length() - matchLimit
));
3036 RopeBuilder
builder(cx
);
3037 if (!builder
.append(leftSide
) || !builder
.append(newReplace
) || !builder
.append(rightSide
))
3040 rval
.setString(builder
.result());
3049 StringRange(size_t s
, size_t l
)
3050 : start(s
), length(l
)
3054 template <typename CharT
>
3056 CopySubstringsToFatInline(JSFatInlineString
* dest
, const CharT
* src
, const StringRange
* ranges
,
3057 size_t rangesLen
, size_t outputLen
)
3059 CharT
* buf
= dest
->init
<CharT
>(outputLen
);
3061 for (size_t i
= 0; i
< rangesLen
; i
++) {
3062 PodCopy(buf
+ pos
, src
+ ranges
[i
].start
, ranges
[i
].length
);
3063 pos
+= ranges
[i
].length
;
3066 MOZ_ASSERT(pos
== outputLen
);
3070 static inline JSFatInlineString
*
3071 FlattenSubstrings(JSContext
* cx
, Handle
<JSFlatString
*> flatStr
, const StringRange
* ranges
,
3072 size_t rangesLen
, size_t outputLen
)
3074 JSFatInlineString
* str
= NewGCFatInlineString
<CanGC
>(cx
);
3078 AutoCheckCannotGC nogc
;
3079 if (flatStr
->hasLatin1Chars())
3080 CopySubstringsToFatInline(str
, flatStr
->latin1Chars(nogc
), ranges
, rangesLen
, outputLen
);
3082 CopySubstringsToFatInline(str
, flatStr
->twoByteChars(nogc
), ranges
, rangesLen
, outputLen
);
3087 AppendSubstrings(JSContext
* cx
, Handle
<JSFlatString
*> flatStr
,
3088 const StringRange
* ranges
, size_t rangesLen
)
3090 MOZ_ASSERT(rangesLen
);
3092 /* For single substrings, construct a dependent string. */
3094 return NewDependentString(cx
, flatStr
, ranges
[0].start
, ranges
[0].length
);
3096 bool isLatin1
= flatStr
->hasLatin1Chars();
3097 uint32_t fatInlineMaxLength
= JSFatInlineString::MAX_LENGTH_TWO_BYTE
;
3099 fatInlineMaxLength
= JSFatInlineString::MAX_LENGTH_LATIN1
;
3101 /* Collect substrings into a rope */
3103 RopeBuilder
rope(cx
);
3104 RootedString
part(cx
, nullptr);
3105 while (i
< rangesLen
) {
3107 /* Find maximum range that fits in JSFatInlineString */
3108 size_t substrLen
= 0;
3110 for (; end
< rangesLen
; end
++) {
3111 if (substrLen
+ ranges
[end
].length
> fatInlineMaxLength
)
3113 substrLen
+= ranges
[end
].length
;
3117 /* Not even one range fits JSFatInlineString, use DependentString */
3118 const StringRange
& sr
= ranges
[i
++];
3119 part
= NewDependentString(cx
, flatStr
, sr
.start
, sr
.length
);
3121 /* Copy the ranges (linearly) into a JSFatInlineString */
3122 part
= FlattenSubstrings(cx
, flatStr
, ranges
+ i
, end
- i
, substrLen
);
3129 /* Appending to the rope permanently roots the substring. */
3130 if (!rope
.append(part
))
3134 return rope
.result();
3138 StrReplaceRegexpRemove(JSContext
* cx
, HandleString str
, RegExpShared
& re
, MutableHandleValue rval
)
3140 Rooted
<JSFlatString
*> flatStr(cx
, str
->ensureFlat(cx
));
3144 Vector
<StringRange
, 16, SystemAllocPolicy
> ranges
;
3146 size_t charsLen
= flatStr
->length();
3148 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
3149 size_t startIndex
= 0; /* Index used for iterating through the string. */
3150 size_t lastIndex
= 0; /* Index after last successful match. */
3151 size_t lazyIndex
= 0; /* Index before last successful match. */
3153 /* Accumulate StringRanges for unmatched substrings. */
3154 while (startIndex
<= charsLen
) {
3155 if (!CheckForInterrupt(cx
))
3158 RegExpRunStatus status
= re
.execute(cx
, flatStr
, startIndex
, &matches
);
3159 if (status
== RegExpRunStatus_Error
)
3161 if (status
== RegExpRunStatus_Success_NotFound
)
3163 MatchPair
& match
= matches
[0];
3165 /* Include the latest unmatched substring. */
3166 if (size_t(match
.start
) > lastIndex
) {
3167 if (!ranges
.append(StringRange(lastIndex
, match
.start
- lastIndex
)))
3171 lazyIndex
= lastIndex
;
3172 lastIndex
= match
.limit
;
3174 startIndex
= match
.isEmpty() ? match
.limit
+ 1 : match
.limit
;
3176 /* Non-global removal executes at most once. */
3183 /* If unmatched, return the input string. */
3185 if (startIndex
> 0) {
3186 res
= cx
->global()->getRegExpStatics(cx
);
3189 res
->updateLazily(cx
, flatStr
, &re
, lazyIndex
);
3191 rval
.setString(str
);
3195 /* The last successful match updates the RegExpStatics. */
3196 res
= cx
->global()->getRegExpStatics(cx
);
3200 res
->updateLazily(cx
, flatStr
, &re
, lazyIndex
);
3202 /* Include any remaining part of the string. */
3203 if (lastIndex
< charsLen
) {
3204 if (!ranges
.append(StringRange(lastIndex
, charsLen
- lastIndex
)))
3208 /* Handle the empty string before calling .begin(). */
3209 if (ranges
.empty()) {
3210 rval
.setString(cx
->runtime()->emptyString
);
3214 JSString
* result
= AppendSubstrings(cx
, flatStr
, ranges
.begin(), ranges
.length());
3218 rval
.setString(result
);
3223 StrReplaceRegExp(JSContext
* cx
, ReplaceData
& rdata
, MutableHandleValue rval
)
3225 rdata
.leftIndex
= 0;
3226 rdata
.calledBack
= false;
3228 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
3232 RegExpShared
& re
= rdata
.g
.regExp();
3234 // The spec doesn't describe this function very clearly, so we go ahead and
3235 // assume that when the input to String.prototype.replace is a global
3236 // RegExp, calling the replacer function (assuming one was provided) takes
3237 // place only after the matching is done. See the comment at the beginning
3238 // of DoMatchGlobal explaining why we can zero the RegExp object's
3239 // lastIndex property here.
3240 if (re
.global() && !rdata
.g
.zeroLastIndex(cx
))
3243 /* Optimize removal. */
3244 if (rdata
.repstr
&& rdata
.repstr
->length() == 0) {
3245 MOZ_ASSERT(!rdata
.lambda
&& !rdata
.elembase
&& rdata
.dollarIndex
== UINT32_MAX
);
3246 return StrReplaceRegexpRemove(cx
, rdata
.str
, re
, rval
);
3249 RootedLinearString
linearStr(cx
, rdata
.str
->ensureLinear(cx
));
3254 if (!DoMatchForReplaceGlobal(cx
, res
, linearStr
, re
, rdata
))
3257 if (!DoMatchForReplaceLocal(cx
, res
, linearStr
, re
, rdata
))
3261 if (!rdata
.calledBack
) {
3262 /* Didn't match, so the string is unmodified. */
3263 rval
.setString(rdata
.str
);
3268 res
->getRightContext(&sub
);
3269 if (!rdata
.sb
.appendSubstring(sub
.base
, sub
.offset
, sub
.length
))
3272 JSString
* retstr
= rdata
.sb
.finishString();
3276 rval
.setString(retstr
);
3281 str_replace_regexp(JSContext
* cx
, CallArgs args
, ReplaceData
& rdata
)
3283 if (!rdata
.g
.normalizeRegExp(cx
, true, 2, args
))
3286 return StrReplaceRegExp(cx
, rdata
, args
.rval());
3290 js::str_replace_regexp_raw(JSContext
* cx
, HandleString string
, HandleObject regexp
,
3291 HandleString replacement
, MutableHandleValue rval
)
3293 /* Optimize removal, so we don't have to create ReplaceData */
3294 if (replacement
->length() == 0) {
3295 StringRegExpGuard
guard(cx
);
3296 if (!guard
.init(cx
, regexp
))
3299 RegExpShared
& re
= guard
.regExp();
3300 return StrReplaceRegexpRemove(cx
, string
, re
, rval
);
3303 ReplaceData
rdata(cx
);
3306 JSLinearString
* repl
= replacement
->ensureLinear(cx
);
3310 rdata
.setReplacementString(repl
);
3312 if (!rdata
.g
.init(cx
, regexp
))
3315 return StrReplaceRegExp(cx
, rdata
, rval
);
3319 StrReplaceString(JSContext
* cx
, ReplaceData
& rdata
, const FlatMatch
& fm
, MutableHandleValue rval
)
3322 * Note: we could optimize the text.length == pattern.length case if we wanted,
3323 * even in the presence of dollar metachars.
3325 if (rdata
.dollarIndex
!= UINT32_MAX
)
3326 return BuildDollarReplacement(cx
, rdata
.str
, rdata
.repstr
, rdata
.dollarIndex
, fm
, rval
);
3327 return BuildFlatReplacement(cx
, rdata
.str
, rdata
.repstr
, fm
, rval
);
3330 static const uint32_t ReplaceOptArg
= 2;
3333 js::str_replace_string_raw(JSContext
* cx
, HandleString string
, HandleString pattern
,
3334 HandleString replacement
, MutableHandleValue rval
)
3336 ReplaceData
rdata(cx
);
3339 JSLinearString
* repl
= replacement
->ensureLinear(cx
);
3342 rdata
.setReplacementString(repl
);
3344 if (!rdata
.g
.init(cx
, pattern
))
3346 const FlatMatch
* fm
= rdata
.g
.tryFlatMatch(cx
, rdata
.str
, ReplaceOptArg
, ReplaceOptArg
, false);
3348 if (fm
->match() < 0) {
3349 rval
.setString(string
);
3353 return StrReplaceString(cx
, rdata
, *fm
, rval
);
3357 str_replace_flat_lambda(JSContext
* cx
, CallArgs outerArgs
, ReplaceData
& rdata
, const FlatMatch
& fm
)
3359 RootedString
matchStr(cx
, NewDependentString(cx
, rdata
.str
, fm
.match(), fm
.patternLength()));
3363 /* lambda(matchStr, matchStart, textstr) */
3364 static const uint32_t lambdaArgc
= 3;
3365 if (!rdata
.fig
.args().init(lambdaArgc
))
3368 CallArgs
& args
= rdata
.fig
.args();
3369 args
.setCallee(ObjectValue(*rdata
.lambda
));
3370 args
.setThis(UndefinedValue());
3372 Value
* sp
= args
.array();
3373 sp
[0].setString(matchStr
);
3374 sp
[1].setInt32(fm
.match());
3375 sp
[2].setString(rdata
.str
);
3377 if (!rdata
.fig
.invoke(cx
))
3380 RootedString
repstr(cx
, ToString
<CanGC
>(cx
, args
.rval()));
3384 RootedString
leftSide(cx
, NewDependentString(cx
, rdata
.str
, 0, fm
.match()));
3388 size_t matchLimit
= fm
.match() + fm
.patternLength();
3389 RootedString
rightSide(cx
, NewDependentString(cx
, rdata
.str
, matchLimit
,
3390 rdata
.str
->length() - matchLimit
));
3394 RopeBuilder
builder(cx
);
3395 if (!(builder
.append(leftSide
) &&
3396 builder
.append(repstr
) &&
3397 builder
.append(rightSide
))) {
3401 outerArgs
.rval().setString(builder
.result());
3406 * Pattern match the script to check if it is indexing into a particular
3407 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
3408 * such cases, which are used by javascript packers (particularly the popular
3409 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
3410 * code patterns generated by such packers here.
3413 LambdaIsGetElem(JSContext
* cx
, JSObject
& lambda
, MutableHandleNativeObject pobj
)
3415 if (!lambda
.is
<JSFunction
>())
3418 RootedFunction
fun(cx
, &lambda
.as
<JSFunction
>());
3419 if (!fun
->isInterpreted())
3422 JSScript
* script
= fun
->getOrCreateScript(cx
);
3426 jsbytecode
* pc
= script
->code();
3429 * JSOP_GETALIASEDVAR tells us exactly where to find the base object 'b'.
3430 * Rule out the (unlikely) possibility of a heavyweight function since it
3431 * would make our scope walk off by 1.
3433 if (JSOp(*pc
) != JSOP_GETALIASEDVAR
|| fun
->isHeavyweight())
3435 ScopeCoordinate
sc(pc
);
3436 ScopeObject
* scope
= &fun
->environment()->as
<ScopeObject
>();
3437 for (unsigned i
= 0; i
< sc
.hops(); ++i
)
3438 scope
= &scope
->enclosingScope().as
<ScopeObject
>();
3439 Value b
= scope
->aliasedVar(sc
);
3440 pc
+= JSOP_GETALIASEDVAR_LENGTH
;
3442 /* Look for 'a' to be the lambda's first argument. */
3443 if (JSOp(*pc
) != JSOP_GETARG
|| GET_ARGNO(pc
) != 0)
3445 pc
+= JSOP_GETARG_LENGTH
;
3448 if (JSOp(*pc
) != JSOP_GETELEM
)
3450 pc
+= JSOP_GETELEM_LENGTH
;
3453 if (JSOp(*pc
) != JSOP_RETURN
)
3456 /* 'b' must behave like a normal object. */
3460 JSObject
& bobj
= b
.toObject();
3461 const Class
* clasp
= bobj
.getClass();
3462 if (!clasp
->isNative() || clasp
->ops
.lookupProperty
|| clasp
->ops
.getProperty
)
3465 pobj
.set(&bobj
.as
<NativeObject
>());
3470 js::str_replace(JSContext
* cx
, unsigned argc
, Value
* vp
)
3472 CallArgs args
= CallArgsFromVp(argc
, vp
);
3474 ReplaceData
rdata(cx
);
3475 rdata
.str
= ThisToStringForStringProto(cx
, args
);
3479 if (!rdata
.g
.init(cx
, args
))
3482 /* Extract replacement string/function. */
3483 if (args
.length() >= ReplaceOptArg
&& IsCallable(args
[1])) {
3484 rdata
.setReplacementFunction(&args
[1].toObject());
3486 if (!LambdaIsGetElem(cx
, *rdata
.lambda
, &rdata
.elembase
))
3489 JSLinearString
* string
= ArgToRootedString(cx
, args
, 1);
3493 rdata
.setReplacementString(string
);
3496 rdata
.fig
.initFunction(ObjectOrNullValue(rdata
.lambda
));
3499 * Unlike its |String.prototype| brethren, |replace| doesn't convert
3500 * its input to a regular expression. (Even if it contains metachars.)
3502 * However, if the user invokes our (non-standard) |flags| argument
3503 * extension then we revert to creating a regular expression. Note that
3504 * this is observable behavior through the side-effect mutation of the
3508 const FlatMatch
* fm
= rdata
.g
.tryFlatMatch(cx
, rdata
.str
, ReplaceOptArg
, args
.length(), false);
3511 if (cx
->isExceptionPending()) /* oom in RopeMatch in tryFlatMatch */
3513 return str_replace_regexp(cx
, args
, rdata
);
3516 if (fm
->match() < 0) {
3517 args
.rval().setString(rdata
.str
);
3522 return str_replace_flat_lambda(cx
, args
, rdata
, *fm
);
3523 return StrReplaceString(cx
, rdata
, *fm
, args
.rval());
3528 class SplitMatchResult
{
3534 JS_STATIC_ASSERT(SIZE_MAX
> JSString::MAX_LENGTH
);
3535 endIndex_
= SIZE_MAX
;
3537 bool isFailure() const {
3538 return endIndex_
== SIZE_MAX
;
3540 size_t endIndex() const {
3541 MOZ_ASSERT(!isFailure());
3544 size_t length() const {
3545 MOZ_ASSERT(!isFailure());
3548 void setResult(size_t length
, size_t endIndex
) {
3550 endIndex_
= endIndex
;
3554 } /* anonymous namespace */
3556 template<class Matcher
>
3558 SplitHelper(JSContext
* cx
, HandleLinearString str
, uint32_t limit
, const Matcher
& splitMatch
,
3559 Handle
<TypeObject
*> type
)
3561 size_t strLength
= str
->length();
3562 SplitMatchResult result
;
3565 if (strLength
== 0) {
3566 if (!splitMatch(cx
, str
, 0, &result
))
3570 * NB: Unlike in the non-empty string case, it's perfectly fine
3571 * (indeed the spec requires it) if we match at the end of the
3572 * string. Thus these cases should hold:
3574 * var a = "".split("");
3575 * assertEq(a.length, 0);
3576 * var b = "".split(/.?/);
3577 * assertEq(b.length, 0);
3579 if (!result
.isFailure())
3580 return NewDenseEmptyArray(cx
);
3582 RootedValue
v(cx
, StringValue(str
));
3583 return NewDenseCopiedArray(cx
, 1, v
.address());
3587 size_t lastEndIndex
= 0;
3591 AutoValueVector
splits(cx
);
3593 while (index
< strLength
) {
3595 if (!splitMatch(cx
, str
, index
, &result
))
3601 * Our match algorithm differs from the spec in that it returns the
3602 * next index at which a match happens. If no match happens we're
3605 * But what if the match is at the end of the string (and the string is
3606 * not empty)? Per 13(c)(ii) this shouldn't be a match, so we have to
3607 * specially exclude it. Thus this case should hold:
3609 * var a = "abc".split(/\b/);
3610 * assertEq(a.length, 1);
3611 * assertEq(a[0], "abc");
3613 if (result
.isFailure())
3616 /* Step 13(c)(i). */
3617 size_t sepLength
= result
.length();
3618 size_t endIndex
= result
.endIndex();
3619 if (sepLength
== 0 && endIndex
== strLength
)
3622 /* Step 13(c)(ii). */
3623 if (endIndex
== lastEndIndex
) {
3628 /* Step 13(c)(iii). */
3629 MOZ_ASSERT(lastEndIndex
< endIndex
);
3630 MOZ_ASSERT(sepLength
<= strLength
);
3631 MOZ_ASSERT(lastEndIndex
+ sepLength
<= endIndex
);
3633 /* Steps 13(c)(iii)(1-3). */
3634 size_t subLength
= size_t(endIndex
- sepLength
- lastEndIndex
);
3635 JSString
* sub
= NewDependentString(cx
, str
, lastEndIndex
, subLength
);
3636 if (!sub
|| !splits
.append(StringValue(sub
)))
3639 /* Step 13(c)(iii)(4). */
3640 if (splits
.length() == limit
)
3641 return NewDenseCopiedArray(cx
, splits
.length(), splits
.begin());
3643 /* Step 13(c)(iii)(5). */
3644 lastEndIndex
= endIndex
;
3646 /* Step 13(c)(iii)(6-7). */
3647 if (Matcher::returnsCaptures
) {
3648 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
3652 const MatchPairs
& matches
= res
->getMatches();
3653 for (size_t i
= 0; i
< matches
.parenCount(); i
++) {
3654 /* Steps 13(c)(iii)(7)(a-c). */
3655 if (!matches
[i
+ 1].isUndefined()) {
3657 res
->getParen(i
+ 1, &parsub
);
3658 sub
= NewDependentString(cx
, parsub
.base
, parsub
.offset
, parsub
.length
);
3659 if (!sub
|| !splits
.append(StringValue(sub
)))
3662 /* Only string entries have been accounted for so far. */
3663 AddTypePropertyId(cx
, type
, JSID_VOID
, UndefinedValue());
3664 if (!splits
.append(UndefinedValue()))
3668 /* Step 13(c)(iii)(7)(d). */
3669 if (splits
.length() == limit
)
3670 return NewDenseCopiedArray(cx
, splits
.length(), splits
.begin());
3674 /* Step 13(c)(iii)(8). */
3675 index
= lastEndIndex
;
3679 JSString
* sub
= NewDependentString(cx
, str
, lastEndIndex
, strLength
- lastEndIndex
);
3680 if (!sub
|| !splits
.append(StringValue(sub
)))
3684 return NewDenseCopiedArray(cx
, splits
.length(), splits
.begin());
3687 // Fast-path for splitting a string into a character array via split("").
3689 CharSplitHelper(JSContext
* cx
, HandleLinearString str
, uint32_t limit
)
3691 size_t strLength
= str
->length();
3693 return NewDenseEmptyArray(cx
);
3695 js::StaticStrings
& staticStrings
= cx
->staticStrings();
3696 uint32_t resultlen
= (limit
< strLength
? limit
: strLength
);
3698 AutoValueVector
splits(cx
);
3699 if (!splits
.reserve(resultlen
))
3702 for (size_t i
= 0; i
< resultlen
; ++i
) {
3703 JSString
* sub
= staticStrings
.getUnitStringForElement(cx
, str
, i
);
3706 splits
.infallibleAppend(StringValue(sub
));
3709 return NewDenseCopiedArray(cx
, splits
.length(), splits
.begin());
3715 * The SplitMatch operation from ES5 15.5.4.14 is implemented using different
3716 * paths for regular expression and string separators.
3718 * The algorithm differs from the spec in that we return the next index at
3719 * which a match happens.
3721 class SplitRegExpMatcher
3727 SplitRegExpMatcher(RegExpShared
& re
, RegExpStatics
* res
) : re(re
), res(res
) {}
3729 static const bool returnsCaptures
= true;
3731 bool operator()(JSContext
* cx
, HandleLinearString str
, size_t index
,
3732 SplitMatchResult
* result
) const
3734 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
3735 RegExpRunStatus status
= re
.execute(cx
, str
, index
, &matches
);
3736 if (status
== RegExpRunStatus_Error
)
3739 if (status
== RegExpRunStatus_Success_NotFound
) {
3740 result
->setFailure();
3744 if (!res
->updateFromMatchPairs(cx
, str
, matches
))
3748 res
->getLastMatch(&sep
);
3750 result
->setResult(sep
.length
, matches
[0].limit
);
3755 class SplitStringMatcher
3757 RootedLinearString sep
;
3760 SplitStringMatcher(JSContext
* cx
, HandleLinearString sep
)
3764 static const bool returnsCaptures
= false;
3766 bool operator()(JSContext
* cx
, JSLinearString
* str
, size_t index
, SplitMatchResult
* res
) const
3768 MOZ_ASSERT(index
== 0 || index
< str
->length());
3769 int match
= StringMatch(str
, sep
, index
);
3773 res
->setResult(sep
->length(), match
+ sep
->length());
3778 } /* anonymous namespace */
3782 js::str_split(JSContext
* cx
, unsigned argc
, Value
* vp
)
3784 CallArgs args
= CallArgsFromVp(argc
, vp
);
3787 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
3791 RootedTypeObject
type(cx
, GetTypeCallerInitObject(cx
, JSProto_Array
));
3794 AddTypePropertyId(cx
, type
, JSID_VOID
, Type::StringType());
3796 /* Step 5: Use the second argument as the split limit, if given. */
3798 if (args
.hasDefined(1)) {
3800 if (!ToNumber(cx
, args
[1], &d
))
3802 limit
= ToUint32(d
);
3809 RootedLinearString
sepstr(cx
);
3810 bool sepDefined
= args
.hasDefined(0);
3812 if (IsObjectWithClass(args
[0], ESClass_RegExp
, cx
)) {
3813 RootedObject
obj(cx
, &args
[0].toObject());
3814 if (!RegExpToShared(cx
, obj
, &re
))
3817 sepstr
= ArgToRootedString(cx
, args
, 0);
3825 JSObject
* aobj
= NewDenseEmptyArray(cx
);
3828 aobj
->setType(type
);
3829 args
.rval().setObject(*aobj
);
3835 RootedValue
v(cx
, StringValue(str
));
3836 JSObject
* aobj
= NewDenseCopiedArray(cx
, 1, v
.address());
3839 aobj
->setType(type
);
3840 args
.rval().setObject(*aobj
);
3843 RootedLinearString
linearStr(cx
, str
->ensureLinear(cx
));
3848 RootedObject
aobj(cx
);
3849 if (!re
.initialized()) {
3850 if (sepstr
->length() == 0) {
3851 aobj
= CharSplitHelper(cx
, linearStr
, limit
);
3853 SplitStringMatcher
matcher(cx
, sepstr
);
3854 aobj
= SplitHelper(cx
, linearStr
, limit
, matcher
, type
);
3857 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
3860 SplitRegExpMatcher
matcher(*re
, res
);
3861 aobj
= SplitHelper(cx
, linearStr
, limit
, matcher
, type
);
3867 aobj
->setType(type
);
3868 args
.rval().setObject(*aobj
);
3873 js::str_split_string(JSContext
* cx
, HandleTypeObject type
, HandleString str
, HandleString sep
)
3875 RootedLinearString
linearStr(cx
, str
->ensureLinear(cx
));
3879 RootedLinearString
linearSep(cx
, sep
->ensureLinear(cx
));
3883 uint32_t limit
= UINT32_MAX
;
3885 RootedObject
aobj(cx
);
3886 if (linearSep
->length() == 0) {
3887 aobj
= CharSplitHelper(cx
, linearStr
, limit
);
3889 SplitStringMatcher
matcher(cx
, linearSep
);
3890 aobj
= SplitHelper(cx
, linearStr
, limit
, matcher
, type
);
3896 aobj
->setType(type
);
3901 * Python-esque sequence operations.
3904 str_concat(JSContext
* cx
, unsigned argc
, Value
* vp
)
3906 CallArgs args
= CallArgsFromVp(argc
, vp
);
3907 JSString
* str
= ThisToStringForStringProto(cx
, args
);
3911 for (unsigned i
= 0; i
< args
.length(); i
++) {
3912 JSString
* argStr
= ToString
<NoGC
>(cx
, args
[i
]);
3914 RootedString
strRoot(cx
, str
);
3915 argStr
= ToString
<CanGC
>(cx
, args
[i
]);
3921 JSString
* next
= ConcatStrings
<NoGC
>(cx
, str
, argStr
);
3925 RootedString
strRoot(cx
, str
), argStrRoot(cx
, argStr
);
3926 str
= ConcatStrings
<CanGC
>(cx
, strRoot
, argStrRoot
);
3932 args
.rval().setString(str
);
3936 static const JSFunctionSpec string_methods
[] = {
3938 JS_FN(js_toSource_str
, str_toSource
, 0,0),
3941 /* Java-like methods. */
3942 JS_FN(js_toString_str
, js_str_toString
, 0,0),
3943 JS_FN(js_valueOf_str
, js_str_toString
, 0,0),
3944 JS_FN("toLowerCase", str_toLowerCase
, 0,JSFUN_GENERIC_NATIVE
),
3945 JS_FN("toUpperCase", str_toUpperCase
, 0,JSFUN_GENERIC_NATIVE
),
3946 JS_FN("charAt", js_str_charAt
, 1,JSFUN_GENERIC_NATIVE
),
3947 JS_FN("charCodeAt", js_str_charCodeAt
, 1,JSFUN_GENERIC_NATIVE
),
3948 JS_SELF_HOSTED_FN("substring", "String_substring", 2,0),
3949 JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1,0),
3950 JS_FN("contains", str_contains
, 1,JSFUN_GENERIC_NATIVE
),
3951 JS_FN("indexOf", str_indexOf
, 1,JSFUN_GENERIC_NATIVE
),
3952 JS_FN("lastIndexOf", str_lastIndexOf
, 1,JSFUN_GENERIC_NATIVE
),
3953 JS_FN("startsWith", str_startsWith
, 1,JSFUN_GENERIC_NATIVE
),
3954 JS_FN("endsWith", str_endsWith
, 1,JSFUN_GENERIC_NATIVE
),
3955 JS_FN("trim", str_trim
, 0,JSFUN_GENERIC_NATIVE
),
3956 JS_FN("trimLeft", str_trimLeft
, 0,JSFUN_GENERIC_NATIVE
),
3957 JS_FN("trimRight", str_trimRight
, 0,JSFUN_GENERIC_NATIVE
),
3958 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase
, 0,JSFUN_GENERIC_NATIVE
),
3959 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase
, 0,JSFUN_GENERIC_NATIVE
),
3961 JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0),
3963 JS_FN("localeCompare", str_localeCompare
, 1,JSFUN_GENERIC_NATIVE
),
3965 JS_SELF_HOSTED_FN("repeat", "String_repeat", 1,0),
3967 JS_FN("normalize", str_normalize
, 0,JSFUN_GENERIC_NATIVE
),
3970 /* Perl-ish methods (search is actually Python-esque). */
3971 JS_FN("match", str_match
, 1,JSFUN_GENERIC_NATIVE
),
3972 JS_FN("search", str_search
, 1,JSFUN_GENERIC_NATIVE
),
3973 JS_FN("replace", str_replace
, 2,JSFUN_GENERIC_NATIVE
),
3974 JS_FN("split", str_split
, 2,JSFUN_GENERIC_NATIVE
),
3975 JS_SELF_HOSTED_FN("substr", "String_substr", 2,0),
3977 /* Python-esque sequence methods. */
3978 JS_FN("concat", str_concat
, 1,JSFUN_GENERIC_NATIVE
),
3979 JS_SELF_HOSTED_FN("slice", "String_slice", 2,0),
3981 /* HTML string methods. */
3982 JS_SELF_HOSTED_FN("bold", "String_bold", 0,0),
3983 JS_SELF_HOSTED_FN("italics", "String_italics", 0,0),
3984 JS_SELF_HOSTED_FN("fixed", "String_fixed", 0,0),
3985 JS_SELF_HOSTED_FN("strike", "String_strike", 0,0),
3986 JS_SELF_HOSTED_FN("small", "String_small", 0,0),
3987 JS_SELF_HOSTED_FN("big", "String_big", 0,0),
3988 JS_SELF_HOSTED_FN("blink", "String_blink", 0,0),
3989 JS_SELF_HOSTED_FN("sup", "String_sup", 0,0),
3990 JS_SELF_HOSTED_FN("sub", "String_sub", 0,0),
3991 JS_SELF_HOSTED_FN("anchor", "String_anchor", 1,0),
3992 JS_SELF_HOSTED_FN("link", "String_link", 1,0),
3993 JS_SELF_HOSTED_FN("fontcolor","String_fontcolor", 1,0),
3994 JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1,0),
3996 JS_SELF_HOSTED_SYM_FN(iterator
, "String_iterator", 0,0),
4000 // ES6 rev 27 (2014 Aug 24) 21.1.1
4002 js_String(JSContext
* cx
, unsigned argc
, Value
* vp
)
4004 CallArgs args
= CallArgsFromVp(argc
, vp
);
4006 RootedString
str(cx
);
4007 if (args
.length() > 0) {
4008 if (!args
.isConstructing() && args
[0].isSymbol())
4009 return js::SymbolDescriptiveString(cx
, args
[0].toSymbol(), args
.rval());
4011 str
= ToString
<CanGC
>(cx
, args
[0]);
4015 str
= cx
->runtime()->emptyString
;
4018 if (args
.isConstructing()) {
4019 StringObject
* strobj
= StringObject::create(cx
, str
);
4022 args
.rval().setObject(*strobj
);
4026 args
.rval().setString(str
);
/*
 * Native registered as "fromCharCode" in string_static_methods.
 *
 * A single-argument call is forwarded to str_fromCharCode_one_arg. Otherwise
 * a buffer of args.length() + 1 char16_t is obtained from cx->pod_malloc,
 * each argument is converted with ToUint16 and stored, the buffer is
 * zero-terminated, and NewString<CanGC> builds the result from the buffer;
 * the result is stored in args.rval().
 *
 * NOTE(review): the failure paths (allocation, ToUint16, NewString) are not
 * visible here -- confirm that |chars| is released on each of them.
 */
4031 js::str_fromCharCode(JSContext
* cx
, unsigned argc
, Value
* vp
)
4033 CallArgs args
= CallArgsFromVp(argc
, vp
);
4035 MOZ_ASSERT(args
.length() <= ARGS_LENGTH_MAX
);
4036 if (args
.length() == 1)
4037 return str_fromCharCode_one_arg(cx
, args
[0], args
.rval());
// One extra element is reserved for the terminating 0 written below.
4039 char16_t
* chars
= cx
->pod_malloc
<char16_t
>(args
.length() + 1);
4042 for (unsigned i
= 0; i
< args
.length(); i
++) {
4044 if (!ToUint16(cx
, args
[i
], &code
)) {
4048 chars
[i
] = char16_t(code
);
4050 chars
[args
.length()] = 0;
4051 JSString
* str
= NewString
<CanGC
>(cx
, chars
, args
.length());
4057 args
.rval().setString(str
);
/*
 * Single-argument form of String.fromCharCode, called from str_fromCharCode
 * when exactly one argument is passed.
 *
 * |code| is converted with ToUint16. If StaticStrings::hasUnit accepts the
 * resulting code unit, the pre-existing unit string from cx->staticStrings()
 * is stored in |rval|; otherwise a one-character string is created with
 * NewStringCopyN<CanGC> and stored in |rval|.
 */
4062 js::str_fromCharCode_one_arg(JSContext
* cx
, HandleValue code
, MutableHandleValue rval
)
4066 if (!ToUint16(cx
, code
, &ucode
))
// Reuse the static unit string when one exists for this code unit.
4069 if (StaticStrings::hasUnit(ucode
)) {
4070 rval
.setString(cx
->staticStrings().getUnit(ucode
));
4074 char16_t c
= char16_t(ucode
);
4075 JSString
* str
= NewStringCopyN
<CanGC
>(cx
, &c
, 1);
4079 rval
.setString(str
);
/*
 * Function specs that js_InitStringClass defines on the String constructor
 * (it passes this table to DefinePropertiesAndFunctions together with
 * |ctor|). "fromCharCode" is the only entry here backed by a native; the
 * other visible entries are self-hosted.
 */
4083 static const JSFunctionSpec string_static_methods
[] = {
4084 JS_FN("fromCharCode", js::str_fromCharCode
, 1, 0),
4086 JS_SELF_HOSTED_FN("fromCodePoint", "String_static_fromCodePoint", 1,0),
4087 JS_SELF_HOSTED_FN("raw", "String_static_raw", 2,0),
4088 JS_SELF_HOSTED_FN("substring", "String_static_substring", 3,0),
4089 JS_SELF_HOSTED_FN("substr", "String_static_substr", 3,0),
4090 JS_SELF_HOSTED_FN("slice", "String_static_slice", 3,0),
4092 // This must be at the end because of bug 853075: functions listed after
4093 // self-hosted methods aren't available in self-hosted code.
4095 JS_SELF_HOSTED_FN("localeCompare", "String_static_localeCompare", 2,0),
/*
 * Gives an empty StringObject its first property: a "length" data property
 * stored at LENGTH_SLOT with attributes JSPROP_PERMANENT | JSPROP_READONLY.
 * The object must have no properties yet (asserted). Returns whatever
 * addDataProperty returns.
 */
4101 StringObject::assignInitialShape(ExclusiveContext
* cx
, Handle
<StringObject
*> obj
)
4103 MOZ_ASSERT(obj
->empty());
4105 return obj
->addDataProperty(cx
, cx
->names().length
, LENGTH_SLOT
,
4106 JSPROP_PERMANENT
| JSPROP_READONLY
);
/*
 * Sets up the String class on the global object |obj| (which must be native
 * and is treated as a GlobalObject).
 *
 * Visible steps, in order:
 *   1. create a blank prototype of class StringObject and initialise it with
 *      the runtime's empty string;
 *   2. create the constructor function from js_String with length 1;
 *   3. register constructor and prototype under JSProto_String;
 *   4. link constructor and prototype;
 *   5. define string_methods on the prototype and string_static_methods on
 *      the constructor;
 *   6. define string_functions on the global.
 * The failure handling and the return value are not visible in this view.
 */
4110 js_InitStringClass(JSContext
* cx
, HandleObject obj
)
4112 MOZ_ASSERT(obj
->isNative());
4114 Rooted
<GlobalObject
*> global(cx
, &obj
->as
<GlobalObject
>());
// The prototype is itself a StringObject wrapping the empty string.
4116 Rooted
<JSString
*> empty(cx
, cx
->runtime()->emptyString
);
4117 RootedObject
proto(cx
, global
->createBlankPrototype(cx
, &StringObject::class_
));
4118 if (!proto
|| !proto
->as
<StringObject
>().init(cx
, empty
))
4121 /* Now create the String function. */
4122 RootedFunction
ctor(cx
);
4123 ctor
= global
->createConstructor(cx
, js_String
, cx
->names().String
, 1);
4127 if (!GlobalObject::initBuiltinConstructor(cx
, global
, JSProto_String
, ctor
, proto
))
4130 if (!LinkConstructorAndPrototype(cx
, ctor
, proto
))
4133 if (!DefinePropertiesAndFunctions(cx
, proto
, nullptr, string_methods
) ||
4134 !DefinePropertiesAndFunctions(cx
, ctor
, nullptr, string_static_methods
))
4140 * Define escape/unescape, the URI encode/decode functions, and maybe
4141 * uneval on the global object.
4143 if (!JS_DefineFunctions(cx
, global
, string_functions
))
/*
 * Produces a printable byte string for |vArg| in |bytes|.
 *
 * The value is rooted, converted to a string with either ValueToSource or
 * ToString<CanGC> (presumably selected by |asSource| -- the branch condition
 * is not visible here), passed through js_QuoteString with a quote argument
 * of 0, and finally encoded into |bytes| with encodeLatin1, whose result is
 * returned.
 */
4150 js_ValueToPrintable(JSContext
* cx
, const Value
& vArg
, JSAutoByteString
* bytes
, bool asSource
)
4152 RootedValue
v(cx
, vArg
);
4155 str
= ValueToSource(cx
, v
);
4157 str
= ToString
<CanGC
>(cx
, v
);
4160 str
= js_QuoteString(cx
, str
, 0);
4163 return bytes
->encodeLatin1(cx
, str
);
/*
 * Slow path of ToString for a value that is not already a string (asserted).
 *
 * Dispatch on the value's type, as far as visible here:
 *   - non-primitive: requires a JSContext and allowGC (the action taken when
 *     that check fails is not visible); the value is converted with
 *     ToPrimitive using the JSTYPE_STRING hint;
 *   - int32 / double: Int32ToString / NumberToString;
 *   - boolean: js_BooleanToString;
 *   - null: the "null" name;
 *   - symbol: reports JSMSG_SYMBOL_TO_STRING, but only when a JSContext is
 *     available and allowGC is set;
 *   - otherwise the value must be undefined (asserted): the "undefined" name.
 * Explicit instantiations for CanGC and NoGC follow the definition.
 */
4166 template <AllowGC allowGC
>
4168 js::ToStringSlow(ExclusiveContext
* cx
, typename MaybeRooted
<Value
, allowGC
>::HandleType arg
)
4170 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
4171 MOZ_ASSERT(!arg
.isString());
4174 if (!v
.isPrimitive()) {
4175 if (!cx
->shouldBeJSContext() || !allowGC
)
4177 RootedValue
v2(cx
, v
);
4178 if (!ToPrimitive(cx
->asJSContext(), JSTYPE_STRING
, &v2
))
4186 } else if (v
.isInt32()) {
4187 str
= Int32ToString
<allowGC
>(cx
, v
.toInt32());
4188 } else if (v
.isDouble()) {
4189 str
= NumberToString
<allowGC
>(cx
, v
.toDouble());
4190 } else if (v
.isBoolean()) {
4191 str
= js_BooleanToString(cx
, v
.toBoolean());
4192 } else if (v
.isNull()) {
4193 str
= cx
->names().null
;
4194 } else if (v
.isSymbol()) {
// The error is only reported when there is a JSContext and GC is allowed.
4195 if (cx
->shouldBeJSContext() && allowGC
) {
4196 JS_ReportErrorNumber(cx
->asJSContext(), js_GetErrorMessage
, nullptr,
4197 JSMSG_SYMBOL_TO_STRING
);
4201 MOZ_ASSERT(v
.isUndefined());
4202 str
= cx
->names().undefined
;
4208 js::ToStringSlow
<CanGC
>(ExclusiveContext
* cx
, HandleValue arg
);
4211 js::ToStringSlow
<NoGC
>(ExclusiveContext
* cx
, Value arg
);
/*
 * Public-API entry point taking a JSContext; it simply forwards to the
 * ToStringSlow<CanGC> template instantiation.
 */
4213 JS_PUBLIC_API(JSString
*)
4214 js::ToStringSlow(JSContext
* cx
, HandleValue v
)
4216 return ToStringSlow
<CanGC
>(cx
, v
);
/*
 * Builds a source-like string for |symbol|.
 *
 * A symbol whose code is neither InSymbolRegistry nor UniqueSymbol is a
 * well-known symbol (its code is asserted to be below
 * JS::WellKnownSymbolLimit); what is returned for it is not visible here.
 * Otherwise the result is "Symbol.for(" (registry symbols) or "Symbol("
 * (other symbols), followed by the description passed through
 * StringToSource, followed by ")".
 *
 * NOTE(review): the visible lines call StringToSource(cx, desc)
 * unconditionally; confirm how a symbol without a description is handled.
 */
4220 SymbolToSource(JSContext
* cx
, Symbol
* symbol
)
4222 RootedString
desc(cx
, symbol
->description());
4223 SymbolCode code
= symbol
->code();
4224 if (code
!= SymbolCode::InSymbolRegistry
&& code
!= SymbolCode::UniqueSymbol
) {
4225 // Well-known symbol.
4226 MOZ_ASSERT(uint32_t(code
) < JS::WellKnownSymbolLimit
);
4230 StringBuffer
buf(cx
);
4231 if (code
== SymbolCode::InSymbolRegistry
? !buf
.append("Symbol.for(") : !buf
.append("Symbol("))
4234 desc
= StringToSource(cx
, desc
);
4235 if (!desc
|| !buf
.append(desc
))
4238 if (!buf
.append(')'))
4240 return buf
.finishString();
/*
 * Converts |v| to a source-like string.
 *
 * Visible cases:
 *   - undefined: the void0 name;
 *   - string: StringToSource; symbol: SymbolToSource (the type checks guarding
 *     these two returns are not visible here);
 *   - other primitives: ToString<CanGC>, except that a double negative zero
 *     yields the two-character string "-0";
 *   - objects: the object's "toSource" property is fetched; if callable it
 *     is invoked with no arguments and its result converted with
 *     ToString<CanGC>; otherwise ObjectToSource is used.
 * Recursion depth is checked first (JS_CHECK_RECURSION returns nullptr).
 */
4244 js::ValueToSource(JSContext
* cx
, HandleValue v
)
4246 JS_CHECK_RECURSION(cx
, return nullptr);
4247 assertSameCompartment(cx
, v
);
4249 if (v
.isUndefined())
4250 return cx
->names().void0
;
4252 return StringToSource(cx
, v
.toString());
4254 return SymbolToSource(cx
, v
.toSymbol());
4255 if (v
.isPrimitive()) {
4256 /* Special case to preserve negative zero, _contra_ toString. */
4257 if (v
.isDouble() && IsNegativeZero(v
.toDouble())) {
4258 /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
4259 static const char16_t js_negzero_ucNstr
[] = {'-', '0'};
4261 return NewStringCopyN
<CanGC
>(cx
, js_negzero_ucNstr
, 2);
4263 return ToString
<CanGC
>(cx
, v
);
4266 RootedValue
fval(cx
);
4267 RootedObject
obj(cx
, &v
.toObject());
4268 if (!JSObject::getProperty(cx
, obj
, obj
, cx
->names().toSource
, &fval
))
// Prefer the object's own toSource when it is callable.
4270 if (IsCallable(fval
)) {
4271 RootedValue
rval(cx
);
4272 if (!Invoke(cx
, ObjectValue(*obj
), fval
, 0, nullptr, &rval
))
4274 return ToString
<CanGC
>(cx
, rval
);
4277 return ObjectToSource(cx
, obj
);
/*
 * Returns |str| quoted by js_QuoteString using the double-quote character.
 */
4281 js::StringToSource(JSContext
* cx
, JSString
* str
)
4283 return js_QuoteString(cx
, str
, '"');
/*
 * Compares the characters of two linear strings of equal length (asserted).
 *
 * Same-encoding pairs are compared with PodEqual. Mixed pairs go through the
 * pointer-based EqualChars overload, always passing the Latin-1 characters
 * first and the two-byte characters second, whichever string they belong to.
 */
4287 js::EqualChars(JSLinearString
* str1
, JSLinearString
* str2
)
4289 MOZ_ASSERT(str1
->length() == str2
->length());
4291 size_t len
= str1
->length();
4293 AutoCheckCannotGC nogc
;
4294 if (str1
->hasTwoByteChars()) {
4295 if (str2
->hasTwoByteChars())
4296 return PodEqual(str1
->twoByteChars(nogc
), str2
->twoByteChars(nogc
), len
);
// Mixed encodings: Latin-1 operand goes first.
4298 return EqualChars(str2
->latin1Chars(nogc
), str1
->twoByteChars(nogc
), len
);
4301 if (str2
->hasLatin1Chars())
4302 return PodEqual(str1
->latin1Chars(nogc
), str2
->latin1Chars(nogc
), len
);
4304 return EqualChars(str1
->latin1Chars(nogc
), str2
->twoByteChars(nogc
), len
);
/*
 * Equality test for two possibly non-linear strings; the answer is written
 * to |*result|.
 *
 * The lengths are compared first (what happens on a mismatch is not visible
 * here, presumably *result is set to false). Both strings are then
 * linearized with ensureLinear, which takes |cx|, and their characters are
 * compared with EqualChars.
 */
4308 js::EqualStrings(JSContext
* cx
, JSString
* str1
, JSString
* str2
, bool* result
)
4315 size_t length1
= str1
->length();
4316 if (length1
!= str2
->length()) {
4321 JSLinearString
* linear1
= str1
->ensureLinear(cx
);
4324 JSLinearString
* linear2
= str2
->ensureLinear(cx
);
4328 *result
= EqualChars(linear1
, linear2
);
/*
 * Equality test for two linear strings. Lengths are compared first (the
 * statement executed on a mismatch is not visible here, presumably
 * `return false`); equal-length strings are compared with EqualChars.
 */
4333 js::EqualStrings(JSLinearString
* str1
, JSLinearString
* str2
)
4338 size_t length1
= str1
->length();
4339 if (length1
!= str2
->length())
4342 return EqualChars(str1
, str2
);
/*
 * Orders two linear strings by dispatching to the CompareChars overload
 * matching each string's encoding (Latin-1 or two-byte), passing each
 * string's characters and length. Returns CompareChars' result.
 */
4346 CompareStringsImpl(JSLinearString
* str1
, JSLinearString
* str2
)
4348 size_t len1
= str1
->length();
4349 size_t len2
= str2
->length();
4351 AutoCheckCannotGC nogc
;
4352 if (str1
->hasLatin1Chars()) {
4353 const Latin1Char
* chars1
= str1
->latin1Chars(nogc
);
4354 return str2
->hasLatin1Chars()
4355 ? CompareChars(chars1
, len1
, str2
->latin1Chars(nogc
), len2
)
4356 : CompareChars(chars1
, len1
, str2
->twoByteChars(nogc
), len2
);
4359 const char16_t
* chars1
= str1
->twoByteChars(nogc
);
4360 return str2
->hasLatin1Chars()
4361 ? CompareChars(chars1
, len1
, str2
->latin1Chars(nogc
), len2
)
4362 : CompareChars(chars1
, len1
, str2
->twoByteChars(nogc
), len2
);
/*
 * Compares the two-byte character range (s1, len1) against the linear string
 * |s2|, selecting the CompareChars overload that matches s2's encoding.
 */
4366 js::CompareChars(const char16_t
* s1
, size_t len1
, JSLinearString
* s2
)
4368 AutoCheckCannotGC nogc
;
4369 return s2
->hasLatin1Chars()
4370 ? CompareChars(s1
, len1
, s2
->latin1Chars(nogc
), s2
->length())
4371 : CompareChars(s1
, len1
, s2
->twoByteChars(nogc
), s2
->length());
/*
 * Ordering comparison for two possibly non-linear strings; the ordering
 * value is written to |*result|. Both strings are linearized with
 * ensureLinear (which takes |cx|) and then handed to CompareStringsImpl.
 * Any fast path before the linearization and the failure handling are not
 * visible here.
 */
4375 js::CompareStrings(JSContext
* cx
, JSString
* str1
, JSString
* str2
, int32_t* result
)
4385 JSLinearString
* linear1
= str1
->ensureLinear(cx
);
4389 JSLinearString
* linear2
= str2
->ensureLinear(cx
);
4393 *result
= CompareStringsImpl(linear1
, linear2
);
/*
 * Orders two atoms by passing them straight to CompareStringsImpl.
 */
4398 js::CompareAtoms(JSAtom
* atom1
, JSAtom
* atom2
)
4400 return CompareStringsImpl(atom1
, atom2
);
/*
 * Tests whether the linear string |str| equals the NUL-terminated string
 * |asciiBytes|, every byte of which is asserted to be at most 127.
 *
 * Lengths are compared first (the statement run on a mismatch is not visible
 * here, presumably `return false`). The bytes are then viewed as Latin1Char
 * and compared with PodEqual against a Latin-1 string, or with EqualChars
 * against a two-byte string.
 */
4404 js::StringEqualsAscii(JSLinearString
* str
, const char* asciiBytes
)
4406 size_t length
= strlen(asciiBytes
);
4408 for (size_t i
= 0; i
!= length
; ++i
)
4409 MOZ_ASSERT(unsigned(asciiBytes
[i
]) <= 127);
4411 if (length
!= str
->length())
4414 const Latin1Char
* latin1
= reinterpret_cast<const Latin1Char
*>(asciiBytes
);
4416 AutoCheckCannotGC nogc
;
4417 return str
->hasLatin1Chars()
4418 ? PodEqual(latin1
, str
->latin1Chars(nogc
), length
)
4419 : EqualChars(latin1
, str
->twoByteChars(nogc
), length
);
/*
 * Length of a zero-terminated char16_t string: advances from |s| to the
 * first 0 code unit and returns the distance as a size_t.
 */
4423 js_strlen(const char16_t
* s
)
4427 for (t
= s
; *t
!= 0; t
++)
4429 return (size_t)(t
- s
);
/*
 * Compares two char16_t strings. The visible return yields the difference
 * of the code units currently pointed at, widened to int32_t; the loop that
 * advances both pointers and the equal-strings return are not visible here.
 */
4433 js_strcmp(const char16_t
* lhs
, const char16_t
* rhs
)
4437 return int32_t(*lhs
) - int32_t(*rhs
);
/*
 * Copies the NUL-terminated byte string |s| into a fresh array obtained from
 * cx->make_pod_array<char>. The copy covers strlen(s) + 1 bytes, i.e. it
 * includes the terminator. The result is returned as a
 * UniquePtr<char[], JS::FreePolicy>; the allocation-failure check is not
 * visible here.
 */
4444 UniquePtr
<char[], JS::FreePolicy
>
4445 js::DuplicateString(js::ExclusiveContext
* cx
, const char* s
)
4447 size_t n
= strlen(s
) + 1;
4448 auto ret
= cx
->make_pod_array
<char>(n
);
4451 PodCopy(ret
.get(), s
, n
);
/*
 * char16_t counterpart of the byte-string DuplicateString: copies
 * js_strlen(s) + 1 code units (terminator included) into a fresh array from
 * cx->make_pod_array<char16_t>. The allocation-failure check is not visible
 * here.
 */
4455 UniquePtr
<char16_t
[], JS::FreePolicy
>
4456 js::DuplicateString(js::ExclusiveContext
* cx
, const char16_t
* s
)
4458 size_t n
= js_strlen(s
) + 1;
4459 auto ret
= cx
->make_pod_array
<char16_t
>(n
);
4462 PodCopy(ret
.get(), s
, n
);
/*
 * Character search template, explicitly instantiated below for Latin1Char
 * and char16_t.
 *
 * NOTE(review): the body is not visible here. From the signature this
 * presumably searches [s, limit) for |c| and returns a pointer to the match
 * -- confirm the behaviour (including the not-found result) against the
 * full definition.
 */
4466 template <typename CharT
>
4468 js_strchr_limit(const CharT
* s
, char16_t c
, const CharT
* limit
)
4478 template const Latin1Char
*
4479 js_strchr_limit(const Latin1Char
* s
, char16_t c
, const Latin1Char
* limit
);
4481 template const char16_t
*
4482 js_strchr_limit(const char16_t
* s
, char16_t c
, const char16_t
* limit
);
/*
 * Widens a byte string to char16_t. |*lengthp| supplies the input byte
 * count. A buffer of nchars + 1 char16_t is allocated with cx->pod_malloc
 * and each byte is stored as its unsigned value, so bytes 0x80-0xFF map to
 * U+0080-U+00FF.
 *
 * The termination of the buffer, the value written back through |lengthp|
 * and the failure path are not visible here (the trailing comment indicates
 * |*lengthp| is zeroed in some case).
 */
4485 js::InflateString(ExclusiveContext
* cx
, const char* bytes
, size_t* lengthp
)
4489 size_t nbytes
= *lengthp
;
4492 chars
= cx
->pod_malloc
<char16_t
>(nchars
+ 1);
// Cast through unsigned char so that high bytes do not sign-extend.
4495 for (size_t i
= 0; i
< nchars
; i
++)
4496 chars
[i
] = (unsigned char) bytes
[i
];
4502 // For compatibility with callers of JS_DecodeBytes we must zero lengthp
/*
 * Narrows |srclen| characters from |src| into the byte buffer |dst| by
 * casting each character to char. |*dstlenp| supplies the capacity of |dst|.
 *
 * If the source is longer than the buffer, only the first |dstlen|
 * characters are copied and JSMSG_BUFFER_TOO_SMALL is reported on |maybecx|
 * with GC suppressed. Otherwise all |srclen| characters are copied.
 *
 * NOTE(review): the parameter name suggests |maybecx| may be null; the guard
 * around the error report, the return values and any update of |*dstlenp|
 * are not visible here -- confirm against the full definition.
 * Explicit instantiations for Latin1Char and char16_t follow.
 */
4508 template <typename CharT
>
4510 js::DeflateStringToBuffer(JSContext
* maybecx
, const CharT
* src
, size_t srclen
,
4511 char* dst
, size_t* dstlenp
)
4513 size_t dstlen
= *dstlenp
;
4514 if (srclen
> dstlen
) {
// Fill what fits before reporting the overflow.
4515 for (size_t i
= 0; i
< dstlen
; i
++)
4516 dst
[i
] = char(src
[i
]);
4518 AutoSuppressGC
suppress(maybecx
);
4519 JS_ReportErrorNumber(maybecx
, js_GetErrorMessage
, nullptr,
4520 JSMSG_BUFFER_TOO_SMALL
);
4524 for (size_t i
= 0; i
< srclen
; i
++)
4525 dst
[i
] = char(src
[i
]);
4531 js::DeflateStringToBuffer(JSContext
* maybecx
, const Latin1Char
* src
, size_t srclen
,
4532 char* dst
, size_t* dstlenp
);
4535 js::DeflateStringToBuffer(JSContext
* maybecx
, const char16_t
* src
, size_t srclen
,
4536 char* dst
, size_t* dstlenp
);
4541 * Identifier start chars:
/*
 * Lookup table indexed by character code 0..127, laid out ten entries per
 * row (row label = tens, column header = units). The entries set to true
 * are 36 ('$'), 65-90 ('A'-'Z'), 95 ('_') and 97-122 ('a'-'z').
 * ____ is presumably a macro for false; its definition is not visible here.
 */
4547 const bool js_isidstart
[] = {
4548 /* 0 1 2 3 4 5 6 7 8 9 */
4549 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4550 /* 1 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4551 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4552 /* 3 */ ____
, ____
, ____
, ____
, ____
, ____
, true, ____
, ____
, ____
,
4553 /* 4 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4554 /* 5 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4555 /* 6 */ ____
, ____
, ____
, ____
, ____
, true, true, true, true, true,
4556 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4557 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4558 /* 9 */ true, ____
, ____
, ____
, ____
, true, ____
, true, true, true,
4559 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4560 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4561 /* 12 */ true, true, true, ____
, ____
, ____
, ____
, ____
/*
 * Lookup table indexed by character code 0..127, ten entries per row (row
 * label = tens, column header = units). The entries set to true are
 * 36 ('$'), 48-57 ('0'-'9'), 65-90 ('A'-'Z'), 95 ('_') and 97-122
 * ('a'-'z'), i.e. the js_isidstart set plus the decimal digits.
 * ____ is presumably a macro for false; its definition is not visible here.
 */
4572 const bool js_isident
[] = {
4573 /* 0 1 2 3 4 5 6 7 8 9 */
4574 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4575 /* 1 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4576 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4577 /* 3 */ ____
, ____
, ____
, ____
, ____
, ____
, true, ____
, ____
, ____
,
4578 /* 4 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, true, true,
4579 /* 5 */ true, true, true, true, true, true, true, true, ____
, ____
,
4580 /* 6 */ ____
, ____
, ____
, ____
, ____
, true, true, true, true, true,
4581 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4582 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4583 /* 9 */ true, ____
, ____
, ____
, ____
, true, ____
, true, true, true,
4584 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4585 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4586 /* 12 */ true, true, true, ____
, ____
, ____
, ____
, ____
/*
 * Lookup table indexed by character code 0..127, ten entries per row (row
 * label = tens, column header = units). The entries set to true are 9-13
 * and 32, matching the characters listed in the comment below.
 * ____ is presumably a macro for false; its definition is not visible here.
 */
4589 /* Whitespace chars: '\t', '\n', '\v', '\f', '\r', ' '. */
4590 const bool js_isspace
[] = {
4591 /* 0 1 2 3 4 5 6 7 8 9 */
4592 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, true,
4593 /* 1 */ true, true, true, true, ____
, ____
, ____
, ____
, ____
, ____
,
4594 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4595 /* 3 */ ____
, ____
, true, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4596 /* 4 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4597 /* 5 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4598 /* 6 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4599 /* 7 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4600 /* 8 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4601 /* 9 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4602 /* 10 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4603 /* 11 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4604 /* 12 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
4608 * Uri reserved chars + #:
/*
 * Lookup table indexed by character code 0..127, ten entries per row (row
 * label = tens, column header = units). The entries set to true are
 * 35 ('#'), 36 ('$'), 38 ('&'), 43 ('+'), 44 (','), 47 ('/'), 58 (':'),
 * 59 (';'), 61 ('='), 63 ('?') and 64 ('@').
 * str_decodeURI passes it to Decode as the reserved set and str_encodeURI
 * passes it to Encode as the second unescaped set.
 * ____ is presumably a macro for false; its definition is not visible here.
 */
4621 static const bool js_isUriReservedPlusPound
[] = {
4622 /* 0 1 2 3 4 5 6 7 8 9 */
4623 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4624 /* 1 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4625 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4626 /* 3 */ ____
, ____
, ____
, ____
, ____
, true, true, ____
, true, ____
,
4627 /* 4 */ ____
, ____
, ____
, true, true, ____
, ____
, true, ____
, ____
,
4628 /* 5 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, true, true,
4629 /* 6 */ ____
, true, ____
, true, true, ____
, ____
, ____
, ____
, ____
,
4630 /* 7 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4631 /* 8 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4632 /* 9 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4633 /* 10 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4634 /* 11 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4635 /* 12 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
4639 * Uri unescaped chars:
/*
 * Lookup table indexed by character code 0..127, ten entries per row (row
 * label = tens, column header = units). The entries set to true are
 * 33 ('!'), 39 ('\''), 40-42 ('(' ')' '*'), 45 ('-'), 46 ('.'),
 * 48-57 ('0'-'9'), 65-90 ('A'-'Z'), 95 ('_'), 97-122 ('a'-'z') and
 * 126 ('~'). Both str_encodeURI and str_encodeURI_Component pass it to
 * Encode as the primary unescaped set.
 * ____ is presumably a macro for false; its definition is not visible here.
 */
4653 static const bool js_isUriUnescaped
[] = {
4654 /* 0 1 2 3 4 5 6 7 8 9 */
4655 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4656 /* 1 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4657 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4658 /* 3 */ ____
, ____
, ____
, true, ____
, ____
, ____
, ____
, ____
, true,
4659 /* 4 */ true, true, true, ____
, ____
, true, true, ____
, true, true,
4660 /* 5 */ true, true, true, true, true, true, true, true, ____
, ____
,
4661 /* 6 */ ____
, ____
, ____
, ____
, ____
, true, true, true, true, true,
4662 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4663 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4664 /* 9 */ true, ____
, ____
, ____
, ____
, true, ____
, true, true, true,
4665 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4666 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4667 /* 12 */ true, true, true, ____
, ____
, ____
, true, ____
4672 #define URI_CHUNK 64U
/*
 * Finishes |sb| into a JSString with finishString() and stores that string
 * in |rval|. Used as the success tail of the Encode and Decode wrappers.
 * The null check on the finished string and the return statements are not
 * visible here.
 */
4675 TransferBufferToString(StringBuffer
& sb
, MutableHandleValue rval
)
4677 JSString
* str
= sb
.finishString();
4680 rval
.setString(str
);
4685 * ECMA 3, 15.1.3 URI Handling Function Properties
4687 * The following are implementations of the algorithms
4688 * given in the ECMA specification for the hidden functions
4689 * 'Encode' and 'Decode'.
/*
 * Outcome of the Encode template. The Encode(JSContext*, ...) wrapper
 * reports JSMSG_BAD_URI for Encode_BadUri, checks Encode_Failure before
 * that (its handling there is not visible), and finishes the buffer on
 * Encode_Success. Encode_Failure is what the template returns when
 * appending to the StringBuffer fails.
 */
4691 enum EncodeResult
{ Encode_Failure
, Encode_BadUri
, Encode_Success
};
/*
 * URI-encodes chars[0..length) into |sb|.
 *
 * Per code unit c, as far as visible here:
 *   - c < 128 and set in |unescapedSet| or (when non-null) |unescapedSet2|:
 *     handled by the first branch, presumably appended unchanged
 *     (Encode_Failure if that fails);
 *   - c in 0xDC00-0xDFFF (a low surrogate with no preceding high surrogate):
 *     Encode_BadUri;
 *   - c in 0xD800-0xDBFF (high surrogate): the next unit must be a low
 *     surrogate, otherwise Encode_BadUri; the pair is combined into one
 *     code point;
 *   - the code point is converted to UTF-8 with js_OneUcs4ToUtf8Char and
 *     each byte is appended as three characters from hexBuf, whose second
 *     and third characters are the byte's uppercase hex digits (hexBuf[0]
 *     is presumably '%'; its initialisation is not visible here).
 * Returns Encode_Success once every unit has been processed.
 */
4693 template <typename CharT
>
4695 Encode(StringBuffer
& sb
, const CharT
* chars
, size_t length
,
4696 const bool* unescapedSet
, const bool* unescapedSet2
)
4698 static const char HexDigits
[] = "0123456789ABCDEF"; /* NB: uppercase */
4704 for (size_t k
= 0; k
< length
; k
++) {
4705 char16_t c
= chars
[k
];
// The tables passed as unescapedSet / unescapedSet2 are only indexed for c < 128.
4706 if (c
< 128 && (unescapedSet
[c
] || (unescapedSet2
&& unescapedSet2
[c
]))) {
4708 return Encode_Failure
;
4710 if (c
>= 0xDC00 && c
<= 0xDFFF)
4711 return Encode_BadUri
;
4714 if (c
< 0xD800 || c
> 0xDBFF) {
4719 return Encode_BadUri
;
4721 char16_t c2
= chars
[k
];
4722 if (c2
< 0xDC00 || c2
> 0xDFFF)
4723 return Encode_BadUri
;
// Combine the surrogate pair into a supplementary-plane code point.
4725 v
= ((c
- 0xD800) << 10) + (c2
- 0xDC00) + 0x10000;
4728 size_t L
= js_OneUcs4ToUtf8Char(utf8buf
, v
);
4729 for (size_t j
= 0; j
< L
; j
++) {
4730 hexBuf
[1] = HexDigits
[utf8buf
[j
] >> 4];
4731 hexBuf
[2] = HexDigits
[utf8buf
[j
] & 0xf];
4732 if (!sb
.append(hexBuf
, 3))
4733 return Encode_Failure
;
4738 return Encode_Success
;
/*
 * JSContext-level wrapper around the Encode template.
 *
 * |rval| is set to the empty string on a path whose condition is not
 * visible here (presumably the zero-length input). Otherwise a StringBuffer
 * is reserved to |length| and the template is run on the string's Latin-1
 * or two-byte characters. Encode_BadUri is reported as JSMSG_BAD_URI; on
 * Encode_Success the buffer is handed to TransferBufferToString.
 *
 * NOTE(review): the error report below passes a trailing nullptr after
 * JSMSG_BAD_URI, whereas the equivalent call in the Decode wrapper does not
 * -- confirm whether the extra argument is intended.
 */
4742 Encode(JSContext
* cx
, HandleLinearString str
, const bool* unescapedSet
,
4743 const bool* unescapedSet2
, MutableHandleValue rval
)
4745 size_t length
= str
->length();
4747 rval
.setString(cx
->runtime()->emptyString
);
4751 StringBuffer
sb(cx
);
4752 if (!sb
.reserve(length
))
4756 if (str
->hasLatin1Chars()) {
4757 AutoCheckCannotGC nogc
;
4758 res
= Encode(sb
, str
->latin1Chars(nogc
), str
->length(), unescapedSet
, unescapedSet2
);
4760 AutoCheckCannotGC nogc
;
4761 res
= Encode(sb
, str
->twoByteChars(nogc
), str
->length(), unescapedSet
, unescapedSet2
);
4764 if (res
== Encode_Failure
)
4767 if (res
== Encode_BadUri
) {
4768 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_BAD_URI
, nullptr);
4772 MOZ_ASSERT(res
== Encode_Success
);
4773 return TransferBufferToString(sb
, rval
);
/*
 * Outcome of the Decode template. The Decode(JSContext*, ...) wrapper
 * reports JSMSG_BAD_URI for Decode_BadUri, checks Decode_Failure before
 * that (its handling there is not visible), and finishes the buffer on
 * Decode_Success. Decode_Failure is what the template returns when
 * appending to the StringBuffer fails.
 */
4776 enum DecodeResult
{ Decode_Failure
, Decode_BadUri
, Decode_Success
};
/*
 * URI-decodes chars[0..length) into |sb|.
 *
 * As far as visible here, an escape sequence is handled as follows:
 *   - two hex digits must follow within the input, otherwise Decode_BadUri;
 *     they form the byte B;
 *   - the number n of leading 1 bits of B is counted; n == 1 or n > 4 is
 *     Decode_BadUri (a continuation byte in lead position, or a sequence
 *     longer than four bytes);
 *   - for a multi-byte lead, n - 1 further "%XX" escapes must fit in the
 *     input and each must decode to a byte of the form 10xxxxxx, otherwise
 *     Decode_BadUri; the bytes are collected in |octets| and decoded with
 *     JS::Utf8ToOneUcs4Char;
 *   - a decoded value that needs a surrogate pair is split into the high
 *     surrogate H and low surrogate c;
 *   - a decoded c < 128 that is set in |reservedSet| (when non-null) is not
 *     decoded: the original escape text chars[start..k] is appended instead.
 * Append failures yield Decode_Failure; Decode_Success is returned after the
 * whole input has been processed.
 */
4778 template <typename CharT
>
4780 Decode(StringBuffer
& sb
, const CharT
* chars
, size_t length
, const bool* reservedSet
)
4782 for (size_t k
= 0; k
< length
; k
++) {
4783 char16_t c
= chars
[k
];
// Two more characters must be available for the hex digits.
4786 if ((k
+ 2) >= length
)
4787 return Decode_BadUri
;
4789 if (!JS7_ISHEX(chars
[k
+1]) || !JS7_ISHEX(chars
[k
+2]))
4790 return Decode_BadUri
;
4792 uint32_t B
= JS7_UNHEX(chars
[k
+1]) * 16 + JS7_UNHEX(chars
[k
+2]);
// Count the leading 1 bits of the lead byte.
4798 while (B
& (0x80 >> n
))
4801 if (n
== 1 || n
> 4)
4802 return Decode_BadUri
;
4805 octets
[0] = (uint8_t)B
;
// Each of the n - 1 continuation bytes occupies three input characters.
4806 if (k
+ 3 * (n
- 1) >= length
)
4807 return Decode_BadUri
;
4809 for (int j
= 1; j
< n
; j
++) {
4811 if (chars
[k
] != '%')
4812 return Decode_BadUri
;
4814 if (!JS7_ISHEX(chars
[k
+1]) || !JS7_ISHEX(chars
[k
+2]))
4815 return Decode_BadUri
;
4817 B
= JS7_UNHEX(chars
[k
+1]) * 16 + JS7_UNHEX(chars
[k
+2]);
// Continuation bytes must look like 10xxxxxx.
4818 if ((B
& 0xC0) != 0x80)
4819 return Decode_BadUri
;
4822 octets
[j
] = char(B
);
4824 uint32_t v
= JS::Utf8ToOneUcs4Char(octets
, n
);
4828 return Decode_BadUri
;
4830 c
= char16_t((v
& 0x3FF) + 0xDC00);
4831 char16_t H
= char16_t((v
>> 10) + 0xD800);
4833 return Decode_Failure
;
// Reserved characters stay escaped: copy the original escape text.
4838 if (c
< 128 && reservedSet
&& reservedSet
[c
]) {
4839 if (!sb
.append(chars
+ start
, k
- start
+ 1))
4840 return Decode_Failure
;
4843 return Decode_Failure
;
4847 return Decode_Failure
;
4851 return Decode_Success
;
/*
 * JSContext-level wrapper around the Decode template.
 *
 * |rval| is set to the empty string on a path whose condition is not
 * visible here (presumably the zero-length input). Otherwise the template is
 * run on the string's Latin-1 or two-byte characters with a fresh
 * StringBuffer. Decode_BadUri is reported as JSMSG_BAD_URI; on
 * Decode_Success the buffer is handed to TransferBufferToString.
 */
4855 Decode(JSContext
* cx
, HandleLinearString str
, const bool* reservedSet
, MutableHandleValue rval
)
4857 size_t length
= str
->length();
4859 rval
.setString(cx
->runtime()->emptyString
);
4863 StringBuffer
sb(cx
);
4866 if (str
->hasLatin1Chars()) {
4867 AutoCheckCannotGC nogc
;
4868 res
= Decode(sb
, str
->latin1Chars(nogc
), str
->length(), reservedSet
);
4870 AutoCheckCannotGC nogc
;
4871 res
= Decode(sb
, str
->twoByteChars(nogc
), str
->length(), reservedSet
);
4874 if (res
== Decode_Failure
)
4877 if (res
== Decode_BadUri
) {
4878 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_BAD_URI
);
4882 MOZ_ASSERT(res
== Decode_Success
);
4883 return TransferBufferToString(sb
, rval
);
/*
 * Native that decodes its first argument (converted to a rooted linear
 * string by ArgToRootedString) with js_isUriReservedPlusPound as the
 * reserved set, so escapes of those characters are left untouched. The
 * result goes to args.rval().
 */
4887 str_decodeURI(JSContext
* cx
, unsigned argc
, Value
* vp
)
4889 CallArgs args
= CallArgsFromVp(argc
, vp
);
4890 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
4894 return Decode(cx
, str
, js_isUriReservedPlusPound
, args
.rval());
/*
 * Native that decodes its first argument (converted to a rooted linear
 * string by ArgToRootedString) with no reserved set (nullptr), so no escape
 * is exempt from decoding. The result goes to args.rval().
 */
4898 str_decodeURI_Component(JSContext
* cx
, unsigned argc
, Value
* vp
)
4900 CallArgs args
= CallArgsFromVp(argc
, vp
);
4901 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
4905 return Decode(cx
, str
, nullptr, args
.rval());
/*
 * Native that encodes its first argument (converted to a rooted linear
 * string by ArgToRootedString), leaving unescaped every character set in
 * js_isUriUnescaped or js_isUriReservedPlusPound. The result goes to
 * args.rval().
 */
4909 str_encodeURI(JSContext
* cx
, unsigned argc
, Value
* vp
)
4911 CallArgs args
= CallArgsFromVp(argc
, vp
);
4912 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
4916 return Encode(cx
, str
, js_isUriUnescaped
, js_isUriReservedPlusPound
, args
.rval());
/*
 * Native that encodes its first argument (converted to a rooted linear
 * string by ArgToRootedString), leaving unescaped only the characters set in
 * js_isUriUnescaped (no second set is supplied). The result goes to
 * args.rval().
 */
4920 str_encodeURI_Component(JSContext
* cx
, unsigned argc
, Value
* vp
)
4922 CallArgs args
= CallArgsFromVp(argc
, vp
);
4923 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
4927 return Encode(cx
, str
, js_isUriUnescaped
, nullptr, args
.rval());
4931 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
4932 * least 4 bytes long. Return the number of UTF-8 bytes of data written.
/*
 * Writes the UTF-8 encoding of |ucs4Char| (asserted to be at most 0x10FFFF)
 * into |utf8Buffer|.
 *
 * Values below 0x80 are stored as a single byte. For larger values the
 * visible lines store trailing bytes as (low six bits | 0x80) and the lead
 * byte as 0x100 - (1 << (8 - utf8Length)) + the remaining high bits, i.e.
 * |utf8Length| leading 1 bits followed by the payload. The loop computing
 * |utf8Length| and the return statement are not visible here.
 */
4935 js_OneUcs4ToUtf8Char(uint8_t* utf8Buffer
, uint32_t ucs4Char
)
4939 MOZ_ASSERT(ucs4Char
<= 0x10FFFF);
4940 if (ucs4Char
< 0x80) {
4941 *utf8Buffer
= (uint8_t)ucs4Char
;
4944 uint32_t a
= ucs4Char
>> 11;
// Continuation byte: 10xxxxxx carrying the low six bits.
4952 utf8Buffer
[i
] = (uint8_t)((ucs4Char
& 0x3F) | 0x80);
// Lead byte: utf8Length leading 1 bits plus the remaining high bits.
4955 *utf8Buffer
= (uint8_t)(0x100 - (1 << (8-utf8Length
)) + ucs4Char
);
/*
 * JSLinearString overload: forwards to the character-pointer template of
 * PutEscapedStringImpl, passing the string's Latin-1 or two-byte characters
 * and its length together with |buffer|, |bufferSize|, |fp| and |quote|
 * (the |quote| parameter is declared on a line not visible here).
 */
4961 js::PutEscapedStringImpl(char* buffer
, size_t bufferSize
, FILE* fp
, JSLinearString
* str
,
4964 size_t len
= str
->length();
4965 AutoCheckCannotGC nogc
;
4966 return str
->hasLatin1Chars()
4967 ? PutEscapedStringImpl(buffer
, bufferSize
, fp
, str
->latin1Chars(nogc
), len
, quote
)
4968 : PutEscapedStringImpl(buffer
, bufferSize
, fp
, str
->twoByteChars(nogc
), len
, quote
);
/*
 * Writes an escaped form of chars[0..length) either into |buffer| (capacity
 * |bufferSize|) or to the stream |fp|.
 *
 * Preconditions asserted below: |quote| is 0, '\'' or '"'; a null |buffer|
 * implies bufferSize == 0; and |fp| is only used when |buffer| is null.
 *
 * The output is produced by a state machine with the states STOP,
 * FIRST_QUOTE, LAST_QUOTE, CHARS, ESCAPE_START and ESCAPE_MORE, starting in
 * FIRST_QUOTE. Visible details: characters are looked up in js_EscapeMap;
 * the quote character and backslash get special handling; characters that
 * are not emitted directly (the direct path asserts ' ' <= u < 127) enter
 * the ESCAPE_START / ESCAPE_MORE states, where hex digits are produced from
 * |hex| four bits at a time using uppercase letters; output goes to the
 * buffer while n != bufferSize, or to |fp| via fputc.
 *
 * NOTE(review): most of the state-machine body, the bufferSize == 0
 * handling and the return value are not visible here -- confirm details
 * against the full definition. Explicit instantiations for Latin1Char, char
 * and char16_t follow.
 */
4971 template <typename CharT
>
4973 js::PutEscapedStringImpl(char* buffer
, size_t bufferSize
, FILE* fp
, const CharT
* chars
,
4974 size_t length
, uint32_t quote
)
4977 STOP
, FIRST_QUOTE
, LAST_QUOTE
, CHARS
, ESCAPE_START
, ESCAPE_MORE
4980 MOZ_ASSERT(quote
== 0 || quote
== '\'' || quote
== '"');
4981 MOZ_ASSERT_IF(!buffer
, bufferSize
== 0);
4982 MOZ_ASSERT_IF(fp
, !buffer
);
4984 if (bufferSize
== 0)
4989 const CharT
* charsEnd
= chars
+ length
;
4991 state
= FIRST_QUOTE
;
4995 char c
= 0; /* to quell GCC warnings */
5012 if (chars
== charsEnd
) {
5019 const char* escape
= strchr(js_EscapeMap
, (int)u
);
5028 if (u
== quote
|| u
== '\\')
5031 } else if (u
< 0x100) {
5046 state
= ESCAPE_START
;
5049 MOZ_ASSERT(' ' <= u
&& u
< 127);
5051 state
= ESCAPE_MORE
;
// Extract the next 4-bit group of |hex| and turn it into an uppercase hex digit.
5059 u
= 0xF & (hex
>> shift
);
5060 c
= (char)(u
+ (u
< 10 ? '0' : 'A' - 10));
5064 MOZ_ASSERT(n
<= bufferSize
);
5065 if (n
!= bufferSize
) {
5072 if (fputc(c
, fp
) < 0)
5084 js::PutEscapedStringImpl(char* buffer
, size_t bufferSize
, FILE* fp
, const Latin1Char
* chars
,
5085 size_t length
, uint32_t quote
);
5088 js::PutEscapedStringImpl(char* buffer
, size_t bufferSize
, FILE* fp
, const char* chars
,
5089 size_t length
, uint32_t quote
);
5092 js::PutEscapedStringImpl(char* buffer
, size_t bufferSize
, FILE* fp
, const char16_t
* chars
,
5093 size_t length
, uint32_t quote
);
5096 js::PutEscapedString(char* buffer
, size_t bufferSize
, const Latin1Char
* chars
, size_t length
,
5100 js::PutEscapedString(char* buffer
, size_t bufferSize
, const char16_t
* chars
, size_t length
,