1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
9 #include "mozilla/Attributes.h"
10 #include "mozilla/Casting.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/FloatingPoint.h"
13 #include "mozilla/PodOperations.h"
14 #include "mozilla/Range.h"
15 #include "mozilla/TypeTraits.h"
16 #include "mozilla/UniquePtr.h"
33 #include "builtin/Intl.h"
34 #include "builtin/RegExp.h"
36 #include "unicode/unorm.h"
38 #include "vm/GlobalObject.h"
39 #include "vm/Interpreter.h"
40 #include "vm/NumericConversions.h"
41 #include "vm/Opcodes.h"
42 #include "vm/RegExpObject.h"
43 #include "vm/RegExpStatics.h"
44 #include "vm/ScopeObject.h"
45 #include "vm/StringBuffer.h"
47 #include "jsinferinlines.h"
49 #include "vm/Interpreter-inl.h"
50 #include "vm/String-inl.h"
51 #include "vm/StringObject-inl.h"
54 using namespace js::gc
;
55 using namespace js::types
;
56 using namespace js::unicode
;
61 using mozilla::AssertedCast
;
62 using mozilla::CheckedInt
;
64 using mozilla::IsNegativeZero
;
65 using mozilla::IsSame
;
67 using mozilla::PodCopy
;
68 using mozilla::PodEqual
;
69 using mozilla::RangedPtr
;
70 using mozilla::UniquePtr
;
72 using JS::AutoCheckCannotGC
;
// Converts argument |argno| of |args| to a string and returns it as a
// JSLinearString. The converted string is written back into the argument
// slot (args[argno].setString) before ensureLinear() is called on it.
// NOTE(review): presumably the write-back keeps the new string reachable
// through the args array while ensureLinear() runs -- confirm.
74 static JSLinearString
*
75 ArgToRootedString(JSContext
* cx
, CallArgs
& args
, unsigned argno
)
// An argument index past the supplied arguments yields cx->names().undefined.
77 if (argno
>= args
.length())
78 return cx
->names().undefined
;
// NOTE(review): no failure check on |str| is visible in this excerpt between
// the conversion and its first use -- confirm against the full file.
80 JSString
* str
= ToString
<CanGC
>(cx
, args
[argno
]);
84 args
[argno
].setString(str
);
85 return str
->ensureLinear(cx
);
89 * Forward declarations for URI encode/decode and helper routines
92 str_decodeURI(JSContext
* cx
, unsigned argc
, Value
* vp
);
95 str_decodeURI_Component(JSContext
* cx
, unsigned argc
, Value
* vp
);
98 str_encodeURI(JSContext
* cx
, unsigned argc
, Value
* vp
);
101 str_encodeURI_Component(JSContext
* cx
, unsigned argc
, Value
* vp
);
104 * Global string methods
// Encoding helper used by str_escape for a buffer of |length| characters of
// type CharT. Characters below 128 that are flagged in shouldPassThrough are
// left unencoded; other characters below 256 are written as %XX and the
// rest as %uXXXX, using upper-case hex digits. Output goes into a Latin1Char
// buffer obtained from cx->pod_malloc and terminated with 0; the encoded
// length (terminator excluded) is stored in *newLengthOut.
109 template <typename CharT
>
111 Escape(JSContext
* cx
, const CharT
* chars
, uint32_t length
, uint32_t* newLengthOut
)
// Lookup table indexed by character code (0..127): 1 means the character is
// not encoded.
113 static const uint8_t shouldPassThrough
[128] = {
114 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
115 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
116 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
117 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
118 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
119 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
120 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
121 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
124 /* Take a first pass and see how big the result string will need to be. */
125 uint32_t newLength
= length
;
126 for (size_t i
= 0; i
< length
; i
++) {
127 jschar ch
= chars
[i
];
128 if (ch
< 128 && shouldPassThrough
[ch
])
131 /* The character will be encoded as %XX or %uXXXX. */
// %XX is 3 output characters (2 more than the input character); %uXXXX is 6
// (5 more).
132 newLength
+= (ch
< 256) ? 2 : 5;
135 * newlength is incremented by at most 5 on each iteration, so worst
136 * case newlength == length * 6. This can't overflow.
138 static_assert(JSString::MAX_LENGTH
< UINT32_MAX
/ 6,
139 "newlength must not overflow");
// One extra element is requested for the terminator written at
// newChars[newLength] below.
142 Latin1Char
* newChars
= cx
->pod_malloc
<Latin1Char
>(newLength
+ 1);
146 static const char digits
[] = "0123456789ABCDEF";
// Second pass: write the output. |ni| is the write index into newChars.
149 for (i
= 0, ni
= 0; i
< length
; i
++) {
150 jschar ch
= chars
[i
];
151 if (ch
< 128 && shouldPassThrough
[ch
]) {
153 } else if (ch
< 256) {
154 newChars
[ni
++] = '%';
155 newChars
[ni
++] = digits
[ch
>> 4];
156 newChars
[ni
++] = digits
[ch
& 0xF];
// NOTE(review): the branch header introducing the %uXXXX case is not visible
// in this excerpt; presumably this is the final `else` (ch >= 256) -- confirm.
158 newChars
[ni
++] = '%';
159 newChars
[ni
++] = 'u';
160 newChars
[ni
++] = digits
[ch
>> 12];
161 newChars
[ni
++] = digits
[(ch
& 0xF00) >> 8];
162 newChars
[ni
++] = digits
[(ch
& 0xF0) >> 4];
163 newChars
[ni
++] = digits
[ch
& 0xF];
// The number of characters written must equal the size computed in the
// first pass.
166 JS_ASSERT(ni
== newLength
);
167 newChars
[newLength
] = 0;
169 *newLengthOut
= newLength
;
// Native registered in string_functions under js_escape_str. Converts the
// first argument to a linear string, runs Escape() on its Latin1 or two-byte
// characters, builds a new string from the encoded buffer and stores it in
// the return value.
174 str_escape(JSContext
* cx
, unsigned argc
, Value
* vp
)
176 CallArgs args
= CallArgsFromVp(argc
, vp
);
178 JSLinearString
* str
= ArgToRootedString(cx
, args
, 0);
// The encoded buffer is held in a ScopedJSFreePtr.
// NOTE(review): the hand-off of ownership to the new string is not visible
// in this excerpt -- confirm.
182 ScopedJSFreePtr
<Latin1Char
> newChars
;
// Dispatch on the string's character encoding; each branch declares its own
// AutoCheckCannotGC for the character-pointer accessor.
184 if (str
->hasLatin1Chars()) {
185 AutoCheckCannotGC nogc
;
186 newChars
= Escape(cx
, str
->latin1Chars(nogc
), str
->length(), &newLength
);
188 AutoCheckCannotGC nogc
;
189 newChars
= Escape(cx
, str
->twoByteChars(nogc
), str
->length(), &newLength
);
195 JSString
* res
= NewString
<CanGC
>(cx
, newChars
.get(), newLength
);
200 args
.rval().setString(res
);
// Decodes four hexadecimal digits into a 16-bit value stored in *result.
// All four characters a, b, c, d must satisfy JS7_ISHEX; the value is
// assembled most-significant digit first (a is shifted furthest left).
// NOTE(review): the lines that load a..d from |chars| are not visible in
// this excerpt; presumably they are chars[0]..chars[3] -- confirm.
204 template <typename CharT
>
206 Unhex4(const RangedPtr
<const CharT
> chars
, jschar
* result
)
213 if (!(JS7_ISHEX(a
) && JS7_ISHEX(b
) && JS7_ISHEX(c
) && JS7_ISHEX(d
)))
216 *result
= (((((JS7_UNHEX(a
) << 4) + JS7_UNHEX(b
)) << 4) + JS7_UNHEX(c
)) << 4) + JS7_UNHEX(d
);
// Decodes two hexadecimal digits into *result: both a and b must satisfy
// JS7_ISHEX, and the value is (a << 4) + b after JS7_UNHEX conversion.
// NOTE(review): the lines that load a and b from |chars| are not visible in
// this excerpt; presumably they are chars[0] and chars[1] -- confirm.
220 template <typename CharT
>
222 Unhex2(const RangedPtr
<const CharT
> chars
, jschar
* result
)
227 if (!(JS7_ISHEX(a
) && JS7_ISHEX(b
)))
230 *result
= (JS7_UNHEX(a
) << 4) + JS7_UNHEX(b
);
234 template <typename CharT
>
236 Unescape(StringBuffer
& sb
, const mozilla::Range
<const CharT
> chars
)
239 * NB: use signed integers for length/index to allow simple length
240 * comparisons without unsigned-underflow hazards.
242 static_assert(JSString::MAX_LENGTH
<= INT_MAX
, "String length must fit in a signed integer");
243 int length
= AssertedCast
<int>(chars
.length());
246 * Note that the spec algorithm has been optimized to avoid building
247 * a string in the case where no escapes are present.
252 bool building
= false;
268 if (chars
[k
+ 1] != 'u')
271 #define ENSURE_BUILDING \
275 if (!sb.reserve(length)) \
277 sb.infallibleAppend(chars.start().get(), k); \
282 if (Unhex4(chars
.start() + k
+ 2, &c
)) {
294 if (Unhex2(chars
.start() + k
+ 1, &c
)) {
300 if (building
&& !sb
.append(c
))
308 #undef ENSURE_BUILDING
313 str_unescape(JSContext
* cx
, unsigned argc
, Value
* vp
)
315 CallArgs args
= CallArgsFromVp(argc
, vp
);
318 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
324 if (str
->hasTwoByteChars() && !sb
.ensureTwoByteChars())
327 if (str
->hasLatin1Chars()) {
328 AutoCheckCannotGC nogc
;
329 if (!Unescape(sb
, str
->latin1Range(nogc
)))
332 AutoCheckCannotGC nogc
;
333 if (!Unescape(sb
, str
->twoByteRange(nogc
)))
337 JSLinearString
* result
;
339 result
= sb
.finishString();
346 args
.rval().setString(result
);
// Native registered in string_functions under js_uneval_str: passes the
// first argument (args.get(0)) to ValueToSource and stores the resulting
// string in the return value.
352 str_uneval(JSContext
* cx
, unsigned argc
, Value
* vp
)
354 CallArgs args
= CallArgsFromVp(argc
, vp
);
355 JSString
* str
= ValueToSource(cx
, args
.get(0));
359 args
.rval().setString(str
);
364 static const JSFunctionSpec string_functions
[] = {
365 JS_FN(js_escape_str
, str_escape
, 1,0),
366 JS_FN(js_unescape_str
, str_unescape
, 1,0),
368 JS_FN(js_uneval_str
, str_uneval
, 1,0),
370 JS_FN(js_decodeURI_str
, str_decodeURI
, 1,0),
371 JS_FN(js_encodeURI_str
, str_encodeURI
, 1,0),
372 JS_FN(js_decodeURIComponent_str
, str_decodeURI_Component
, 1,0),
373 JS_FN(js_encodeURIComponent_str
, str_encodeURI_Component
, 1,0),
// Property attributes passed to JSObject::defineElement when str_enumerate
// and str_resolve define the indexed character elements of a String object:
// enumerable, read-only and permanent.
378 static const unsigned STRING_ELEMENT_ATTRS
= JSPROP_ENUMERATE
| JSPROP_READONLY
| JSPROP_PERMANENT
;
// For a StringObject |obj|, defines one element per character index of the
// wrapped string: element i is a one-character dependent string taken at
// offset i, defined with STRING_ELEMENT_ATTRS.
381 str_enumerate(JSContext
* cx
, HandleObject obj
)
// unbox() yields the primitive string wrapped by the StringObject.
383 RootedString
str(cx
, obj
->as
<StringObject
>().unbox());
384 RootedValue
value(cx
);
385 for (size_t i
= 0, length
= str
->length(); i
< length
; i
++) {
// NOTE(review): the failure check on |str1| is not visible in this excerpt.
386 JSString
* str1
= NewDependentString(cx
, str
, i
, 1);
389 value
.setString(str1
);
390 if (!JSObject::defineElement(cx
, obj
, i
, value
,
391 JS_PropertyStub
, JS_StrictPropertyStub
,
392 STRING_ELEMENT_ATTRS
))
// Resolve hook installed in StringObject::class_. For an integer id that is
// a valid index into the wrapped string, defines that element on |obj| as
// the corresponding unit (single-character) string with
// STRING_ELEMENT_ATTRS.
// NOTE(review): the handling of non-integer ids and the update of |objp| are
// not visible in this excerpt -- confirm against the full file.
402 js::str_resolve(JSContext
* cx
, HandleObject obj
, HandleId id
, MutableHandleObject objp
)
404 if (!JSID_IS_INT(id
))
407 RootedString
str(cx
, obj
->as
<StringObject
>().unbox());
409 int32_t slot
= JSID_TO_INT(id
);
// Casting to size_t turns a negative slot into a very large value, so this
// single comparison also rejects negative indices.
410 if ((size_t)slot
< str
->length()) {
411 JSString
* str1
= cx
->staticStrings().getUnitStringForElement(cx
, str
, size_t(slot
));
414 RootedValue
value(cx
, StringValue(str1
));
415 if (!JSObject::defineElement(cx
, obj
, uint32_t(slot
), value
, nullptr, nullptr,
416 STRING_ELEMENT_ATTRS
))
425 const Class
StringObject::class_
= {
427 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS
) |
428 JSCLASS_NEW_RESOLVE
| JSCLASS_HAS_CACHED_PROTO(JSProto_String
),
429 JS_PropertyStub
, /* addProperty */
430 JS_DeletePropertyStub
, /* delProperty */
431 JS_PropertyStub
, /* getProperty */
432 JS_StrictPropertyStub
, /* setProperty */
434 (JSResolveOp
)str_resolve
,
439 * Returns a JSString * for the |this| value associated with 'call', or throws
440 * a TypeError if |this| is null or undefined. This algorithm is the same as
441 * calling CheckObjectCoercible(this), then returning ToString(this), as all
442 * String.prototype.* methods do (other than toString and valueOf).
444 static MOZ_ALWAYS_INLINE JSString
*
445 ThisToStringForStringProto(JSContext
* cx
, CallReceiver call
)
447 JS_CHECK_RECURSION(cx
, return nullptr);
449 if (call
.thisv().isString())
450 return call
.thisv().toString();
452 if (call
.thisv().isObject()) {
453 RootedObject
obj(cx
, &call
.thisv().toObject());
454 if (obj
->is
<StringObject
>()) {
455 Rooted
<jsid
> id(cx
, NameToId(cx
->names().toString
));
456 if (ClassMethodIsNative(cx
, obj
, &StringObject::class_
, id
, js_str_toString
)) {
457 JSString
* str
= obj
->as
<StringObject
>().unbox();
458 call
.setThis(StringValue(str
));
462 } else if (call
.thisv().isNullOrUndefined()) {
463 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_CANT_CONVERT_TO
,
464 call
.thisv().isNull() ? "null" : "undefined", "object");
468 JSString
* str
= ToStringSlow
<CanGC
>(cx
, call
.thisv());
472 call
.setThis(StringValue(str
));
// Predicate used with CallNonGenericMethod by str_toSource and
// js_str_toString: true when |v| is a primitive string, or an object that
// is a StringObject.
476 MOZ_ALWAYS_INLINE
bool
477 IsString(HandleValue v
)
479 return v
.isString() || (v
.isObject() && v
.toObject().is
<StringObject
>());
485 * String.prototype.quote is generic (as are most string methods), unlike
486 * toSource, toString, and valueOf.
489 str_quote(JSContext
* cx
, unsigned argc
, Value
* vp
)
491 CallArgs args
= CallArgsFromVp(argc
, vp
);
492 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
495 str
= js_QuoteString(cx
, str
, '"');
498 args
.rval().setString(str
);
502 MOZ_ALWAYS_INLINE
bool
503 str_toSource_impl(JSContext
* cx
, CallArgs args
)
505 JS_ASSERT(IsString(args
.thisv()));
507 Rooted
<JSString
*> str(cx
, ToString
<CanGC
>(cx
, args
.thisv()));
511 str
= js_QuoteString(cx
, str
, '"');
516 if (!sb
.append("(new String(") || !sb
.append(str
) || !sb
.append("))"))
519 str
= sb
.finishString();
522 args
.rval().setString(str
);
527 str_toSource(JSContext
* cx
, unsigned argc
, Value
* vp
)
529 CallArgs args
= CallArgsFromVp(argc
, vp
);
530 return CallNonGenericMethod
<IsString
, str_toSource_impl
>(cx
, args
);
533 #endif /* JS_HAS_TOSOURCE */
// Implementation behind js_str_toString (invoked through
// CallNonGenericMethod<IsString, ...>). |this| is asserted to satisfy
// IsString; the return value is the primitive string itself, or the string
// unboxed from the StringObject when |this| is an object.
535 MOZ_ALWAYS_INLINE
bool
536 str_toString_impl(JSContext
* cx
, CallArgs args
)
538 JS_ASSERT(IsString(args
.thisv()));
540 args
.rval().setString(args
.thisv().isString()
541 ? args
.thisv().toString()
542 : args
.thisv().toObject().as
<StringObject
>().unbox());
547 js_str_toString(JSContext
* cx
, unsigned argc
, Value
* vp
)
549 CallArgs args
= CallArgsFromVp(argc
, vp
);
550 return CallNonGenericMethod
<IsString
, str_toString_impl
>(cx
, args
);
554 * Java-like string native methods.
557 static MOZ_ALWAYS_INLINE
bool
558 ValueToIntegerRange(JSContext
* cx
, HandleValue v
, int32_t* out
)
564 if (!ToInteger(cx
, v
, &d
))
568 else if (d
< INT32_MIN
)
578 DoSubstr(JSContext
* cx
, JSString
* str
, size_t begin
, size_t len
)
581 * Optimization for one level deep ropes.
582 * This is common for the following pattern:
585 * text = text.substr(0, x) + "bla" + text.substr(x)
586 * text.charCodeAt(x + 1)
590 JSRope
* rope
= &str
->asRope();
592 /* Substring is totally in leftChild of rope. */
593 if (begin
+ len
<= rope
->leftChild()->length()) {
594 str
= rope
->leftChild();
595 return NewDependentString(cx
, str
, begin
, len
);
598 /* Substring is totally in rightChild of rope. */
599 if (begin
>= rope
->leftChild()->length()) {
600 str
= rope
->rightChild();
601 begin
-= rope
->leftChild()->length();
602 return NewDependentString(cx
, str
, begin
, len
);
606 * Requested substring is partly in the left and partly in the right child.
607 * Create a rope of substrings for both children.
609 JS_ASSERT (begin
< rope
->leftChild()->length() &&
610 begin
+ len
> rope
->leftChild()->length());
612 size_t lhsLength
= rope
->leftChild()->length() - begin
;
613 size_t rhsLength
= begin
+ len
- rope
->leftChild()->length();
615 Rooted
<JSRope
*> ropeRoot(cx
, rope
);
616 RootedString
lhs(cx
, NewDependentString(cx
, ropeRoot
->leftChild(), begin
, lhsLength
));
620 RootedString
rhs(cx
, NewDependentString(cx
, ropeRoot
->rightChild(), 0, rhsLength
));
624 return JSRope::new_
<CanGC
>(cx
, lhs
, rhs
, len
);
627 return NewDependentString(cx
, str
, begin
, len
);
631 str_substring(JSContext
* cx
, unsigned argc
, Value
* vp
)
633 CallArgs args
= CallArgsFromVp(argc
, vp
);
635 JSString
* str
= ThisToStringForStringProto(cx
, args
);
639 int32_t length
, begin
, end
;
640 if (args
.length() > 0) {
641 end
= length
= int32_t(str
->length());
643 if (args
[0].isInt32()) {
644 begin
= args
[0].toInt32();
646 RootedString
strRoot(cx
, str
);
647 if (!ValueToIntegerRange(cx
, args
[0], &begin
))
654 else if (begin
> length
)
657 if (args
.hasDefined(1)) {
658 if (args
[1].isInt32()) {
659 end
= args
[1].toInt32();
661 RootedString
strRoot(cx
, str
);
662 if (!ValueToIntegerRange(cx
, args
[1], &end
))
680 str
= DoSubstr(cx
, str
, size_t(begin
), size_t(end
- begin
));
685 args
.rval().setString(str
);
689 template <typename CharT
>
691 ToLowerCase(JSContext
* cx
, JSLinearString
* str
)
693 // Unlike toUpperCase, toLowerCase has the nice invariant that if the input
694 // is a Latin1 string, the output is also a Latin1 string.
695 UniquePtr
<CharT
[], JS::FreePolicy
> newChars
;
696 size_t length
= str
->length();
698 AutoCheckCannotGC nogc
;
699 const CharT
* chars
= str
->chars
<CharT
>(nogc
);
701 // Look for the first upper case character.
703 for (; i
< length
; i
++) {
705 if (unicode::ToLowerCase(c
) != c
)
709 // If all characters are lower case, return the input string.
713 newChars
= cx
->make_pod_array
<CharT
>(length
+ 1);
717 PodCopy(newChars
.get(), chars
, i
);
719 for (; i
< length
; i
++) {
720 jschar c
= unicode::ToLowerCase(chars
[i
]);
721 MOZ_ASSERT_IF((IsSame
<CharT
, Latin1Char
>::value
), c
<= JSString::MAX_LATIN1_CHAR
);
725 newChars
[length
] = 0;
728 JSString
* res
= NewStringDontDeflate
<CanGC
>(cx
, newChars
.get(), length
);
737 ToLowerCaseHelper(JSContext
* cx
, CallReceiver call
)
739 RootedString
str(cx
, ThisToStringForStringProto(cx
, call
));
743 JSLinearString
* linear
= str
->ensureLinear(cx
);
747 if (linear
->hasLatin1Chars())
748 str
= ToLowerCase
<Latin1Char
>(cx
, linear
);
750 str
= ToLowerCase
<jschar
>(cx
, linear
);
754 call
.rval().setString(str
);
759 str_toLowerCase(JSContext
* cx
, unsigned argc
, Value
* vp
)
761 return ToLowerCaseHelper(cx
, CallArgsFromVp(argc
, vp
));
765 str_toLocaleLowerCase(JSContext
* cx
, unsigned argc
, Value
* vp
)
767 CallArgs args
= CallArgsFromVp(argc
, vp
);
770 * Forcefully ignore the first (or any) argument and return toLowerCase(),
771 * ECMA has reserved that argument, presumably for defining the locale.
773 if (cx
->runtime()->localeCallbacks
&& cx
->runtime()->localeCallbacks
->localeToLowerCase
) {
774 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
778 RootedValue
result(cx
);
779 if (!cx
->runtime()->localeCallbacks
->localeToLowerCase(cx
, str
, &result
))
782 args
.rval().set(result
);
786 return ToLowerCaseHelper(cx
, args
);
// Fills |destChars| with the upper-cased form of srcChars[0..length).
// Characters before |firstLowerCase| are copied unchanged; from that index
// on, each character goes through unicode::ToUpperCase. A '\0' is written
// at destChars[length], so the destination must have room for length + 1
// elements. Requires firstLowerCase < length (asserted).
789 template <typename DestChar
, typename SrcChar
>
791 ToUpperCaseImpl(DestChar
* destChars
, const SrcChar
* srcChars
, size_t firstLowerCase
, size_t length
)
793 MOZ_ASSERT(firstLowerCase
< length
);
// Prefix before the first lower-case character: plain copy.
795 for (size_t i
= 0; i
< firstLowerCase
; i
++)
796 destChars
[i
] = srcChars
[i
];
798 for (size_t i
= firstLowerCase
; i
< length
; i
++) {
799 jschar c
= unicode::ToUpperCase(srcChars
[i
]);
// When writing into a Latin1 buffer, the upper-cased value must still fit in
// Latin1 (debug-only check).
// NOTE(review): the store of |c| into destChars[i] is not visible in this
// excerpt -- confirm.
800 MOZ_ASSERT_IF((IsSame
<DestChar
, Latin1Char
>::value
), c
<= JSString::MAX_LATIN1_CHAR
);
804 destChars
[length
] = '\0';
807 template <typename CharT
>
809 ToUpperCase(JSContext
* cx
, JSLinearString
* str
)
811 typedef UniquePtr
<Latin1Char
[], JS::FreePolicy
> Latin1CharPtr
;
812 typedef UniquePtr
<jschar
[], JS::FreePolicy
> TwoByteCharPtr
;
814 mozilla::MaybeOneOf
<Latin1CharPtr
, TwoByteCharPtr
> newChars
;
815 size_t length
= str
->length();
817 AutoCheckCannotGC nogc
;
818 const CharT
* chars
= str
->chars
<CharT
>(nogc
);
820 // Look for the first lower case character.
822 for (; i
< length
; i
++) {
824 if (unicode::ToUpperCase(c
) != c
)
828 // If all characters are upper case, return the input string.
832 // If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
833 // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
834 // upper case characters are not in the Latin1 range.
836 if (IsSame
<CharT
, Latin1Char
>::value
) {
837 resultIsLatin1
= true;
838 for (size_t j
= i
; j
< length
; j
++) {
839 Latin1Char c
= chars
[j
];
840 if (c
== 0xb5 || c
== 0xff) {
841 MOZ_ASSERT(unicode::ToUpperCase(c
) > JSString::MAX_LATIN1_CHAR
);
842 resultIsLatin1
= false;
845 MOZ_ASSERT(unicode::ToUpperCase(c
) <= JSString::MAX_LATIN1_CHAR
);
849 resultIsLatin1
= false;
852 if (resultIsLatin1
) {
853 Latin1CharPtr buf
= cx
->make_pod_array
<Latin1Char
>(length
+ 1);
857 ToUpperCaseImpl(buf
.get(), chars
, i
, length
);
858 newChars
.construct
<Latin1CharPtr
>(buf
);
860 TwoByteCharPtr buf
= cx
->make_pod_array
<jschar
>(length
+ 1);
864 ToUpperCaseImpl(buf
.get(), chars
, i
, length
);
865 newChars
.construct
<TwoByteCharPtr
>(buf
);
870 if (newChars
.constructed
<Latin1CharPtr
>()) {
871 res
= NewStringDontDeflate
<CanGC
>(cx
, newChars
.ref
<Latin1CharPtr
>().get(), length
);
875 newChars
.ref
<Latin1CharPtr
>().release();
877 res
= NewStringDontDeflate
<CanGC
>(cx
, newChars
.ref
<TwoByteCharPtr
>().get(), length
);
881 newChars
.ref
<TwoByteCharPtr
>().release();
888 ToUpperCaseHelper(JSContext
* cx
, CallReceiver call
)
890 RootedString
str(cx
, ThisToStringForStringProto(cx
, call
));
894 JSLinearString
* linear
= str
->ensureLinear(cx
);
898 if (linear
->hasLatin1Chars())
899 str
= ToUpperCase
<Latin1Char
>(cx
, linear
);
901 str
= ToUpperCase
<jschar
>(cx
, linear
);
905 call
.rval().setString(str
);
910 str_toUpperCase(JSContext
* cx
, unsigned argc
, Value
* vp
)
912 return ToUpperCaseHelper(cx
, CallArgsFromVp(argc
, vp
));
916 str_toLocaleUpperCase(JSContext
* cx
, unsigned argc
, Value
* vp
)
918 CallArgs args
= CallArgsFromVp(argc
, vp
);
921 * Forcefully ignore the first (or any) argument and return toUpperCase(),
922 * ECMA has reserved that argument, presumably for defining the locale.
924 if (cx
->runtime()->localeCallbacks
&& cx
->runtime()->localeCallbacks
->localeToUpperCase
) {
925 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
929 RootedValue
result(cx
);
930 if (!cx
->runtime()->localeCallbacks
->localeToUpperCase(cx
, str
, &result
))
933 args
.rval().set(result
);
937 return ToUpperCaseHelper(cx
, args
);
942 str_localeCompare(JSContext
* cx
, unsigned argc
, Value
* vp
)
944 CallArgs args
= CallArgsFromVp(argc
, vp
);
945 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
949 RootedString
thatStr(cx
, ToString
<CanGC
>(cx
, args
.get(0)));
953 if (cx
->runtime()->localeCallbacks
&& cx
->runtime()->localeCallbacks
->localeCompare
) {
954 RootedValue
result(cx
);
955 if (!cx
->runtime()->localeCallbacks
->localeCompare(cx
, str
, thatStr
, &result
))
958 args
.rval().set(result
);
963 if (!CompareStrings(cx
, str
, thatStr
, &result
))
966 args
.rval().setInt32(result
);
972 /* ES6 20140210 draft 21.1.3.12. */
974 str_normalize(JSContext
* cx
, unsigned argc
, Value
* vp
)
976 CallArgs args
= CallArgsFromVp(argc
, vp
);
979 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
984 UNormalizationMode form
;
985 if (!args
.hasDefined(0)) {
989 RootedLinearString
formStr(cx
, ArgToRootedString(cx
, args
, 0));
994 if (formStr
== cx
->names().NFC
) {
996 } else if (formStr
== cx
->names().NFD
) {
998 } else if (formStr
== cx
->names().NFKC
) {
1000 } else if (formStr
== cx
->names().NFKD
) {
1003 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr,
1004 JSMSG_INVALID_NORMALIZE_FORM
);
1010 AutoStableStringChars
stableChars(cx
);
1011 if (!str
->ensureFlat(cx
) || !stableChars
.initTwoByte(cx
, str
))
1014 static const size_t INLINE_CAPACITY
= 32;
1016 const UChar
* srcChars
= JSCharToUChar(stableChars
.twoByteRange().start().get());
1017 int32_t srcLen
= AssertedCast
<int32_t>(str
->length());
1018 Vector
<jschar
, INLINE_CAPACITY
> chars(cx
);
1019 if (!chars
.resize(INLINE_CAPACITY
))
1022 UErrorCode status
= U_ZERO_ERROR
;
1023 int32_t size
= unorm_normalize(srcChars
, srcLen
, form
, 0,
1024 JSCharToUChar(chars
.begin()), INLINE_CAPACITY
,
1026 if (status
== U_BUFFER_OVERFLOW_ERROR
) {
1027 if (!chars
.resize(size
))
1029 status
= U_ZERO_ERROR
;
1033 unorm_normalize(srcChars
, srcLen
, form
, 0,
1034 JSCharToUChar(chars
.begin()), size
,
1036 MOZ_ASSERT(size
== finalSize
|| U_FAILURE(status
), "unorm_normalize behaved inconsistently");
1038 if (U_FAILURE(status
))
1041 JSString
* ns
= NewStringCopyN
<CanGC
>(cx
, chars
.begin(), size
);
1046 args
.rval().setString(ns
);
1052 js_str_charAt(JSContext
* cx
, unsigned argc
, Value
* vp
)
1054 CallArgs args
= CallArgsFromVp(argc
, vp
);
1056 RootedString
str(cx
);
1058 if (args
.thisv().isString() && args
.length() != 0 && args
[0].isInt32()) {
1059 str
= args
.thisv().toString();
1060 i
= size_t(args
[0].toInt32());
1061 if (i
>= str
->length())
1064 str
= ThisToStringForStringProto(cx
, args
);
1069 if (args
.length() > 0 && !ToInteger(cx
, args
[0], &d
))
1072 if (d
< 0 || str
->length() <= d
)
1077 str
= cx
->staticStrings().getUnitStringForElement(cx
, str
, i
);
1080 args
.rval().setString(str
);
1084 args
.rval().setString(cx
->runtime()->emptyString
);
1089 js::str_charCodeAt_impl(JSContext
* cx
, HandleString string
, HandleValue index
, MutableHandleValue res
)
1091 RootedString
str(cx
);
1093 if (index
.isInt32()) {
1094 i
= index
.toInt32();
1095 if (i
>= string
->length())
1099 if (!ToInteger(cx
, index
, &d
))
1101 // check whether d is negative as size_t is unsigned
1102 if (d
< 0 || string
->length() <= d
)
1107 if (!string
->getChar(cx
, i
, &c
))
1118 js_str_charCodeAt(JSContext
* cx
, unsigned argc
, Value
* vp
)
1120 CallArgs args
= CallArgsFromVp(argc
, vp
);
1121 RootedString
str(cx
);
1122 RootedValue
index(cx
);
1123 if (args
.thisv().isString()) {
1124 str
= args
.thisv().toString();
1126 str
= ThisToStringForStringProto(cx
, args
);
1130 if (args
.length() != 0)
1135 return js::str_charCodeAt_impl(cx
, str
, index
, args
.rval());
1139 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1140 * The patlen argument must be positive and no greater than sBMHPatLenMax.
1142 * Return the index of pat in text, or -1 if not found.
1144 static const uint32_t sBMHCharSetSize
= 256; /* ISO-Latin-1 */
1145 static const uint32_t sBMHPatLenMax
= 255; /* skip table element is uint8_t */
1146 static const int sBMHBadPattern
= -2; /* return value if pat is not ISO-Latin-1 */
// Searches text[0..textLen) for pat[0..patLen) using a 256-entry skip table.
// Requires 0 < patLen <= sBMHPatLenMax (asserted), which keeps every skip
// distance within uint8_t. Returns sBMHBadPattern when a pattern character
// used to build the table is >= sBMHCharSetSize.
1148 template <typename TextChar
, typename PatChar
>
1150 BoyerMooreHorspool(const TextChar
* text
, uint32_t textLen
, const PatChar
* pat
, uint32_t patLen
)
1152 JS_ASSERT(0 < patLen
&& patLen
<= sBMHPatLenMax
);
// Default skip for every character code is the full pattern length.
1154 uint8_t skip
[sBMHCharSetSize
];
1155 for (uint32_t i
= 0; i
< sBMHCharSetSize
; i
++)
1156 skip
[i
] = uint8_t(patLen
);
// For every pattern position except the last, record the distance from that
// position to the last pattern position; later positions overwrite earlier
// ones for a repeated character.
// NOTE(review): the line that loads |c| is not visible in this excerpt;
// presumably c = pat[i] -- confirm.
1158 uint32_t patLast
= patLen
- 1;
1159 for (uint32_t i
= 0; i
< patLast
; i
++) {
1161 if (c
>= sBMHCharSetSize
)
1162 return sBMHBadPattern
;
1163 skip
[c
] = uint8_t(patLast
- i
);
// |k| is the text index aligned with the last pattern character; the inner
// loop compares text against the pattern backwards from there.
1166 for (uint32_t k
= patLast
; k
< textLen
; ) {
1167 for (uint32_t i
= k
, j
= patLast
; ; i
--, j
--) {
1168 if (text
[i
] != pat
[j
])
1171 return static_cast<int>(i
); /* safe: max string size */
// Advance by the table entry for |c|; characters outside the table range
// advance by the whole pattern length.
// NOTE(review): the line that loads |c| here is not visible; presumably
// c = text[k] -- confirm.
1175 k
+= (c
>= sBMHCharSetSize
) ? patLen
: skip
[c
];
1180 template <typename TextChar
, typename PatChar
>
1182 typedef uint32_t Extent
;
1183 static MOZ_ALWAYS_INLINE Extent
computeExtent(const PatChar
*, uint32_t patLen
) {
1184 return (patLen
- 1) * sizeof(PatChar
);
1186 static MOZ_ALWAYS_INLINE
bool match(const PatChar
* p
, const TextChar
* t
, Extent extent
) {
1187 MOZ_ASSERT(sizeof(TextChar
) == sizeof(PatChar
));
1188 return memcmp(p
, t
, extent
) == 0;
1192 template <typename TextChar
, typename PatChar
>
1194 typedef const PatChar
* Extent
;
1195 static MOZ_ALWAYS_INLINE Extent
computeExtent(const PatChar
* pat
, uint32_t patLen
) {
1196 return pat
+ patLen
;
1198 static MOZ_ALWAYS_INLINE
bool match(const PatChar
* p
, const TextChar
* t
, Extent extent
) {
1199 for (; p
!= extent
; ++p
, ++t
) {
// Scans text[0..n) for the first element equal to |pat| and returns a
// pointer to it. The scan is unrolled by eight: the switch first handles
// (textend - t) & 7 leading elements, then the while loop tests eight
// elements per iteration.
// NOTE(review): with n == 0 the switch selects case 0 and dereferences *t,
// so callers must pass n > 0 -- confirm that every call site guarantees it.
// NOTE(review): the loop's pointer advance and the not-found return are not
// visible in this excerpt.
1207 template <typename TextChar
, typename PatChar
>
1208 static const TextChar
*
1209 FirstCharMatcherUnrolled(const TextChar
* text
, uint32_t n
, const PatChar pat
)
1211 const TextChar
* textend
= text
+ n
;
1212 const TextChar
* t
= text
;
// No break statements are visible between the cases, so each case appears
// to fall through to the next; every test post-increments |t|, hence the
// `t - 1` on a hit.
1214 switch ((textend
- t
) & 7) {
1215 case 0: if (*t
++ == pat
) return t
- 1;
1216 case 7: if (*t
++ == pat
) return t
- 1;
1217 case 6: if (*t
++ == pat
) return t
- 1;
1218 case 5: if (*t
++ == pat
) return t
- 1;
1219 case 4: if (*t
++ == pat
) return t
- 1;
1220 case 3: if (*t
++ == pat
) return t
- 1;
1221 case 2: if (*t
++ == pat
) return t
- 1;
1222 case 1: if (*t
++ == pat
) return t
- 1;
// Main loop: tests t[0]..t[7] on each pass.
1224 while (textend
!= t
) {
1225 if (t
[0] == pat
) return t
;
1226 if (t
[1] == pat
) return t
+ 1;
1227 if (t
[2] == pat
) return t
+ 2;
1228 if (t
[3] == pat
) return t
+ 3;
1229 if (t
[4] == pat
) return t
+ 4;
1230 if (t
[5] == pat
) return t
+ 5;
1231 if (t
[6] == pat
) return t
+ 6;
1232 if (t
[7] == pat
) return t
+ 7;
1239 FirstCharMatcher8bit(const char* text
, uint32_t n
, const char pat
)
1241 #if defined(__clang__)
1242 return FirstCharMatcherUnrolled
<char, char>(text
, n
, pat
);
1244 return reinterpret_cast<const char*>(memchr(text
, pat
, n
));
1248 static const jschar
*
1249 FirstCharMatcher16bit(const jschar
* text
, uint32_t n
, const jschar pat
)
1251 #if defined(XP_MACOSX) || defined(XP_WIN)
1253 * Performance of memchr is horrible in OSX. Windows is better,
1254 * but it is still better to use UnrolledMatcher.
1256 return FirstCharMatcherUnrolled
<jschar
, jschar
>(text
, n
, pat
);
1259 * For linux the best performance is obtained by slightly hacking memchr.
1260 * memchr works only on 8bit char but jschar is 16bit. So we treat jschar
1261 * in blocks of 8bit and use memchr.
1264 const char* text8
= (const char*) text
;
1265 const char* pat8
= reinterpret_cast<const char*>(&pat
);
1267 JS_ASSERT(n
< UINT32_MAX
/2);
1272 /* Find the first 8 bits of 16bit character in text. */
1273 const char* pos8
= FirstCharMatcher8bit(text8
+ i
, n
- i
, pat8
[0]);
1274 if (pos8
== nullptr)
1276 i
= static_cast<uint32_t>(pos8
- text8
);
1278 /* Incorrect match if it matches the last 8 bits of 16bit char. */
1284 /* Test if last 8 bits match last 8 bits of 16bit char. */
1285 if (pat8
[1] == text8
[i
+ 1])
1286 return (text
+ (i
/2));
1294 template <class InnerMatch
, typename TextChar
, typename PatChar
>
1296 Matcher(const TextChar
* text
, uint32_t textlen
, const PatChar
* pat
, uint32_t patlen
)
1298 const typename
InnerMatch::Extent extent
= InnerMatch::computeExtent(pat
, patlen
);
1301 uint32_t n
= textlen
- patlen
+ 1;
1303 const TextChar
* pos
;
1305 if (sizeof(TextChar
) == 2 && sizeof(PatChar
) == 2)
1306 pos
= (TextChar
*) FirstCharMatcher16bit((jschar
*)text
+ i
, n
- i
, pat
[0]);
1307 else if (sizeof(TextChar
) == 1 && sizeof(PatChar
) == 1)
1308 pos
= (TextChar
*) FirstCharMatcher8bit((char*) text
+ i
, n
- i
, pat
[0]);
1310 pos
= (TextChar
*) FirstCharMatcherUnrolled
<TextChar
, PatChar
>(text
+ i
, n
- i
, pat
[0]);
1315 i
= static_cast<uint32_t>(pos
- text
);
1316 if (InnerMatch::match(pat
+ 1, text
+ i
+ 1, extent
))
1325 template <typename TextChar
, typename PatChar
>
1326 static MOZ_ALWAYS_INLINE
int
1327 StringMatch(const TextChar
* text
, uint32_t textLen
, const PatChar
* pat
, uint32_t patLen
)
1331 if (textLen
< patLen
)
1334 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1336 * Given enough registers, the unrolled loop below is faster than the
1337 * following loop. 32-bit x86 does not have enough registers.
1340 const PatChar p0
= *pat
;
1341 for (const TextChar
* c
= text
, *end
= text
+ textLen
; c
!= end
; ++c
) {
1350 * If the text or pattern string is short, BMH will be more expensive than
1351 * the basic linear scan due to initialization cost and a more complex loop
1352 * body. While the correct threshold is input-dependent, we can make a few
1353 * conservative observations:
1354 * - When |textLen| is "big enough", the initialization time will be
1355 * proportionally small, so the worst-case slowdown is minimized.
1356 * - When |patLen| is "too small", even the best case for BMH will be
1357 * slower than a simple scan for large |textLen| due to the more complex
1359 * From this, the values for "big enough" and "too small" are determined
1360 * empirically. See bug 526348.
1362 if (textLen
>= 512 && patLen
>= 11 && patLen
<= sBMHPatLenMax
) {
1363 int index
= BoyerMooreHorspool(text
, textLen
, pat
, patLen
);
1364 if (index
!= sBMHBadPattern
)
1369 * For big patterns with large potential overlap we want the SIMD-optimized
1370 * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1371 * use memcmp if one of the strings is TwoByte and the other is Latin1.
1373 * FIXME: Linux memcmp performance is sad and the manual loop is faster.
1376 #if !defined(__linux__)
1377 (patLen
> 128 && IsSame
<TextChar
, PatChar
>::value
)
1378 ? Matcher
<MemCmp
<TextChar
, PatChar
>, TextChar
, PatChar
>(text
, textLen
, pat
, patLen
)
1381 Matcher
<ManualCmp
<TextChar
, PatChar
>, TextChar
, PatChar
>(text
, textLen
, pat
, patLen
);
// Linear-string overload: searches |pat| in |text| beginning at character
// offset |start| (default 0, must be <= text->length()). Dispatches to the
// templated StringMatch for each of the four Latin1/two-byte combinations
// of text and pattern. Returns -1 when there is no match; otherwise the
// match index relative to the beginning of |text| (start + match).
1385 StringMatch(JSLinearString
* text
, JSLinearString
* pat
, uint32_t start
= 0)
1387 MOZ_ASSERT(start
<= text
->length());
// Only the tail of the text from |start| onwards is searched.
1388 uint32_t textLen
= text
->length() - start
;
1389 uint32_t patLen
= pat
->length();
1392 AutoCheckCannotGC nogc
;
1393 if (text
->hasLatin1Chars()) {
1394 const Latin1Char
* textChars
= text
->latin1Chars(nogc
) + start
;
1395 if (pat
->hasLatin1Chars())
1396 match
= StringMatch(textChars
, textLen
, pat
->latin1Chars(nogc
), patLen
);
1398 match
= StringMatch(textChars
, textLen
, pat
->twoByteChars(nogc
), patLen
);
1400 const jschar
* textChars
= text
->twoByteChars(nogc
) + start
;
1401 if (pat
->hasLatin1Chars())
1402 match
= StringMatch(textChars
, textLen
, pat
->latin1Chars(nogc
), patLen
);
1404 match
= StringMatch(textChars
, textLen
, pat
->twoByteChars(nogc
), patLen
);
// |match| is relative to textChars, which already skips |start| characters;
// add |start| back so the result indexes into the original text.
1407 return (match
== -1) ? -1 : start
+ match
;
1410 static const size_t sRopeMatchThresholdRatioLog2
= 5;
// Returns whether the two-byte pattern pat[0..patLen) occurs anywhere in
// |text|, i.e. whether the templated StringMatch over the text's Latin1 or
// two-byte characters returns something other than -1.
1413 js::StringHasPattern(JSLinearString
* text
, const jschar
* pat
, uint32_t patLen
)
1415 AutoCheckCannotGC nogc
;
1416 return text
->hasLatin1Chars()
1417 ? StringMatch(text
->latin1Chars(nogc
), text
->length(), pat
, patLen
) != -1
1418 : StringMatch(text
->twoByteChars(nogc
), text
->length(), pat
, patLen
) != -1;
// Exported wrapper that forwards to the linear-string StringMatch overload
// and returns its result unchanged.
// NOTE(review): |start| is size_t here but that overload takes uint32_t, so
// the value is narrowed at the call -- confirm callers stay within range.
1422 js::StringFindPattern(JSLinearString
* text
, JSLinearString
* pat
, size_t start
)
1424 return StringMatch(text
, pat
, start
);
1427 // When an algorithm does not need a string represented as a single linear
1428 // array of characters, this range utility may be used to traverse the string as
1429 // a sequence of linear arrays of characters. This avoids flattening ropes.
// Range over the linear pieces of a string, visited left to right.
// |cur| is the current linear piece (null once the range is exhausted) and
// |stack| holds right children of ropes that still have to be visited.
1430 class StringSegmentRange
1432 // If malloc() shows up in any profiles from this vector, we can add a new
1433 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1434 AutoStringVector stack
;
1435 RootedLinearString cur
;
// Descends along left children until a non-rope string is reached, pushing
// each rope's right child on |stack| on the way, then makes that string the
// current piece. Appending to |stack| can fail.
1437 bool settle(JSString
* str
) {
1438 while (str
->isRope()) {
1439 JSRope
& rope
= str
->asRope();
1440 if (!stack
.append(rope
.rightChild()))
1442 str
= rope
.leftChild();
1444 cur
= &str
->asLinear();
1449 explicit StringSegmentRange(JSContext
* cx
)
1450 : stack(cx
), cur(cx
)
// Must be called on a fresh range (the stack is asserted to be empty); the
// result must not be ignored.
1453 MOZ_WARN_UNUSED_RESULT
bool init(JSString
* str
) {
1454 JS_ASSERT(stack
.empty());
// True once there is no current piece.
1458 bool empty() const {
1459 return cur
== nullptr;
// Current linear piece.
1462 JSLinearString
* front() const {
1463 JS_ASSERT(!cur
->isRope());
// Advances to the next piece: settles on the most recently pushed pending
// string. The branch taken when the stack is empty is not visible in this
// excerpt.
1467 MOZ_WARN_UNUSED_RESULT
bool popFront() {
1468 JS_ASSERT(!empty());
1469 if (stack
.empty()) {
1473 return settle(stack
.popCopy());
// Vector of linear-string pointers (template arguments: 16, SystemAllocPolicy)
// that RopeMatch fills with the leaf nodes of a rope and passes to
// RopeMatchImpl.
1477 typedef Vector
<JSLinearString
*, 16, SystemAllocPolicy
> LinearStringVector
;
1479 template <typename TextChar
, typename PatChar
>
1481 RopeMatchImpl(const AutoCheckCannotGC
& nogc
, LinearStringVector
& strings
,
1482 const PatChar
* pat
, size_t patLen
)
1484 /* Absolute offset from the beginning of the logical text string. */
1487 for (JSLinearString
** outerp
= strings
.begin(); outerp
!= strings
.end(); ++outerp
) {
1488 /* Try to find a match within 'outer'. */
1489 JSLinearString
* outer
= *outerp
;
1490 const TextChar
* chars
= outer
->chars
<TextChar
>(nogc
);
1491 size_t len
= outer
->length();
1492 int matchResult
= StringMatch(chars
, len
, pat
, patLen
);
1493 if (matchResult
!= -1) {
1495 return pos
+ matchResult
;
1498 /* Try to find a match starting in 'outer' and running into other nodes. */
1499 const TextChar
* const text
= chars
+ (patLen
> len
? 0 : len
- patLen
+ 1);
1500 const TextChar
* const textend
= chars
+ len
;
1501 const PatChar p0
= *pat
;
1502 const PatChar
* const p1
= pat
+ 1;
1503 const PatChar
* const patend
= pat
+ patLen
;
1504 for (const TextChar
* t
= text
; t
!= textend
; ) {
1508 JSLinearString
** innerp
= outerp
;
1509 const TextChar
* ttend
= textend
;
1510 const TextChar
* tt
= t
;
1511 for (const PatChar
* pp
= p1
; pp
!= patend
; ++pp
, ++tt
) {
1512 while (tt
== ttend
) {
1513 if (++innerp
== strings
.end())
1516 JSLinearString
* inner
= *innerp
;
1517 tt
= inner
->chars
<TextChar
>(nogc
);
1518 ttend
= tt
+ inner
->length();
1521 goto break_continue
;
1525 return pos
+ (t
- chars
) - 1; /* -1 because of *t++ above */
1537 * RopeMatch takes the text to search and the pattern to search for in the text.
1538 * RopeMatch returns false on OOM and otherwise returns the match index through
1539 * the 'match' outparam (-1 for not found).
1542 RopeMatch(JSContext
* cx
, JSRope
* text
, JSLinearString
* pat
, int* match
)
1544 uint32_t patLen
= pat
->length();
1549 if (text
->length() < patLen
) {
1555 * List of leaf nodes in the rope. If we run out of memory when trying to
1556 * append to this list, we can still fall back to StringMatch, so use the
1557 * system allocator so we don't report OOM in that case.
1559 LinearStringVector strings
;
1562 * We don't want to do rope matching if there is a poor node-to-char ratio,
1563 * since this means spending a lot of time in the match loop below. We also
1564 * need to build the list of leaf nodes. Do both here: iterate over the
1565 * nodes so long as there are not too many.
1567 * We also don't use rope matching if the rope contains both Latin1 and
1568 * TwoByte nodes, to simplify the match algorithm.
1571 size_t threshold
= text
->length() >> sRopeMatchThresholdRatioLog2
;
1572 StringSegmentRange
r(cx
);
1576 bool textIsLatin1
= text
->hasLatin1Chars();
1577 while (!r
.empty()) {
1578 if (threshold
-- == 0 ||
1579 r
.front()->hasLatin1Chars() != textIsLatin1
||
1580 !strings
.append(r
.front()))
1582 JSLinearString
* linear
= text
->ensureLinear(cx
);
1586 *match
= StringMatch(linear
, pat
);
1594 AutoCheckCannotGC nogc
;
1595 if (text
->hasLatin1Chars()) {
1596 if (pat
->hasLatin1Chars())
1597 *match
= RopeMatchImpl
<Latin1Char
>(nogc
, strings
, pat
->latin1Chars(nogc
), patLen
);
1599 *match
= RopeMatchImpl
<Latin1Char
>(nogc
, strings
, pat
->twoByteChars(nogc
), patLen
);
1601 if (pat
->hasLatin1Chars())
1602 *match
= RopeMatchImpl
<jschar
>(nogc
, strings
, pat
->latin1Chars(nogc
), patLen
);
1604 *match
= RopeMatchImpl
<jschar
>(nogc
, strings
, pat
->twoByteChars(nogc
), patLen
);
1610 /* ES6 20121026 draft 15.5.4.24. */
1612 str_contains(JSContext
* cx
, unsigned argc
, Value
* vp
)
1614 CallArgs args
= CallArgsFromVp(argc
, vp
);
1616 // Steps 1, 2, and 3
1617 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
1622 RootedLinearString
searchStr(cx
, ArgToRootedString(cx
, args
, 0));
1628 if (args
.hasDefined(1)) {
1629 if (args
[1].isInt32()) {
1630 int i
= args
[1].toInt32();
1631 pos
= (i
< 0) ? 0U : uint32_t(i
);
1634 if (!ToInteger(cx
, args
[1], &d
))
1636 pos
= uint32_t(Min(Max(d
, 0.0), double(UINT32_MAX
)));
1641 uint32_t textLen
= str
->length();
1644 uint32_t start
= Min(Max(pos
, 0U), textLen
);
1647 JSLinearString
* text
= str
->ensureLinear(cx
);
1651 args
.rval().setBoolean(StringMatch(text
, searchStr
, start
) != -1);
1655 /* ES6 20120927 draft 15.5.4.7. */
1657 str_indexOf(JSContext
* cx
, unsigned argc
, Value
* vp
)
1659 CallArgs args
= CallArgsFromVp(argc
, vp
);
1661 // Steps 1, 2, and 3
1662 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
1667 RootedLinearString
searchStr(cx
, ArgToRootedString(cx
, args
, 0));
1673 if (args
.hasDefined(1)) {
1674 if (args
[1].isInt32()) {
1675 int i
= args
[1].toInt32();
1676 pos
= (i
< 0) ? 0U : uint32_t(i
);
1679 if (!ToInteger(cx
, args
[1], &d
))
1681 pos
= uint32_t(Min(Max(d
, 0.0), double(UINT32_MAX
)));
1686 uint32_t textLen
= str
->length();
1689 uint32_t start
= Min(Max(pos
, 0U), textLen
);
1692 JSLinearString
* text
= str
->ensureLinear(cx
);
1696 args
.rval().setInt32(StringMatch(text
, searchStr
, start
));
1700 template <typename TextChar
, typename PatChar
>
1702 LastIndexOfImpl(const TextChar
* text
, size_t textLen
, const PatChar
* pat
, size_t patLen
,
1705 MOZ_ASSERT(patLen
> 0);
1706 MOZ_ASSERT(patLen
<= textLen
);
1707 MOZ_ASSERT(start
<= textLen
- patLen
);
1709 const PatChar p0
= *pat
;
1710 const PatChar
* patNext
= pat
+ 1;
1711 const PatChar
* patEnd
= pat
+ patLen
;
1713 for (const TextChar
* t
= text
+ start
; t
>= text
; --t
) {
1715 const TextChar
* t1
= t
+ 1;
1716 for (const PatChar
* p1
= patNext
; p1
< patEnd
; ++p1
, ++t1
) {
1718 goto break_continue
;
1721 return static_cast<int32_t>(t
- text
);
1730 str_lastIndexOf(JSContext
* cx
, unsigned argc
, Value
* vp
)
1732 CallArgs args
= CallArgsFromVp(argc
, vp
);
1733 RootedString
textstr(cx
, ThisToStringForStringProto(cx
, args
));
1737 RootedLinearString
pat(cx
, ArgToRootedString(cx
, args
, 0));
1741 size_t textLen
= textstr
->length();
1742 size_t patLen
= pat
->length();
1743 int start
= textLen
- patLen
; // Start searching here
1745 args
.rval().setInt32(-1);
1749 if (args
.hasDefined(1)) {
1750 if (args
[1].isInt32()) {
1751 int i
= args
[1].toInt32();
1758 if (!ToNumber(cx
, args
[1], &d
))
1771 args
.rval().setInt32(start
);
1775 JSLinearString
* text
= textstr
->ensureLinear(cx
);
1780 AutoCheckCannotGC nogc
;
1781 if (text
->hasLatin1Chars()) {
1782 const Latin1Char
* textChars
= text
->latin1Chars(nogc
);
1783 if (pat
->hasLatin1Chars())
1784 res
= LastIndexOfImpl(textChars
, textLen
, pat
->latin1Chars(nogc
), patLen
, start
);
1786 res
= LastIndexOfImpl(textChars
, textLen
, pat
->twoByteChars(nogc
), patLen
, start
);
1788 const jschar
* textChars
= text
->twoByteChars(nogc
);
1789 if (pat
->hasLatin1Chars())
1790 res
= LastIndexOfImpl(textChars
, textLen
, pat
->latin1Chars(nogc
), patLen
, start
);
1792 res
= LastIndexOfImpl(textChars
, textLen
, pat
->twoByteChars(nogc
), patLen
, start
);
1795 args
.rval().setInt32(res
);
1800 HasSubstringAt(JSLinearString
* text
, JSLinearString
* pat
, size_t start
)
1802 MOZ_ASSERT(start
+ pat
->length() <= text
->length());
1804 size_t patLen
= pat
->length();
1806 AutoCheckCannotGC nogc
;
1807 if (text
->hasLatin1Chars()) {
1808 const Latin1Char
* textChars
= text
->latin1Chars(nogc
) + start
;
1809 if (pat
->hasLatin1Chars())
1810 return PodEqual(textChars
, pat
->latin1Chars(nogc
), patLen
);
1812 return EqualChars(textChars
, pat
->twoByteChars(nogc
), patLen
);
1815 const jschar
* textChars
= text
->twoByteChars(nogc
) + start
;
1816 if (pat
->hasTwoByteChars())
1817 return PodEqual(textChars
, pat
->twoByteChars(nogc
), patLen
);
1819 return EqualChars(pat
->latin1Chars(nogc
), textChars
, patLen
);
1822 /* ES6 20131108 draft 21.1.3.18. */
1824 str_startsWith(JSContext
* cx
, unsigned argc
, Value
* vp
)
1826 CallArgs args
= CallArgsFromVp(argc
, vp
);
1828 // Steps 1, 2, and 3
1829 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
1834 if (args
.get(0).isObject() && IsObjectWithClass(args
[0], ESClass_RegExp
, cx
)) {
1835 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_INVALID_ARG_TYPE
,
1836 "first", "", "Regular Expression");
1841 RootedLinearString
searchStr(cx
, ArgToRootedString(cx
, args
, 0));
1847 if (args
.hasDefined(1)) {
1848 if (args
[1].isInt32()) {
1849 int i
= args
[1].toInt32();
1850 pos
= (i
< 0) ? 0U : uint32_t(i
);
1853 if (!ToInteger(cx
, args
[1], &d
))
1855 pos
= uint32_t(Min(Max(d
, 0.0), double(UINT32_MAX
)));
1860 uint32_t textLen
= str
->length();
1863 uint32_t start
= Min(Max(pos
, 0U), textLen
);
1866 uint32_t searchLen
= searchStr
->length();
1869 if (searchLen
+ start
< searchLen
|| searchLen
+ start
> textLen
) {
1870 args
.rval().setBoolean(false);
1875 JSLinearString
* text
= str
->ensureLinear(cx
);
1879 args
.rval().setBoolean(HasSubstringAt(text
, searchStr
, start
));
1883 /* ES6 20131108 draft 21.1.3.7. */
1885 str_endsWith(JSContext
* cx
, unsigned argc
, Value
* vp
)
1887 CallArgs args
= CallArgsFromVp(argc
, vp
);
1889 // Steps 1, 2, and 3
1890 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
1895 if (args
.get(0).isObject() && IsObjectWithClass(args
[0], ESClass_RegExp
, cx
)) {
1896 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_INVALID_ARG_TYPE
,
1897 "first", "", "Regular Expression");
1902 RootedLinearString
searchStr(cx
, ArgToRootedString(cx
, args
, 0));
1907 uint32_t textLen
= str
->length();
1910 uint32_t pos
= textLen
;
1911 if (args
.hasDefined(1)) {
1912 if (args
[1].isInt32()) {
1913 int i
= args
[1].toInt32();
1914 pos
= (i
< 0) ? 0U : uint32_t(i
);
1917 if (!ToInteger(cx
, args
[1], &d
))
1919 pos
= uint32_t(Min(Max(d
, 0.0), double(UINT32_MAX
)));
1924 uint32_t end
= Min(Max(pos
, 0U), textLen
);
1927 uint32_t searchLen
= searchStr
->length();
1929 // Step 13 (reordered)
1930 if (searchLen
> end
) {
1931 args
.rval().setBoolean(false);
1936 uint32_t start
= end
- searchLen
;
1939 JSLinearString
* text
= str
->ensureLinear(cx
);
1943 args
.rval().setBoolean(HasSubstringAt(text
, searchStr
, start
));
1947 template <typename CharT
>
1949 TrimString(const CharT
* chars
, bool trimLeft
, bool trimRight
, size_t length
,
1950 size_t* pBegin
, size_t* pEnd
)
1952 size_t begin
= 0, end
= length
;
1955 while (begin
< length
&& unicode::IsSpace(chars
[begin
]))
1960 while (end
> begin
&& unicode::IsSpace(chars
[end
- 1]))
1969 TrimString(JSContext
* cx
, Value
* vp
, bool trimLeft
, bool trimRight
)
1971 CallReceiver call
= CallReceiverFromVp(vp
);
1972 RootedString
str(cx
, ThisToStringForStringProto(cx
, call
));
1976 JSLinearString
* linear
= str
->ensureLinear(cx
);
1980 size_t length
= linear
->length();
1982 if (linear
->hasLatin1Chars()) {
1983 AutoCheckCannotGC nogc
;
1984 TrimString(linear
->latin1Chars(nogc
), trimLeft
, trimRight
, length
, &begin
, &end
);
1986 AutoCheckCannotGC nogc
;
1987 TrimString(linear
->twoByteChars(nogc
), trimLeft
, trimRight
, length
, &begin
, &end
);
1990 str
= NewDependentString(cx
, str
, begin
, end
- begin
);
1994 call
.rval().setString(str
);
1999 str_trim(JSContext
* cx
, unsigned argc
, Value
* vp
)
2001 return TrimString(cx
, vp
, true, true);
2005 str_trimLeft(JSContext
* cx
, unsigned argc
, Value
* vp
)
2007 return TrimString(cx
, vp
, true, false);
2011 str_trimRight(JSContext
* cx
, unsigned argc
, Value
* vp
)
2013 return TrimString(cx
, vp
, false, true);
2017 * Perl-inspired string functions.
2022 /* Result of a successfully performed flat match. */
2028 friend class StringRegExpGuard
;
2031 explicit FlatMatch(JSContext
* cx
) : pat_(cx
) {}
2032 JSLinearString
* pattern() const { return pat_
; }
2033 size_t patternLength() const { return pat_
->length(); }
2036 * Note: The match is -1 when the match is performed successfully,
2037 * but no match is found.
2039 int32_t match() const { return match_
; }
2042 } /* anonymous namespace */
2045 IsRegExpMetaChar(jschar c
)
2048 /* Taken from the PatternCharacter production in 15.10.1. */
2049 case '^': case '$': case '\\': case '.': case '*': case '+':
2050 case '?': case '(': case ')': case '[': case ']': case '{':
/*
 * Report whether any of the |length| characters starting at |chars| is a
 * regular-expression metacharacter.
 */
template <typename CharT>
static inline bool
HasRegExpMetaChars(const CharT* chars, size_t length)
{
    const CharT* const end = chars + length;
    for (const CharT* p = chars; p != end; ++p) {
        if (IsRegExpMetaChar(*p))
            return true;
    }
    return false;
}
2070 js::StringHasRegExpMetaChars(JSLinearString
* str
, size_t beginOffset
, size_t endOffset
)
2072 JS_ASSERT(beginOffset
+ endOffset
<= str
->length());
2074 AutoCheckCannotGC nogc
;
2075 if (str
->hasLatin1Chars())
2076 return HasRegExpMetaChars(str
->latin1Chars(nogc
) + beginOffset
, str
->length() - beginOffset
- endOffset
);
2078 return HasRegExpMetaChars(str
->twoByteChars(nogc
) + beginOffset
, str
->length() - beginOffset
- endOffset
);
2084 * StringRegExpGuard factors logic out of String regexp operations.
2086 * |optarg| indicates in which argument position RegExp flags will be found, if
2087 * present. This is a Mozilla extension and not part of any ECMA spec.
2089 class MOZ_STACK_CLASS StringRegExpGuard
2096 * Upper bound on the number of characters we are willing to potentially
2097 * waste on searching for RegExp meta-characters.
2099 static const size_t MAX_FLAT_PAT_LEN
= 256;
2101 template <typename CharT
>
2103 flattenPattern(StringBuffer
& sb
, const CharT
* chars
, size_t len
)
2105 static const char ESCAPE_CHAR
= '\\';
2106 for (const CharT
* it
= chars
; it
< chars
+ len
; ++it
) {
2107 if (IsRegExpMetaChar(*it
)) {
2108 if (!sb
.append(ESCAPE_CHAR
) || !sb
.append(*it
))
2111 if (!sb
.append(*it
))
2119 flattenPattern(JSContext
* cx
, JSAtom
* pat
)
2121 StringBuffer
sb(cx
);
2122 if (!sb
.reserve(pat
->length()))
2125 if (pat
->hasLatin1Chars()) {
2126 AutoCheckCannotGC nogc
;
2127 if (!flattenPattern(sb
, pat
->latin1Chars(nogc
), pat
->length()))
2130 AutoCheckCannotGC nogc
;
2131 if (!flattenPattern(sb
, pat
->twoByteChars(nogc
), pat
->length()))
2135 return sb
.finishAtom();
2139 explicit StringRegExpGuard(JSContext
* cx
)
2140 : re_(cx
), fm(cx
), obj_(cx
)
2143 /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
2144 bool init(JSContext
* cx
, CallArgs args
, bool convertVoid
= false)
2146 if (args
.length() != 0 && IsObjectWithClass(args
[0], ESClass_RegExp
, cx
))
2147 return init(cx
, &args
[0].toObject());
2149 if (convertVoid
&& !args
.hasDefined(0)) {
2150 fm
.pat_
= cx
->runtime()->emptyString
;
2154 JSString
* arg
= ArgToRootedString(cx
, args
, 0);
2158 fm
.pat_
= AtomizeString(cx
, arg
);
2165 bool init(JSContext
* cx
, JSObject
* regexp
) {
2168 JS_ASSERT(ObjectClassIs(obj_
, ESClass_RegExp
, cx
));
2170 if (!RegExpToShared(cx
, obj_
, &re_
))
2175 bool init(JSContext
* cx
, HandleString pattern
) {
2176 fm
.pat_
= AtomizeString(cx
, pattern
);
2183 * Attempt to match |patstr| to |textstr|. A flags argument, metachars in
2184 * the pattern string, or a lengthy pattern string can thwart this process.
2186 * |checkMetaChars| looks for regexp metachars in the pattern string.
2188 * Return whether flat matching could be used.
2190 * N.B. tryFlatMatch returns nullptr on OOM, so the caller must check
2191 * cx->isExceptionPending().
2194 tryFlatMatch(JSContext
* cx
, JSString
* text
, unsigned optarg
, unsigned argc
,
2195 bool checkMetaChars
= true)
2197 if (re_
.initialized())
2203 size_t patLen
= fm
.pat_
->length();
2204 if (checkMetaChars
&& (patLen
> MAX_FLAT_PAT_LEN
|| StringHasRegExpMetaChars(fm
.pat_
)))
2208 * |text| could be a rope, so we want to avoid flattening it for as
2211 if (text
->isRope()) {
2212 if (!RopeMatch(cx
, &text
->asRope(), fm
.pat_
, &fm
.match_
))
2215 fm
.match_
= StringMatch(&text
->asLinear(), fm
.pat_
, 0);
2221 /* If the pattern is not already a regular expression, make it so. */
2222 bool normalizeRegExp(JSContext
* cx
, bool flat
, unsigned optarg
, CallArgs args
)
2224 if (re_
.initialized())
2227 /* Build RegExp from pattern string. */
2228 RootedString
opt(cx
);
2229 if (optarg
< args
.length()) {
2230 opt
= ToString
<CanGC
>(cx
, args
[optarg
]);
2237 Rooted
<JSAtom
*> pat(cx
);
2239 pat
= flattenPattern(cx
, fm
.pat_
);
2247 return cx
->compartment()->regExps
.get(cx
, pat
, opt
, &re_
);
2250 bool zeroLastIndex(JSContext
* cx
) {
2251 if (!regExpIsObject())
2254 // Use a fast path for same-global RegExp objects with writable
2256 if (obj_
->is
<RegExpObject
>() && obj_
->nativeLookup(cx
, cx
->names().lastIndex
)->writable()) {
2257 obj_
->as
<RegExpObject
>().zeroLastIndex();
2261 // Handle everything else generically (including throwing if .lastIndex is non-writable).
2262 RootedValue
zero(cx
, Int32Value(0));
2263 return JSObject::setProperty(cx
, obj_
, obj_
, cx
->names().lastIndex
, &zero
, true);
2266 RegExpShared
& regExp() { return *re_
; }
2268 bool regExpIsObject() { return obj_
!= nullptr; }
2269 HandleObject
regExpObject() {
2270 JS_ASSERT(regExpIsObject());
2275 StringRegExpGuard(const StringRegExpGuard
&) MOZ_DELETE
;
2276 void operator=(const StringRegExpGuard
&) MOZ_DELETE
;
2279 } /* anonymous namespace */
2282 DoMatchLocal(JSContext
* cx
, CallArgs args
, RegExpStatics
* res
, HandleLinearString input
,
2286 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2287 RegExpRunStatus status
= re
.execute(cx
, input
, &i
, matches
);
2288 if (status
== RegExpRunStatus_Error
)
2291 if (status
== RegExpRunStatus_Success_NotFound
) {
2292 args
.rval().setNull();
2296 if (!res
->updateFromMatchPairs(cx
, input
, matches
))
2299 RootedValue
rval(cx
);
2300 if (!CreateRegExpMatchResult(cx
, input
, matches
, &rval
))
2303 args
.rval().set(rval
);
2307 /* ES5 15.5.4.10 step 8. */
2309 DoMatchGlobal(JSContext
* cx
, CallArgs args
, RegExpStatics
* res
, HandleLinearString input
,
2310 StringRegExpGuard
& g
)
2314 // This single zeroing of "lastIndex" covers all "lastIndex" changes in the
2315 // rest of String.prototype.match, particularly in steps 8f(i) and
2316 // 8f(iii)(2)(a). Here's why.
2318 // The inputs to the calls to RegExp.prototype.exec are a RegExp object
2319 // whose .global is true and a string. The only side effect of a call in
2320 // these circumstances is that the RegExp's .lastIndex will be modified to
2321 // the next starting index after the discovered match (or to 0 if there's
2322 // no remaining match). Because .lastIndex is a non-configurable data
2323 // property and no script-controllable code executes after step 8a, passing
2324 // step 8a implies *every* .lastIndex set succeeds. String.prototype.match
2325 // calls RegExp.prototype.exec repeatedly, and the last call doesn't match,
2326 // so the final value of .lastIndex is 0: exactly the state after step 8a
2327 // succeeds. No spec step lets script observe intermediate .lastIndex
2330 // The arrays returned by RegExp.prototype.exec always have a string at
2331 // index 0, for which [[Get]]s have no side effects.
2333 // Filling in a new array using [[DefineOwnProperty]] is unobservable.
2335 // This is a tricky point, because after this set, our implementation *can*
2336 // fail. The key is that script can't distinguish these failure modes from
2337 // one where, in spec terms, we fail immediately after step 8a. That *in
2338 // reality* we might have done extra matching work, or created a partial
2339 // results array to return, or hit an interrupt, is irrelevant. The
2340 // script can't tell we did any of those things but didn't update
2341 // .lastIndex. Thus we can optimize steps 8b onward however we want,
2342 // including eliminating intermediate .lastIndex sets, as long as we don't
2343 // add ways for script to observe the intermediate states.
2345 // In short: it's okay to cheat (by setting .lastIndex to 0, once) because
2346 // we can't get caught.
2347 if (!g
.zeroLastIndex(cx
))
2351 AutoValueVector
elements(cx
);
2353 size_t lastSuccessfulStart
= 0;
2355 // The loop variables from steps 8c-e aren't needed, as we use different
2356 // techniques from the spec to implement step 8f's loop.
2359 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2360 size_t charsLen
= input
->length();
2361 RegExpShared
& re
= g
.regExp();
2362 for (size_t searchIndex
= 0; searchIndex
<= charsLen
; ) {
2363 if (!CheckForInterrupt(cx
))
2366 // Steps 8f(i-ii), minus "lastIndex" updates (see above).
2367 size_t nextSearchIndex
= searchIndex
;
2368 RegExpRunStatus status
= re
.execute(cx
, input
, &nextSearchIndex
, matches
);
2369 if (status
== RegExpRunStatus_Error
)
2373 if (status
== RegExpRunStatus_Success_NotFound
)
2376 lastSuccessfulStart
= searchIndex
;
2377 MatchPair
& match
= matches
[0];
2379 // Steps 8f(iii)(1-3).
2380 searchIndex
= match
.isEmpty() ? nextSearchIndex
+ 1 : nextSearchIndex
;
2382 // Step 8f(iii)(4-5).
2383 JSLinearString
* str
= NewDependentString(cx
, input
, match
.start
, match
.length());
2386 if (!elements
.append(StringValue(str
)))
2391 if (elements
.empty()) {
2392 args
.rval().setNull();
2396 // The last *successful* match updates the RegExpStatics. (Interestingly,
2397 // this implies that String.prototype.match's semantics aren't those
2398 // implied by the RegExp.prototype.exec calls in the ES5 algorithm.)
2399 res
->updateLazily(cx
, input
, &re
, lastSuccessfulStart
);
2401 // Steps 8b, 8f(iii)(5-6), 8h.
2402 JSObject
* array
= NewDenseCopiedArray(cx
, elements
.length(), elements
.begin());
2406 args
.rval().setObject(*array
);
2411 BuildFlatMatchArray(JSContext
* cx
, HandleString textstr
, const FlatMatch
& fm
, CallArgs
* args
)
2413 if (fm
.match() < 0) {
2414 args
->rval().setNull();
2418 /* For this non-global match, produce a RegExp.exec-style array. */
2419 RootedObject
obj(cx
, NewDenseEmptyArray(cx
));
2423 RootedValue
patternVal(cx
, StringValue(fm
.pattern()));
2424 RootedValue
matchVal(cx
, Int32Value(fm
.match()));
2425 RootedValue
textVal(cx
, StringValue(textstr
));
2427 if (!JSObject::defineElement(cx
, obj
, 0, patternVal
) ||
2428 !JSObject::defineProperty(cx
, obj
, cx
->names().index
, matchVal
) ||
2429 !JSObject::defineProperty(cx
, obj
, cx
->names().input
, textVal
))
2434 args
->rval().setObject(*obj
);
2438 /* ES5 15.5.4.10. */
2440 js::str_match(JSContext
* cx
, unsigned argc
, Value
* vp
)
2442 CallArgs args
= CallArgsFromVp(argc
, vp
);
2445 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
2449 /* Steps 3-4, plus the trailing-argument "flags" extension. */
2450 StringRegExpGuard
g(cx
);
2451 if (!g
.init(cx
, args
, true))
2454 /* Fast path when the search pattern can be searched for as a string. */
2455 if (const FlatMatch
* fm
= g
.tryFlatMatch(cx
, str
, 1, args
.length()))
2456 return BuildFlatMatchArray(cx
, str
, *fm
, &args
);
2458 /* Return if there was an error in tryFlatMatch. */
2459 if (cx
->isExceptionPending())
2462 /* Create regular-expression internals as needed to perform the match. */
2463 if (!g
.normalizeRegExp(cx
, false, 1, args
))
2466 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
2470 RootedLinearString
linearStr(cx
, str
->ensureLinear(cx
));
2475 if (!g
.regExp().global())
2476 return DoMatchLocal(cx
, args
, res
, linearStr
, g
.regExp());
2479 return DoMatchGlobal(cx
, args
, res
, linearStr
, g
);
2483 js::str_search(JSContext
* cx
, unsigned argc
, Value
* vp
)
2485 CallArgs args
= CallArgsFromVp(argc
, vp
);
2486 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
2490 StringRegExpGuard
g(cx
);
2491 if (!g
.init(cx
, args
, true))
2493 if (const FlatMatch
* fm
= g
.tryFlatMatch(cx
, str
, 1, args
.length())) {
2494 args
.rval().setInt32(fm
->match());
2498 if (cx
->isExceptionPending()) /* from tryFlatMatch */
2501 if (!g
.normalizeRegExp(cx
, false, 1, args
))
2504 RootedLinearString
linearStr(cx
, str
->ensureLinear(cx
));
2508 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
2512 /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
2514 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2515 RegExpRunStatus status
= g
.regExp().execute(cx
, linearStr
, &i
, matches
);
2516 if (status
== RegExpRunStatus_Error
)
2519 if (status
== RegExpRunStatus_Success
)
2520 res
->updateLazily(cx
, linearStr
, &g
.regExp(), 0);
2522 args
.rval().setInt32(status
== RegExpRunStatus_Success_NotFound
? -1 : matches
[0].start
);
2526 // Utility for building a rope (lazy concatenation) of strings.
2531 RopeBuilder(const RopeBuilder
& other
) MOZ_DELETE
;
2532 void operator=(const RopeBuilder
& other
) MOZ_DELETE
;
2535 explicit RopeBuilder(JSContext
* cx
)
2536 : cx(cx
), res(cx
, cx
->runtime()->emptyString
)
2539 inline bool append(HandleString str
) {
2540 res
= ConcatStrings
<CanGC
>(cx
, res
, str
);
2544 inline JSString
* result() {
2551 template <typename CharT
>
2553 FindDollarIndex(const CharT
* chars
, size_t length
)
2555 if (const CharT
* p
= js_strchr_limit(chars
, '$', chars
+ length
)) {
2556 uint32_t dollarIndex
= p
- chars
;
2557 MOZ_ASSERT(dollarIndex
< length
);
2565 explicit ReplaceData(JSContext
* cx
)
2566 : str(cx
), g(cx
), lambda(cx
), elembase(cx
), repstr(cx
),
2567 fig(cx
, NullValue()), sb(cx
)
2570 inline void setReplacementString(JSLinearString
* string
) {
2576 AutoCheckCannotGC nogc
;
2577 dollarIndex
= string
->hasLatin1Chars()
2578 ? FindDollarIndex(string
->latin1Chars(nogc
), string
->length())
2579 : FindDollarIndex(string
->twoByteChars(nogc
), string
->length());
2582 inline void setReplacementFunction(JSObject
* func
) {
2587 dollarIndex
= UINT32_MAX
;
2590 RootedString str
; /* 'this' parameter object as a string */
2591 StringRegExpGuard g
; /* regexp parameter object and private data */
2592 RootedObject lambda
; /* replacement function object or null */
2593 RootedObject elembase
; /* object for function(a){return b[a]} replace */
2594 RootedLinearString repstr
; /* replacement string */
2595 uint32_t dollarIndex
; /* index of first $ in repstr, or UINT32_MAX */
2596 int leftIndex
; /* left context index in str->chars */
2597 bool calledBack
; /* record whether callback has been called */
2598 FastInvokeGuard fig
; /* used for lambda calls, also holds arguments */
2599 StringBuffer sb
; /* buffer built during DoMatch */
2602 } /* anonymous namespace */
// Forward declaration: DoMatchForReplaceLocal and DoMatchForReplaceGlobal
// call ReplaceRegExp before its definition appears later in the file.
2605 ReplaceRegExp(JSContext
* cx
, RegExpStatics
* res
, ReplaceData
& rdata
);
2608 DoMatchForReplaceLocal(JSContext
* cx
, RegExpStatics
* res
, HandleLinearString linearStr
,
2609 RegExpShared
& re
, ReplaceData
& rdata
)
2612 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2613 RegExpRunStatus status
= re
.execute(cx
, linearStr
, &i
, matches
);
2614 if (status
== RegExpRunStatus_Error
)
2617 if (status
== RegExpRunStatus_Success_NotFound
)
2620 if (!res
->updateFromMatchPairs(cx
, linearStr
, matches
))
2623 return ReplaceRegExp(cx
, res
, rdata
);
2627 DoMatchForReplaceGlobal(JSContext
* cx
, RegExpStatics
* res
, HandleLinearString linearStr
,
2628 RegExpShared
& re
, ReplaceData
& rdata
)
2630 size_t charsLen
= linearStr
->length();
2631 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
2632 for (size_t count
= 0, i
= 0; i
<= charsLen
; ++count
) {
2633 if (!CheckForInterrupt(cx
))
2636 RegExpRunStatus status
= re
.execute(cx
, linearStr
, &i
, matches
);
2637 if (status
== RegExpRunStatus_Error
)
2640 if (status
== RegExpRunStatus_Success_NotFound
)
2643 if (!res
->updateFromMatchPairs(cx
, linearStr
, matches
))
2646 if (!ReplaceRegExp(cx
, res
, rdata
))
2648 if (!res
->matched())
2655 template <typename CharT
>
2657 InterpretDollar(RegExpStatics
* res
, const CharT
* bp
, const CharT
* dp
, const CharT
* ep
,
2658 ReplaceData
& rdata
, JSSubString
* out
, size_t* skip
)
2660 JS_ASSERT(*dp
== '$');
2662 /* If there is only a dollar, bail now */
2666 /* Interpret all Perl match-induced dollar variables. */
2668 if (JS7_ISDEC(dc
)) {
2669 /* ECMA-262 Edition 3: 1-9 or 01-99 */
2670 unsigned num
= JS7_UNDEC(dc
);
2671 if (num
> res
->getMatches().parenCount())
2674 const CharT
* cp
= dp
+ 2;
2675 if (cp
< ep
&& (dc
= *cp
, JS7_ISDEC(dc
))) {
2676 unsigned tmp
= 10 * num
+ JS7_UNDEC(dc
);
2677 if (tmp
<= res
->getMatches().parenCount()) {
2687 JS_ASSERT(num
<= res
->getMatches().parenCount());
2690 * Note: we index to get the paren with the (1-indexed) pair
2691 * number, as opposed to a (0-indexed) paren number.
2693 res
->getParen(num
, out
);
2700 out
->init(rdata
.repstr
, dp
- bp
, 1);
2703 res
->getLastMatch(out
);
2706 res
->getLastParen(out
);
2709 res
->getLeftContext(out
);
2712 res
->getRightContext(out
);
2718 template <typename CharT
>
2720 FindReplaceLengthString(JSContext
* cx
, RegExpStatics
* res
, ReplaceData
& rdata
, size_t* sizep
)
2722 JSLinearString
* repstr
= rdata
.repstr
;
2723 CheckedInt
<uint32_t> replen
= repstr
->length();
2725 if (rdata
.dollarIndex
!= UINT32_MAX
) {
2726 AutoCheckCannotGC nogc
;
2727 MOZ_ASSERT(rdata
.dollarIndex
< repstr
->length());
2728 const CharT
* bp
= repstr
->chars
<CharT
>(nogc
);
2729 const CharT
* dp
= bp
+ rdata
.dollarIndex
;
2730 const CharT
* ep
= bp
+ repstr
->length();
2734 if (InterpretDollar(res
, bp
, dp
, ep
, rdata
, &sub
, &skip
)) {
2735 if (sub
.length
> skip
)
2736 replen
+= sub
.length
- skip
;
2738 replen
-= skip
- sub
.length
;
2744 dp
= js_strchr_limit(dp
, '$', ep
);
2748 if (!replen
.isValid()) {
2749 js_ReportAllocationOverflow(cx
);
2753 *sizep
= replen
.value();
2758 FindReplaceLength(JSContext
* cx
, RegExpStatics
* res
, ReplaceData
& rdata
, size_t* sizep
)
2760 if (rdata
.elembase
) {
2762 * The base object is used when replace was passed a lambda which looks like
2763 * 'function(a) { return b[a]; }' for the base object b. b will not change
2764 * in the course of the replace unless we end up making a scripted call due
2765 * to accessing a scripted getter or a value with a scripted toString.
2767 JS_ASSERT(rdata
.lambda
);
2768 JS_ASSERT(!rdata
.elembase
->getOps()->lookupProperty
);
2769 JS_ASSERT(!rdata
.elembase
->getOps()->getProperty
);
2771 RootedValue
match(cx
);
2772 if (!res
->createLastMatch(cx
, &match
))
2774 JSAtom
* atom
= ToAtom
<CanGC
>(cx
, match
);
2779 if (HasDataProperty(cx
, rdata
.elembase
, AtomToId(atom
), v
.address()) && v
.isString()) {
2780 rdata
.repstr
= v
.toString()->ensureLinear(cx
);
2783 *sizep
= rdata
.repstr
->length();
2788 * Couldn't handle this property, fall through and despecialize to the
2789 * general lambda case.
2791 rdata
.elembase
= nullptr;
2795 RootedObject
lambda(cx
, rdata
.lambda
);
2796 PreserveRegExpStatics
staticsGuard(cx
, res
);
2797 if (!staticsGuard
.init(cx
))
2801 * In the lambda case, not only do we find the replacement string's
2802 * length, we compute repstr and return it via rdata for use within
2803 * DoReplace. The lambda is called with arguments ($&, $1, $2, ...,
2804 * index, input), i.e., all the properties of a regexp match array.
2805 * For $&, etc., we must create string jsvals from cx->regExpStatics.
2806 * We grab up stack space to keep the newborn strings GC-rooted.
2808 unsigned p
= res
->getMatches().parenCount();
2809 unsigned argc
= 1 + p
+ 2;
2811 InvokeArgs
& args
= rdata
.fig
.args();
2812 if (!args
.init(argc
))
2815 args
.setCallee(ObjectValue(*lambda
));
2816 args
.setThis(UndefinedValue());
2818 /* Push $&, $1, $2, ... */
2820 if (!res
->createLastMatch(cx
, args
[argi
++]))
2823 for (size_t i
= 0; i
< res
->getMatches().parenCount(); ++i
) {
2824 if (!res
->createParen(cx
, i
+ 1, args
[argi
++]))
2828 /* Push match index and input string. */
2829 args
[argi
++].setInt32(res
->getMatches()[0].start
);
2830 args
[argi
].setString(rdata
.str
);
2832 if (!rdata
.fig
.invoke(cx
))
2835 /* root repstr: rdata is on the stack, so scanned by conservative gc. */
2836 JSString
* repstr
= ToString
<CanGC
>(cx
, args
.rval());
2839 rdata
.repstr
= repstr
->ensureLinear(cx
);
2842 *sizep
= rdata
.repstr
->length();
2846 return rdata
.repstr
->hasLatin1Chars()
2847 ? FindReplaceLengthString
<Latin1Char
>(cx
, res
, rdata
, sizep
)
2848 : FindReplaceLengthString
<jschar
>(cx
, res
, rdata
, sizep
);
2852 * Precondition: |rdata.sb| already has necessary growth space reserved (as
2853 * derived from FindReplaceLength), and has been inflated to TwoByte if
2856 template <typename CharT
>
2858 DoReplace(RegExpStatics
* res
, ReplaceData
& rdata
)
2860 AutoCheckCannotGC nogc
;
2861 JSLinearString
* repstr
= rdata
.repstr
;
2862 const CharT
* bp
= repstr
->chars
<CharT
>(nogc
);
2863 const CharT
* cp
= bp
;
2865 if (rdata
.dollarIndex
!= UINT32_MAX
) {
2866 MOZ_ASSERT(rdata
.dollarIndex
< repstr
->length());
2867 const CharT
* dp
= bp
+ rdata
.dollarIndex
;
2868 const CharT
* ep
= bp
+ repstr
->length();
2870 /* Move one of the constant portions of the replacement value. */
2871 size_t len
= dp
- cp
;
2872 rdata
.sb
.infallibleAppend(cp
, len
);
2877 if (InterpretDollar(res
, bp
, dp
, ep
, rdata
, &sub
, &skip
)) {
2878 rdata
.sb
.infallibleAppendSubstring(sub
.base
, sub
.offset
, sub
.length
);
2885 dp
= js_strchr_limit(dp
, '$', ep
);
2888 rdata
.sb
.infallibleAppend(cp
, repstr
->length() - (cp
- bp
));
2892 ReplaceRegExp(JSContext
* cx
, RegExpStatics
* res
, ReplaceData
& rdata
)
2895 const MatchPair
& match
= res
->getMatches()[0];
2896 JS_ASSERT(!match
.isUndefined());
2897 JS_ASSERT(match
.limit
>= match
.start
&& match
.limit
>= 0);
2899 rdata
.calledBack
= true;
2900 size_t leftoff
= rdata
.leftIndex
;
2901 size_t leftlen
= match
.start
- leftoff
;
2902 rdata
.leftIndex
= match
.limit
;
2904 size_t replen
= 0; /* silence 'unused' warning */
2905 if (!FindReplaceLength(cx
, res
, rdata
, &replen
))
2908 CheckedInt
<uint32_t> newlen(rdata
.sb
.length());
2911 if (!newlen
.isValid()) {
2912 js_ReportAllocationOverflow(cx
);
2917 * Inflate the buffer now if needed, to avoid (fallible) Latin1 to TwoByte
2918 * inflation later on.
2920 JSLinearString
& str
= rdata
.str
->asLinear(); /* flattened for regexp */
2921 if (str
.hasTwoByteChars() || rdata
.repstr
->hasTwoByteChars()) {
2922 if (!rdata
.sb
.ensureTwoByteChars())
2926 if (!rdata
.sb
.reserve(newlen
.value()))
2929 /* Append skipped-over portion of the search value. */
2930 rdata
.sb
.infallibleAppendSubstring(&str
, leftoff
, leftlen
);
2932 if (rdata
.repstr
->hasLatin1Chars())
2933 DoReplace
<Latin1Char
>(res
, rdata
);
2935 DoReplace
<jschar
>(res
, rdata
);
2940 BuildFlatReplacement(JSContext
* cx
, HandleString textstr
, HandleString repstr
,
2941 const FlatMatch
& fm
, MutableHandleValue rval
)
2943 RopeBuilder
builder(cx
);
2944 size_t match
= fm
.match();
2945 size_t matchEnd
= match
+ fm
.patternLength();
2947 if (textstr
->isRope()) {
2949 * If we are replacing over a rope, avoid flattening it by iterating
2950 * through it, building a new rope.
2952 StringSegmentRange
r(cx
);
2953 if (!r
.init(textstr
))
2956 while (!r
.empty()) {
2957 RootedString
str(cx
, r
.front());
2958 size_t len
= str
->length();
2959 size_t strEnd
= pos
+ len
;
2960 if (pos
< matchEnd
&& strEnd
> match
) {
2962 * We need to special-case any part of the rope that overlaps
2963 * with the replacement string.
2967 * If this part of the rope overlaps with the left side of
2968 * the pattern, then it must be the only one to overlap with
2969 * the first character in the pattern, so we include the
2970 * replacement string here.
2972 RootedString
leftSide(cx
, NewDependentString(cx
, str
, 0, match
- pos
));
2974 !builder
.append(leftSide
) ||
2975 !builder
.append(repstr
)) {
2981 * If str runs off the end of the matched string, append the
2984 if (strEnd
> matchEnd
) {
2985 RootedString
rightSide(cx
, NewDependentString(cx
, str
, matchEnd
- pos
,
2986 strEnd
- matchEnd
));
2987 if (!rightSide
|| !builder
.append(rightSide
))
2991 if (!builder
.append(str
))
2994 pos
+= str
->length();
2999 RootedString
leftSide(cx
, NewDependentString(cx
, textstr
, 0, match
));
3002 RootedString
rightSide(cx
);
3003 rightSide
= NewDependentString(cx
, textstr
, match
+ fm
.patternLength(),
3004 textstr
->length() - match
- fm
.patternLength());
3006 !builder
.append(leftSide
) ||
3007 !builder
.append(repstr
) ||
3008 !builder
.append(rightSide
)) {
3013 rval
.setString(builder
.result());
3017 template <typename CharT
>
3019 AppendDollarReplacement(StringBuffer
& newReplaceChars
, size_t firstDollarIndex
,
3020 const FlatMatch
& fm
, JSLinearString
* text
,
3021 const CharT
* repChars
, size_t repLength
)
3023 JS_ASSERT(firstDollarIndex
< repLength
);
3025 size_t matchStart
= fm
.match();
3026 size_t matchLimit
= matchStart
+ fm
.patternLength();
3028 /* Move the pre-dollar chunk in bulk. */
3029 newReplaceChars
.infallibleAppend(repChars
, firstDollarIndex
);
3031 /* Move the rest char-by-char, interpreting dollars as we encounter them. */
3032 const CharT
* repLimit
= repChars
+ repLength
;
3033 for (const CharT
* it
= repChars
+ firstDollarIndex
; it
< repLimit
; ++it
) {
3034 if (*it
!= '$' || it
== repLimit
- 1) {
3035 if (!newReplaceChars
.append(*it
))
3040 switch (*(it
+ 1)) {
3041 case '$': /* Eat one of the dollars. */
3042 if (!newReplaceChars
.append(*it
))
3046 if (!newReplaceChars
.appendSubstring(text
, matchStart
, matchLimit
- matchStart
))
3050 if (!newReplaceChars
.appendSubstring(text
, 0, matchStart
))
3054 if (!newReplaceChars
.appendSubstring(text
, matchLimit
, text
->length() - matchLimit
))
3057 default: /* The dollar we saw was not special (no matter what its mother told it). */
3058 if (!newReplaceChars
.append(*it
))
3062 ++it
; /* We always eat an extra char in the above switch. */
3069 * Perform a linear-scan dollar substitution on the replacement text,
3070 * constructing a result string that looks like:
3072 * newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:]
3075 BuildDollarReplacement(JSContext
* cx
, JSString
* textstrArg
, JSLinearString
* repstr
,
3076 uint32_t firstDollarIndex
, const FlatMatch
& fm
, MutableHandleValue rval
)
3078 RootedLinearString
textstr(cx
, textstrArg
->ensureLinear(cx
));
3082 size_t matchStart
= fm
.match();
3083 size_t matchLimit
= matchStart
+ fm
.patternLength();
3088 * len(newstr) >= len(orig) - len(match) + len(replacement)
3090 * Note that dollar vars _could_ make the resulting text smaller than this.
3092 StringBuffer
newReplaceChars(cx
);
3093 if (repstr
->hasTwoByteChars() && !newReplaceChars
.ensureTwoByteChars())
3096 if (!newReplaceChars
.reserve(textstr
->length() - fm
.patternLength() + repstr
->length()))
3100 if (repstr
->hasLatin1Chars()) {
3101 AutoCheckCannotGC nogc
;
3102 res
= AppendDollarReplacement(newReplaceChars
, firstDollarIndex
, fm
, textstr
,
3103 repstr
->latin1Chars(nogc
), repstr
->length());
3105 AutoCheckCannotGC nogc
;
3106 res
= AppendDollarReplacement(newReplaceChars
, firstDollarIndex
, fm
, textstr
,
3107 repstr
->twoByteChars(nogc
), repstr
->length());
3112 RootedString
leftSide(cx
, NewDependentString(cx
, textstr
, 0, matchStart
));
3116 RootedString
newReplace(cx
, newReplaceChars
.finishString());
3120 JS_ASSERT(textstr
->length() >= matchLimit
);
3121 RootedString
rightSide(cx
, NewDependentString(cx
, textstr
, matchLimit
,
3122 textstr
->length() - matchLimit
));
3126 RopeBuilder
builder(cx
);
3127 if (!builder
.append(leftSide
) || !builder
.append(newReplace
) || !builder
.append(rightSide
))
3130 rval
.setString(builder
.result());
3139 StringRange(size_t s
, size_t l
)
3140 : start(s
), length(l
)
3144 template <typename CharT
>
3146 CopySubstringsToFatInline(JSFatInlineString
* dest
, const CharT
* src
, const StringRange
* ranges
,
3147 size_t rangesLen
, size_t outputLen
)
3149 CharT
* buf
= dest
->init
<CharT
>(outputLen
);
3151 for (size_t i
= 0; i
< rangesLen
; i
++) {
3152 PodCopy(buf
+ pos
, src
+ ranges
[i
].start
, ranges
[i
].length
);
3153 pos
+= ranges
[i
].length
;
3156 MOZ_ASSERT(pos
== outputLen
);
3160 static inline JSFatInlineString
*
3161 FlattenSubstrings(JSContext
* cx
, Handle
<JSFlatString
*> flatStr
, const StringRange
* ranges
,
3162 size_t rangesLen
, size_t outputLen
)
3164 JSFatInlineString
* str
= NewGCFatInlineString
<CanGC
>(cx
);
3168 AutoCheckCannotGC nogc
;
3169 if (flatStr
->hasLatin1Chars())
3170 CopySubstringsToFatInline(str
, flatStr
->latin1Chars(nogc
), ranges
, rangesLen
, outputLen
);
3172 CopySubstringsToFatInline(str
, flatStr
->twoByteChars(nogc
), ranges
, rangesLen
, outputLen
);
3177 AppendSubstrings(JSContext
* cx
, Handle
<JSFlatString
*> flatStr
,
3178 const StringRange
* ranges
, size_t rangesLen
)
3180 JS_ASSERT(rangesLen
);
3182 /* For single substrings, construct a dependent string. */
3184 return NewDependentString(cx
, flatStr
, ranges
[0].start
, ranges
[0].length
);
3186 bool isLatin1
= flatStr
->hasLatin1Chars();
3187 uint32_t fatInlineMaxLength
= JSFatInlineString::MAX_LENGTH_TWO_BYTE
;
3189 fatInlineMaxLength
= JSFatInlineString::MAX_LENGTH_LATIN1
;
3191 /* Collect substrings into a rope */
3193 RopeBuilder
rope(cx
);
3194 RootedString
part(cx
, nullptr);
3195 while (i
< rangesLen
) {
3197 /* Find maximum range that fits in JSFatInlineString */
3198 size_t substrLen
= 0;
3200 for (; end
< rangesLen
; end
++) {
3201 if (substrLen
+ ranges
[end
].length
> fatInlineMaxLength
)
3203 substrLen
+= ranges
[end
].length
;
3207 /* Not even one range fits JSFatInlineString, use DependentString */
3208 const StringRange
& sr
= ranges
[i
++];
3209 part
= NewDependentString(cx
, flatStr
, sr
.start
, sr
.length
);
3211 /* Copy the ranges (linearly) into a JSFatInlineString */
3212 part
= FlattenSubstrings(cx
, flatStr
, ranges
+ i
, end
- i
, substrLen
);
3219 /* Appending to the rope permanently roots the substring. */
3220 if (!rope
.append(part
))
3224 return rope
.result();
3228 StrReplaceRegexpRemove(JSContext
* cx
, HandleString str
, RegExpShared
& re
, MutableHandleValue rval
)
3230 Rooted
<JSFlatString
*> flatStr(cx
, str
->ensureFlat(cx
));
3234 Vector
<StringRange
, 16, SystemAllocPolicy
> ranges
;
3236 size_t charsLen
= flatStr
->length();
3238 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
3239 size_t startIndex
= 0; /* Index used for iterating through the string. */
3240 size_t lastIndex
= 0; /* Index after last successful match. */
3241 size_t lazyIndex
= 0; /* Index before last successful match. */
3243 /* Accumulate StringRanges for unmatched substrings. */
3244 while (startIndex
<= charsLen
) {
3245 if (!CheckForInterrupt(cx
))
3248 RegExpRunStatus status
= re
.execute(cx
, flatStr
, &startIndex
, matches
);
3249 if (status
== RegExpRunStatus_Error
)
3251 if (status
== RegExpRunStatus_Success_NotFound
)
3253 MatchPair
& match
= matches
[0];
3255 /* Include the latest unmatched substring. */
3256 if (size_t(match
.start
) > lastIndex
) {
3257 if (!ranges
.append(StringRange(lastIndex
, match
.start
- lastIndex
)))
3261 lazyIndex
= lastIndex
;
3262 lastIndex
= startIndex
;
3264 if (match
.isEmpty())
3267 /* Non-global removal executes at most once. */
3274 /* If unmatched, return the input string. */
3276 if (startIndex
> 0) {
3277 res
= cx
->global()->getRegExpStatics(cx
);
3280 res
->updateLazily(cx
, flatStr
, &re
, lazyIndex
);
3282 rval
.setString(str
);
3286 /* The last successful match updates the RegExpStatics. */
3287 res
= cx
->global()->getRegExpStatics(cx
);
3291 res
->updateLazily(cx
, flatStr
, &re
, lazyIndex
);
3293 /* Include any remaining part of the string. */
3294 if (lastIndex
< charsLen
) {
3295 if (!ranges
.append(StringRange(lastIndex
, charsLen
- lastIndex
)))
3299 /* Handle the empty string before calling .begin(). */
3300 if (ranges
.empty()) {
3301 rval
.setString(cx
->runtime()->emptyString
);
3305 JSString
* result
= AppendSubstrings(cx
, flatStr
, ranges
.begin(), ranges
.length());
3309 rval
.setString(result
);
3314 StrReplaceRegExp(JSContext
* cx
, ReplaceData
& rdata
, MutableHandleValue rval
)
3316 rdata
.leftIndex
= 0;
3317 rdata
.calledBack
= false;
3319 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
3323 RegExpShared
& re
= rdata
.g
.regExp();
3325 // The spec doesn't describe this function very clearly, so we go ahead and
3326 // assume that when the input to String.prototype.replace is a global
3327 // RegExp, calling the replacer function (assuming one was provided) takes
3328 // place only after the matching is done. See the comment at the beginning
3329 // of DoMatchGlobal explaining why we can zero the RegExp object's
3330 // lastIndex property here.
3331 if (re
.global() && !rdata
.g
.zeroLastIndex(cx
))
3334 /* Optimize removal. */
3335 if (rdata
.repstr
&& rdata
.repstr
->length() == 0) {
3336 JS_ASSERT(!rdata
.lambda
&& !rdata
.elembase
&& rdata
.dollarIndex
== UINT32_MAX
);
3337 return StrReplaceRegexpRemove(cx
, rdata
.str
, re
, rval
);
3340 RootedLinearString
linearStr(cx
, rdata
.str
->ensureLinear(cx
));
3345 if (!DoMatchForReplaceGlobal(cx
, res
, linearStr
, re
, rdata
))
3348 if (!DoMatchForReplaceLocal(cx
, res
, linearStr
, re
, rdata
))
3352 if (!rdata
.calledBack
) {
3353 /* Didn't match, so the string is unmodified. */
3354 rval
.setString(rdata
.str
);
3359 res
->getRightContext(&sub
);
3360 if (!rdata
.sb
.appendSubstring(sub
.base
, sub
.offset
, sub
.length
))
3363 JSString
* retstr
= rdata
.sb
.finishString();
3367 rval
.setString(retstr
);
3372 str_replace_regexp(JSContext
* cx
, CallArgs args
, ReplaceData
& rdata
)
3374 if (!rdata
.g
.normalizeRegExp(cx
, true, 2, args
))
3377 return StrReplaceRegExp(cx
, rdata
, args
.rval());
3381 js::str_replace_regexp_raw(JSContext
* cx
, HandleString string
, HandleObject regexp
,
3382 HandleString replacement
, MutableHandleValue rval
)
3384 /* Optimize removal, so we don't have to create ReplaceData */
3385 if (replacement
->length() == 0) {
3386 StringRegExpGuard
guard(cx
);
3387 if (!guard
.init(cx
, regexp
))
3390 RegExpShared
& re
= guard
.regExp();
3391 return StrReplaceRegexpRemove(cx
, string
, re
, rval
);
3394 ReplaceData
rdata(cx
);
3397 JSLinearString
* repl
= replacement
->ensureLinear(cx
);
3401 rdata
.setReplacementString(repl
);
3403 if (!rdata
.g
.init(cx
, regexp
))
3406 return StrReplaceRegExp(cx
, rdata
, rval
);
3410 StrReplaceString(JSContext
* cx
, ReplaceData
& rdata
, const FlatMatch
& fm
, MutableHandleValue rval
)
3413 * Note: we could optimize the text.length == pattern.length case if we wanted,
3414 * even in the presence of dollar metachars.
3416 if (rdata
.dollarIndex
!= UINT32_MAX
)
3417 return BuildDollarReplacement(cx
, rdata
.str
, rdata
.repstr
, rdata
.dollarIndex
, fm
, rval
);
3418 return BuildFlatReplacement(cx
, rdata
.str
, rdata
.repstr
, fm
, rval
);
3421 static const uint32_t ReplaceOptArg
= 2;
3424 js::str_replace_string_raw(JSContext
* cx
, HandleString string
, HandleString pattern
,
3425 HandleString replacement
, MutableHandleValue rval
)
3427 ReplaceData
rdata(cx
);
3430 JSLinearString
* repl
= replacement
->ensureLinear(cx
);
3433 rdata
.setReplacementString(repl
);
3435 if (!rdata
.g
.init(cx
, pattern
))
3437 const FlatMatch
* fm
= rdata
.g
.tryFlatMatch(cx
, rdata
.str
, ReplaceOptArg
, ReplaceOptArg
, false);
3439 if (fm
->match() < 0) {
3440 rval
.setString(string
);
3444 return StrReplaceString(cx
, rdata
, *fm
, rval
);
3448 str_replace_flat_lambda(JSContext
* cx
, CallArgs outerArgs
, ReplaceData
& rdata
, const FlatMatch
& fm
)
3450 RootedString
matchStr(cx
, NewDependentString(cx
, rdata
.str
, fm
.match(), fm
.patternLength()));
3454 /* lambda(matchStr, matchStart, textstr) */
3455 static const uint32_t lambdaArgc
= 3;
3456 if (!rdata
.fig
.args().init(lambdaArgc
))
3459 CallArgs
& args
= rdata
.fig
.args();
3460 args
.setCallee(ObjectValue(*rdata
.lambda
));
3461 args
.setThis(UndefinedValue());
3463 Value
* sp
= args
.array();
3464 sp
[0].setString(matchStr
);
3465 sp
[1].setInt32(fm
.match());
3466 sp
[2].setString(rdata
.str
);
3468 if (!rdata
.fig
.invoke(cx
))
3471 RootedString
repstr(cx
, ToString
<CanGC
>(cx
, args
.rval()));
3475 RootedString
leftSide(cx
, NewDependentString(cx
, rdata
.str
, 0, fm
.match()));
3479 size_t matchLimit
= fm
.match() + fm
.patternLength();
3480 RootedString
rightSide(cx
, NewDependentString(cx
, rdata
.str
, matchLimit
,
3481 rdata
.str
->length() - matchLimit
));
3485 RopeBuilder
builder(cx
);
3486 if (!(builder
.append(leftSide
) &&
3487 builder
.append(repstr
) &&
3488 builder
.append(rightSide
))) {
3492 outerArgs
.rval().setString(builder
.result());
3497 * Pattern match the script to check if it is indexing into a particular
3498 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
3499 * such cases, which are used by javascript packers (particularly the popular
3500 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
3501 * code patterns generated by such packers here.
3504 LambdaIsGetElem(JSContext
* cx
, JSObject
& lambda
, MutableHandleObject pobj
)
3506 if (!lambda
.is
<JSFunction
>())
3509 RootedFunction
fun(cx
, &lambda
.as
<JSFunction
>());
3510 if (!fun
->isInterpreted())
3513 JSScript
* script
= fun
->getOrCreateScript(cx
);
3517 jsbytecode
* pc
= script
->code();
3520 * JSOP_GETALIASEDVAR tells us exactly where to find the base object 'b'.
3521 * Rule out the (unlikely) possibility of a heavyweight function since it
3522 * would make our scope walk off by 1.
3524 if (JSOp(*pc
) != JSOP_GETALIASEDVAR
|| fun
->isHeavyweight())
3526 ScopeCoordinate
sc(pc
);
3527 ScopeObject
* scope
= &fun
->environment()->as
<ScopeObject
>();
3528 for (unsigned i
= 0; i
< sc
.hops(); ++i
)
3529 scope
= &scope
->enclosingScope().as
<ScopeObject
>();
3530 Value b
= scope
->aliasedVar(sc
);
3531 pc
+= JSOP_GETALIASEDVAR_LENGTH
;
3533 /* Look for 'a' to be the lambda's first argument. */
3534 if (JSOp(*pc
) != JSOP_GETARG
|| GET_ARGNO(pc
) != 0)
3536 pc
+= JSOP_GETARG_LENGTH
;
3539 if (JSOp(*pc
) != JSOP_GETELEM
)
3541 pc
+= JSOP_GETELEM_LENGTH
;
3544 if (JSOp(*pc
) != JSOP_RETURN
)
3547 /* 'b' must behave like a normal object. */
3551 JSObject
& bobj
= b
.toObject();
3552 const Class
* clasp
= bobj
.getClass();
3553 if (!clasp
->isNative() || clasp
->ops
.lookupProperty
|| clasp
->ops
.getProperty
)
3561 js::str_replace(JSContext
* cx
, unsigned argc
, Value
* vp
)
3563 CallArgs args
= CallArgsFromVp(argc
, vp
);
3565 ReplaceData
rdata(cx
);
3566 rdata
.str
= ThisToStringForStringProto(cx
, args
);
3570 if (!rdata
.g
.init(cx
, args
))
3573 /* Extract replacement string/function. */
3574 if (args
.length() >= ReplaceOptArg
&& IsCallable(args
[1])) {
3575 rdata
.setReplacementFunction(&args
[1].toObject());
3577 if (!LambdaIsGetElem(cx
, *rdata
.lambda
, &rdata
.elembase
))
3580 JSLinearString
* string
= ArgToRootedString(cx
, args
, 1);
3584 rdata
.setReplacementString(string
);
3587 rdata
.fig
.initFunction(ObjectOrNullValue(rdata
.lambda
));
3590 * Unlike its |String.prototype| brethren, |replace| doesn't convert
3591 * its input to a regular expression. (Even if it contains metachars.)
3593 * However, if the user invokes our (non-standard) |flags| argument
3594 * extension then we revert to creating a regular expression. Note that
3595 * this is observable behavior through the side-effect mutation of the
3599 const FlatMatch
* fm
= rdata
.g
.tryFlatMatch(cx
, rdata
.str
, ReplaceOptArg
, args
.length(), false);
3602 if (cx
->isExceptionPending()) /* oom in RopeMatch in tryFlatMatch */
3604 return str_replace_regexp(cx
, args
, rdata
);
3607 if (fm
->match() < 0) {
3608 args
.rval().setString(rdata
.str
);
3613 return str_replace_flat_lambda(cx
, args
, rdata
, *fm
);
3614 return StrReplaceString(cx
, rdata
, *fm
, args
.rval());
3619 class SplitMatchResult
{
3625 JS_STATIC_ASSERT(SIZE_MAX
> JSString::MAX_LENGTH
);
3626 endIndex_
= SIZE_MAX
;
3628 bool isFailure() const {
3629 return endIndex_
== SIZE_MAX
;
3631 size_t endIndex() const {
3632 JS_ASSERT(!isFailure());
3635 size_t length() const {
3636 JS_ASSERT(!isFailure());
3639 void setResult(size_t length
, size_t endIndex
) {
3641 endIndex_
= endIndex
;
3645 } /* anonymous namespace */
3647 template<class Matcher
>
3649 SplitHelper(JSContext
* cx
, HandleLinearString str
, uint32_t limit
, const Matcher
& splitMatch
,
3650 Handle
<TypeObject
*> type
)
3652 size_t strLength
= str
->length();
3653 SplitMatchResult result
;
3656 if (strLength
== 0) {
3657 if (!splitMatch(cx
, str
, 0, &result
))
3661 * NB: Unlike in the non-empty string case, it's perfectly fine
3662 * (indeed the spec requires it) if we match at the end of the
3663 * string. Thus these cases should hold:
3665 * var a = "".split("");
3666 * assertEq(a.length, 0);
3667 * var b = "".split(/.?/);
3668 * assertEq(b.length, 0);
3670 if (!result
.isFailure())
3671 return NewDenseEmptyArray(cx
);
3673 RootedValue
v(cx
, StringValue(str
));
3674 return NewDenseCopiedArray(cx
, 1, v
.address());
3678 size_t lastEndIndex
= 0;
3682 AutoValueVector
splits(cx
);
3684 while (index
< strLength
) {
3686 if (!splitMatch(cx
, str
, index
, &result
))
3692 * Our match algorithm differs from the spec in that it returns the
3693 * next index at which a match happens. If no match happens we're
3696 * But what if the match is at the end of the string (and the string is
3697 * not empty)? Per 13(c)(ii) this shouldn't be a match, so we have to
3698 * specially exclude it. Thus this case should hold:
3700 * var a = "abc".split(/\b/);
3701 * assertEq(a.length, 1);
3702 * assertEq(a[0], "abc");
3704 if (result
.isFailure())
3707 /* Step 13(c)(i). */
3708 size_t sepLength
= result
.length();
3709 size_t endIndex
= result
.endIndex();
3710 if (sepLength
== 0 && endIndex
== strLength
)
3713 /* Step 13(c)(ii). */
3714 if (endIndex
== lastEndIndex
) {
3719 /* Step 13(c)(iii). */
3720 JS_ASSERT(lastEndIndex
< endIndex
);
3721 JS_ASSERT(sepLength
<= strLength
);
3722 JS_ASSERT(lastEndIndex
+ sepLength
<= endIndex
);
3724 /* Steps 13(c)(iii)(1-3). */
3725 size_t subLength
= size_t(endIndex
- sepLength
- lastEndIndex
);
3726 JSString
* sub
= NewDependentString(cx
, str
, lastEndIndex
, subLength
);
3727 if (!sub
|| !splits
.append(StringValue(sub
)))
3730 /* Step 13(c)(iii)(4). */
3731 if (splits
.length() == limit
)
3732 return NewDenseCopiedArray(cx
, splits
.length(), splits
.begin());
3734 /* Step 13(c)(iii)(5). */
3735 lastEndIndex
= endIndex
;
3737 /* Step 13(c)(iii)(6-7). */
3738 if (Matcher::returnsCaptures
) {
3739 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
3743 const MatchPairs
& matches
= res
->getMatches();
3744 for (size_t i
= 0; i
< matches
.parenCount(); i
++) {
3745 /* Steps 13(c)(iii)(7)(a-c). */
3746 if (!matches
[i
+ 1].isUndefined()) {
3748 res
->getParen(i
+ 1, &parsub
);
3749 sub
= NewDependentString(cx
, parsub
.base
, parsub
.offset
, parsub
.length
);
3750 if (!sub
|| !splits
.append(StringValue(sub
)))
3753 /* Only string entries have been accounted for so far. */
3754 AddTypePropertyId(cx
, type
, JSID_VOID
, UndefinedValue());
3755 if (!splits
.append(UndefinedValue()))
3759 /* Step 13(c)(iii)(7)(d). */
3760 if (splits
.length() == limit
)
3761 return NewDenseCopiedArray(cx
, splits
.length(), splits
.begin());
3765 /* Step 13(c)(iii)(8). */
3766 index
= lastEndIndex
;
3770 JSString
* sub
= NewDependentString(cx
, str
, lastEndIndex
, strLength
- lastEndIndex
);
3771 if (!sub
|| !splits
.append(StringValue(sub
)))
3775 return NewDenseCopiedArray(cx
, splits
.length(), splits
.begin());
3778 // Fast-path for splitting a string into a character array via split("").
3780 CharSplitHelper(JSContext
* cx
, HandleLinearString str
, uint32_t limit
)
3782 size_t strLength
= str
->length();
3784 return NewDenseEmptyArray(cx
);
3786 js::StaticStrings
& staticStrings
= cx
->staticStrings();
3787 uint32_t resultlen
= (limit
< strLength
? limit
: strLength
);
3789 AutoValueVector
splits(cx
);
3790 if (!splits
.reserve(resultlen
))
3793 for (size_t i
= 0; i
< resultlen
; ++i
) {
3794 JSString
* sub
= staticStrings
.getUnitStringForElement(cx
, str
, i
);
3797 splits
.infallibleAppend(StringValue(sub
));
3800 return NewDenseCopiedArray(cx
, splits
.length(), splits
.begin());
3806 * The SplitMatch operation from ES5 15.5.4.14 is implemented using different
3807 * paths for regular expression and string separators.
3809 * The algorithm differs from the spec in that we return the next index at
3810 * which a match happens.
3812 class SplitRegExpMatcher
3818 SplitRegExpMatcher(RegExpShared
& re
, RegExpStatics
* res
) : re(re
), res(res
) {}
3820 static const bool returnsCaptures
= true;
3822 bool operator()(JSContext
* cx
, HandleLinearString str
, size_t index
,
3823 SplitMatchResult
* result
) const
3825 ScopedMatchPairs
matches(&cx
->tempLifoAlloc());
3826 RegExpRunStatus status
= re
.execute(cx
, str
, &index
, matches
);
3827 if (status
== RegExpRunStatus_Error
)
3830 if (status
== RegExpRunStatus_Success_NotFound
) {
3831 result
->setFailure();
3835 if (!res
->updateFromMatchPairs(cx
, str
, matches
))
3839 res
->getLastMatch(&sep
);
3841 result
->setResult(sep
.length
, index
);
3846 class SplitStringMatcher
3848 RootedLinearString sep
;
3851 SplitStringMatcher(JSContext
* cx
, HandleLinearString sep
)
3855 static const bool returnsCaptures
= false;
3857 bool operator()(JSContext
* cx
, JSLinearString
* str
, size_t index
, SplitMatchResult
* res
) const
3859 JS_ASSERT(index
== 0 || index
< str
->length());
3860 int match
= StringMatch(str
, sep
, index
);
3864 res
->setResult(sep
->length(), match
+ sep
->length());
3869 } /* anonymous namespace */
3873 js::str_split(JSContext
* cx
, unsigned argc
, Value
* vp
)
3875 CallArgs args
= CallArgsFromVp(argc
, vp
);
3878 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
3882 RootedTypeObject
type(cx
, GetTypeCallerInitObject(cx
, JSProto_Array
));
3885 AddTypePropertyId(cx
, type
, JSID_VOID
, Type::StringType());
3887 /* Step 5: Use the second argument as the split limit, if given. */
3889 if (args
.hasDefined(1)) {
3891 if (!ToNumber(cx
, args
[1], &d
))
3893 limit
= ToUint32(d
);
3900 RootedLinearString
sepstr(cx
);
3901 bool sepDefined
= args
.hasDefined(0);
3903 if (IsObjectWithClass(args
[0], ESClass_RegExp
, cx
)) {
3904 RootedObject
obj(cx
, &args
[0].toObject());
3905 if (!RegExpToShared(cx
, obj
, &re
))
3908 sepstr
= ArgToRootedString(cx
, args
, 0);
3916 JSObject
* aobj
= NewDenseEmptyArray(cx
);
3919 aobj
->setType(type
);
3920 args
.rval().setObject(*aobj
);
3926 RootedValue
v(cx
, StringValue(str
));
3927 JSObject
* aobj
= NewDenseCopiedArray(cx
, 1, v
.address());
3930 aobj
->setType(type
);
3931 args
.rval().setObject(*aobj
);
3934 RootedLinearString
linearStr(cx
, str
->ensureLinear(cx
));
3939 RootedObject
aobj(cx
);
3940 if (!re
.initialized()) {
3941 if (sepstr
->length() == 0) {
3942 aobj
= CharSplitHelper(cx
, linearStr
, limit
);
3944 SplitStringMatcher
matcher(cx
, sepstr
);
3945 aobj
= SplitHelper(cx
, linearStr
, limit
, matcher
, type
);
3948 RegExpStatics
* res
= cx
->global()->getRegExpStatics(cx
);
3951 SplitRegExpMatcher
matcher(*re
, res
);
3952 aobj
= SplitHelper(cx
, linearStr
, limit
, matcher
, type
);
3958 aobj
->setType(type
);
3959 args
.rval().setObject(*aobj
);
3964 js::str_split_string(JSContext
* cx
, HandleTypeObject type
, HandleString str
, HandleString sep
)
3966 RootedLinearString
linearStr(cx
, str
->ensureLinear(cx
));
3970 RootedLinearString
linearSep(cx
, sep
->ensureLinear(cx
));
3974 uint32_t limit
= UINT32_MAX
;
3976 RootedObject
aobj(cx
);
3977 if (linearSep
->length() == 0) {
3978 aobj
= CharSplitHelper(cx
, linearStr
, limit
);
3980 SplitStringMatcher
matcher(cx
, linearSep
);
3981 aobj
= SplitHelper(cx
, linearStr
, limit
, matcher
, type
);
3987 aobj
->setType(type
);
3992 str_substr(JSContext
* cx
, unsigned argc
, Value
* vp
)
3994 CallArgs args
= CallArgsFromVp(argc
, vp
);
3995 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
3999 int32_t length
, len
, begin
;
4000 if (args
.length() > 0) {
4001 length
= int32_t(str
->length());
4002 if (!ValueToIntegerRange(cx
, args
[0], &begin
))
4005 if (begin
>= length
) {
4006 args
.rval().setString(cx
->runtime()->emptyString
);
4010 begin
+= length
; /* length + INT_MIN will always be less than 0 */
4015 if (args
.hasDefined(1)) {
4016 if (!ValueToIntegerRange(cx
, args
[1], &len
))
4020 args
.rval().setString(cx
->runtime()->emptyString
);
4024 if (uint32_t(length
) < uint32_t(begin
+ len
))
4025 len
= length
- begin
;
4027 len
= length
- begin
;
4030 str
= DoSubstr(cx
, str
, size_t(begin
), size_t(len
));
4035 args
.rval().setString(str
);
4040 * Python-esque sequence operations.
4043 str_concat(JSContext
* cx
, unsigned argc
, Value
* vp
)
4045 CallArgs args
= CallArgsFromVp(argc
, vp
);
4046 JSString
* str
= ThisToStringForStringProto(cx
, args
);
4050 for (unsigned i
= 0; i
< args
.length(); i
++) {
4051 JSString
* argStr
= ToString
<NoGC
>(cx
, args
[i
]);
4053 RootedString
strRoot(cx
, str
);
4054 argStr
= ToString
<CanGC
>(cx
, args
[i
]);
4060 JSString
* next
= ConcatStrings
<NoGC
>(cx
, str
, argStr
);
4064 RootedString
strRoot(cx
, str
), argStrRoot(cx
, argStr
);
4065 str
= ConcatStrings
<CanGC
>(cx
, strRoot
, argStrRoot
);
4071 args
.rval().setString(str
);
4076 str_slice(JSContext
* cx
, unsigned argc
, Value
* vp
)
4078 CallArgs args
= CallArgsFromVp(argc
, vp
);
4080 if (args
.length() == 1 && args
.thisv().isString() && args
[0].isInt32()) {
4081 JSString
* str
= args
.thisv().toString();
4082 size_t begin
= args
[0].toInt32();
4083 size_t end
= str
->length();
4085 size_t length
= end
- begin
;
4087 str
= cx
->runtime()->emptyString
;
4090 ? cx
->staticStrings().getUnitStringForElement(cx
, str
, begin
)
4091 : NewDependentString(cx
, str
, begin
, length
);
4095 args
.rval().setString(str
);
4100 RootedString
str(cx
, ThisToStringForStringProto(cx
, args
));
4104 if (args
.length() != 0) {
4105 double begin
, end
, length
;
4107 if (!ToInteger(cx
, args
[0], &begin
))
4109 length
= str
->length();
4114 } else if (begin
> length
) {
4118 if (args
.hasDefined(1)) {
4119 if (!ToInteger(cx
, args
[1], &end
))
4125 } else if (end
> length
) {
4134 str
= NewDependentString(cx
, str
, size_t(begin
), size_t(end
- begin
));
4138 args
.rval().setString(str
);
4142 static const JSFunctionSpec string_methods
[] = {
4144 JS_FN("quote", str_quote
, 0,JSFUN_GENERIC_NATIVE
),
4145 JS_FN(js_toSource_str
, str_toSource
, 0,0),
4148 /* Java-like methods. */
4149 JS_FN(js_toString_str
, js_str_toString
, 0,0),
4150 JS_FN(js_valueOf_str
, js_str_toString
, 0,0),
4151 JS_FN("substring", str_substring
, 2,JSFUN_GENERIC_NATIVE
),
4152 JS_FN("toLowerCase", str_toLowerCase
, 0,JSFUN_GENERIC_NATIVE
),
4153 JS_FN("toUpperCase", str_toUpperCase
, 0,JSFUN_GENERIC_NATIVE
),
4154 JS_FN("charAt", js_str_charAt
, 1,JSFUN_GENERIC_NATIVE
),
4155 JS_FN("charCodeAt", js_str_charCodeAt
, 1,JSFUN_GENERIC_NATIVE
),
4156 JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1,0),
4157 JS_FN("contains", str_contains
, 1,JSFUN_GENERIC_NATIVE
),
4158 JS_FN("indexOf", str_indexOf
, 1,JSFUN_GENERIC_NATIVE
),
4159 JS_FN("lastIndexOf", str_lastIndexOf
, 1,JSFUN_GENERIC_NATIVE
),
4160 JS_FN("startsWith", str_startsWith
, 1,JSFUN_GENERIC_NATIVE
),
4161 JS_FN("endsWith", str_endsWith
, 1,JSFUN_GENERIC_NATIVE
),
4162 JS_FN("trim", str_trim
, 0,JSFUN_GENERIC_NATIVE
),
4163 JS_FN("trimLeft", str_trimLeft
, 0,JSFUN_GENERIC_NATIVE
),
4164 JS_FN("trimRight", str_trimRight
, 0,JSFUN_GENERIC_NATIVE
),
4165 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase
, 0,JSFUN_GENERIC_NATIVE
),
4166 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase
, 0,JSFUN_GENERIC_NATIVE
),
4168 JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0),
4170 JS_FN("localeCompare", str_localeCompare
, 1,JSFUN_GENERIC_NATIVE
),
4172 JS_SELF_HOSTED_FN("repeat", "String_repeat", 1,0),
4174 JS_FN("normalize", str_normalize
, 0,JSFUN_GENERIC_NATIVE
),
4177 /* Perl-ish methods (search is actually Python-esque). */
4178 JS_FN("match", str_match
, 1,JSFUN_GENERIC_NATIVE
),
4179 JS_FN("search", str_search
, 1,JSFUN_GENERIC_NATIVE
),
4180 JS_FN("replace", str_replace
, 2,JSFUN_GENERIC_NATIVE
),
4181 JS_FN("split", str_split
, 2,JSFUN_GENERIC_NATIVE
),
4182 JS_FN("substr", str_substr
, 2,JSFUN_GENERIC_NATIVE
),
4184 /* Python-esque sequence methods. */
4185 JS_FN("concat", str_concat
, 1,JSFUN_GENERIC_NATIVE
),
4186 JS_FN("slice", str_slice
, 2,JSFUN_GENERIC_NATIVE
),
4188 /* HTML string methods. */
4189 JS_SELF_HOSTED_FN("bold", "String_bold", 0,0),
4190 JS_SELF_HOSTED_FN("italics", "String_italics", 0,0),
4191 JS_SELF_HOSTED_FN("fixed", "String_fixed", 0,0),
4192 JS_SELF_HOSTED_FN("strike", "String_strike", 0,0),
4193 JS_SELF_HOSTED_FN("small", "String_small", 0,0),
4194 JS_SELF_HOSTED_FN("big", "String_big", 0,0),
4195 JS_SELF_HOSTED_FN("blink", "String_blink", 0,0),
4196 JS_SELF_HOSTED_FN("sup", "String_sup", 0,0),
4197 JS_SELF_HOSTED_FN("sub", "String_sub", 0,0),
4198 JS_SELF_HOSTED_FN("anchor", "String_anchor", 1,0),
4199 JS_SELF_HOSTED_FN("link", "String_link", 1,0),
4200 JS_SELF_HOSTED_FN("fontcolor","String_fontcolor", 1,0),
4201 JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1,0),
4203 JS_SELF_HOSTED_FN("@@iterator", "String_iterator", 0,0),
4208 js_String(JSContext
* cx
, unsigned argc
, Value
* vp
)
4210 CallArgs args
= CallArgsFromVp(argc
, vp
);
4212 RootedString
str(cx
);
4213 if (args
.length() > 0) {
4214 str
= ToString
<CanGC
>(cx
, args
[0]);
4218 str
= cx
->runtime()->emptyString
;
4221 if (args
.isConstructing()) {
4222 StringObject
* strobj
= StringObject::create(cx
, str
);
4225 args
.rval().setObject(*strobj
);
4229 args
.rval().setString(str
);
4234 js::str_fromCharCode(JSContext
* cx
, unsigned argc
, Value
* vp
)
4236 CallArgs args
= CallArgsFromVp(argc
, vp
);
4238 JS_ASSERT(args
.length() <= ARGS_LENGTH_MAX
);
4239 if (args
.length() == 1)
4240 return str_fromCharCode_one_arg(cx
, args
[0], args
.rval());
4242 jschar
* chars
= cx
->pod_malloc
<jschar
>(args
.length() + 1);
4245 for (unsigned i
= 0; i
< args
.length(); i
++) {
4247 if (!ToUint16(cx
, args
[i
], &code
)) {
4251 chars
[i
] = jschar(code
);
4253 chars
[args
.length()] = 0;
4254 JSString
* str
= NewString
<CanGC
>(cx
, chars
, args
.length());
4260 args
.rval().setString(str
);
4265 js::str_fromCharCode_one_arg(JSContext
* cx
, HandleValue code
, MutableHandleValue rval
)
4269 if (!ToUint16(cx
, code
, &ucode
))
4272 if (StaticStrings::hasUnit(ucode
)) {
4273 rval
.setString(cx
->staticStrings().getUnit(ucode
));
4277 jschar c
= jschar(ucode
);
4278 JSString
* str
= NewStringCopyN
<CanGC
>(cx
, &c
, 1);
4282 rval
.setString(str
);
4286 static const JSFunctionSpec string_static_methods
[] = {
4287 JS_FN("fromCharCode", js::str_fromCharCode
, 1, 0),
4288 JS_SELF_HOSTED_FN("fromCodePoint", "String_static_fromCodePoint", 0,0),
4289 JS_SELF_HOSTED_FN("raw", "String_static_raw", 2, 0),
4291 // This must be at the end because of bug 853075: functions listed after
4292 // self-hosted methods aren't available in self-hosted code.
4294 JS_SELF_HOSTED_FN("localeCompare", "String_static_localeCompare", 2,0),
4300 StringObject::assignInitialShape(ExclusiveContext
* cx
, Handle
<StringObject
*> obj
)
4302 JS_ASSERT(obj
->nativeEmpty());
4304 return obj
->addDataProperty(cx
, cx
->names().length
, LENGTH_SLOT
,
4305 JSPROP_PERMANENT
| JSPROP_READONLY
);
4309 js_InitStringClass(JSContext
* cx
, HandleObject obj
)
4311 JS_ASSERT(obj
->isNative());
4313 Rooted
<GlobalObject
*> global(cx
, &obj
->as
<GlobalObject
>());
4315 Rooted
<JSString
*> empty(cx
, cx
->runtime()->emptyString
);
4316 RootedObject
proto(cx
, global
->createBlankPrototype(cx
, &StringObject::class_
));
4317 if (!proto
|| !proto
->as
<StringObject
>().init(cx
, empty
))
4320 /* Now create the String function. */
4321 RootedFunction
ctor(cx
);
4322 ctor
= global
->createConstructor(cx
, js_String
, cx
->names().String
, 1);
4326 if (!GlobalObject::initBuiltinConstructor(cx
, global
, JSProto_String
, ctor
, proto
))
4329 if (!LinkConstructorAndPrototype(cx
, ctor
, proto
))
4332 if (!DefinePropertiesAndFunctions(cx
, proto
, nullptr, string_methods
) ||
4333 !DefinePropertiesAndFunctions(cx
, ctor
, nullptr, string_static_methods
))
4339 * Define escape/unescape, the URI encode/decode functions, and maybe
4340 * uneval on the global object.
4342 if (!JS_DefineFunctions(cx
, global
, string_functions
))
4349 js_ValueToPrintable(JSContext
* cx
, const Value
& vArg
, JSAutoByteString
* bytes
, bool asSource
)
4351 RootedValue
v(cx
, vArg
);
4354 str
= ValueToSource(cx
, v
);
4356 str
= ToString
<CanGC
>(cx
, v
);
4359 str
= js_QuoteString(cx
, str
, 0);
4362 return bytes
->encodeLatin1(cx
, str
);
4365 template <AllowGC allowGC
>
4367 js::ToStringSlow(ExclusiveContext
* cx
, typename MaybeRooted
<Value
, allowGC
>::HandleType arg
)
4369 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
4370 JS_ASSERT(!arg
.isString());
4373 if (!v
.isPrimitive()) {
4374 if (!cx
->shouldBeJSContext() || !allowGC
)
4376 RootedValue
v2(cx
, v
);
4377 if (!ToPrimitive(cx
->asJSContext(), JSTYPE_STRING
, &v2
))
4385 } else if (v
.isInt32()) {
4386 str
= Int32ToString
<allowGC
>(cx
, v
.toInt32());
4387 } else if (v
.isDouble()) {
4388 str
= NumberToString
<allowGC
>(cx
, v
.toDouble());
4389 } else if (v
.isBoolean()) {
4390 str
= js_BooleanToString(cx
, v
.toBoolean());
4391 } else if (v
.isNull()) {
4392 str
= cx
->names().null
;
4393 } else if (v
.isSymbol()) {
4394 if (cx
->shouldBeJSContext() && allowGC
) {
4395 JS_ReportErrorNumber(cx
->asJSContext(), js_GetErrorMessage
, nullptr,
4396 JSMSG_SYMBOL_TO_STRING
);
4400 MOZ_ASSERT(v
.isUndefined());
4401 str
= cx
->names().undefined
;
4407 js::ToStringSlow
<CanGC
>(ExclusiveContext
* cx
, HandleValue arg
);
4410 js::ToStringSlow
<NoGC
>(ExclusiveContext
* cx
, Value arg
);
4412 JS_PUBLIC_API(JSString
*)
4413 js::ToStringSlow(JSContext
* cx
, HandleValue v
)
4415 return ToStringSlow
<CanGC
>(cx
, v
);
4419 SymbolToSource(JSContext
* cx
, Symbol
* symbol
)
4421 RootedString
desc(cx
, symbol
->description());
4422 SymbolCode code
= symbol
->code();
4423 if (code
!= SymbolCode::InSymbolRegistry
&& code
!= SymbolCode::UniqueSymbol
) {
4424 // Well-known symbol.
4425 MOZ_ASSERT(uint32_t(code
) < JS::WellKnownSymbolLimit
);
4429 StringBuffer
buf(cx
);
4430 if (code
== SymbolCode::InSymbolRegistry
? !buf
.append("Symbol.for(") : !buf
.append("Symbol("))
4433 desc
= StringToSource(cx
, desc
);
4434 if (!desc
|| !buf
.append(desc
))
4437 if (!buf
.append(')'))
4439 return buf
.finishString();
4443 js::ValueToSource(JSContext
* cx
, HandleValue v
)
4445 JS_CHECK_RECURSION(cx
, return nullptr);
4446 assertSameCompartment(cx
, v
);
4448 if (v
.isUndefined())
4449 return cx
->names().void0
;
4451 return StringToSource(cx
, v
.toString());
4453 return SymbolToSource(cx
, v
.toSymbol());
4454 if (v
.isPrimitive()) {
4455 /* Special case to preserve negative zero, _contra_ toString. */
4456 if (v
.isDouble() && IsNegativeZero(v
.toDouble())) {
4457 /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
4458 static const jschar js_negzero_ucNstr
[] = {'-', '0'};
4460 return NewStringCopyN
<CanGC
>(cx
, js_negzero_ucNstr
, 2);
4462 return ToString
<CanGC
>(cx
, v
);
4465 RootedValue
fval(cx
);
4466 RootedObject
obj(cx
, &v
.toObject());
4467 if (!JSObject::getProperty(cx
, obj
, obj
, cx
->names().toSource
, &fval
))
4469 if (IsCallable(fval
)) {
4470 RootedValue
rval(cx
);
4471 if (!Invoke(cx
, ObjectValue(*obj
), fval
, 0, nullptr, &rval
))
4473 return ToString
<CanGC
>(cx
, rval
);
4476 return ObjectToSource(cx
, obj
);
4480 js::StringToSource(JSContext
* cx
, JSString
* str
)
4482 return js_QuoteString(cx
, str
, '"');
4486 js::EqualChars(JSLinearString
* str1
, JSLinearString
* str2
)
4488 MOZ_ASSERT(str1
->length() == str2
->length());
4490 size_t len
= str1
->length();
4492 AutoCheckCannotGC nogc
;
4493 if (str1
->hasTwoByteChars()) {
4494 if (str2
->hasTwoByteChars())
4495 return PodEqual(str1
->twoByteChars(nogc
), str2
->twoByteChars(nogc
), len
);
4497 return EqualChars(str2
->latin1Chars(nogc
), str1
->twoByteChars(nogc
), len
);
4500 if (str2
->hasLatin1Chars())
4501 return PodEqual(str1
->latin1Chars(nogc
), str2
->latin1Chars(nogc
), len
);
4503 return EqualChars(str1
->latin1Chars(nogc
), str2
->twoByteChars(nogc
), len
);
4507 js::EqualStrings(JSContext
* cx
, JSString
* str1
, JSString
* str2
, bool* result
)
4514 size_t length1
= str1
->length();
4515 if (length1
!= str2
->length()) {
4520 JSLinearString
* linear1
= str1
->ensureLinear(cx
);
4523 JSLinearString
* linear2
= str2
->ensureLinear(cx
);
4527 *result
= EqualChars(linear1
, linear2
);
4532 js::EqualStrings(JSLinearString
* str1
, JSLinearString
* str2
)
4537 size_t length1
= str1
->length();
4538 if (length1
!= str2
->length())
4541 return EqualChars(str1
, str2
);
4545 CompareStringsImpl(JSLinearString
* str1
, JSLinearString
* str2
)
4547 size_t len1
= str1
->length();
4548 size_t len2
= str2
->length();
4550 AutoCheckCannotGC nogc
;
4551 if (str1
->hasLatin1Chars()) {
4552 const Latin1Char
* chars1
= str1
->latin1Chars(nogc
);
4553 return str2
->hasLatin1Chars()
4554 ? CompareChars(chars1
, len1
, str2
->latin1Chars(nogc
), len2
)
4555 : CompareChars(chars1
, len1
, str2
->twoByteChars(nogc
), len2
);
4558 const jschar
* chars1
= str1
->twoByteChars(nogc
);
4559 return str2
->hasLatin1Chars()
4560 ? CompareChars(chars1
, len1
, str2
->latin1Chars(nogc
), len2
)
4561 : CompareChars(chars1
, len1
, str2
->twoByteChars(nogc
), len2
);
4565 js::CompareChars(const jschar
* s1
, size_t len1
, JSLinearString
* s2
)
4567 AutoCheckCannotGC nogc
;
4568 return s2
->hasLatin1Chars()
4569 ? CompareChars(s1
, len1
, s2
->latin1Chars(nogc
), s2
->length())
4570 : CompareChars(s1
, len1
, s2
->twoByteChars(nogc
), s2
->length());
4574 js::CompareStrings(JSContext
* cx
, JSString
* str1
, JSString
* str2
, int32_t* result
)
4584 JSLinearString
* linear1
= str1
->ensureLinear(cx
);
4588 JSLinearString
* linear2
= str2
->ensureLinear(cx
);
4592 *result
= CompareStringsImpl(linear1
, linear2
);
4597 js::CompareAtoms(JSAtom
* atom1
, JSAtom
* atom2
)
4599 return CompareStringsImpl(atom1
, atom2
);
4603 js::StringEqualsAscii(JSLinearString
* str
, const char* asciiBytes
)
4605 size_t length
= strlen(asciiBytes
);
4607 for (size_t i
= 0; i
!= length
; ++i
)
4608 JS_ASSERT(unsigned(asciiBytes
[i
]) <= 127);
4610 if (length
!= str
->length())
4613 const Latin1Char
* latin1
= reinterpret_cast<const Latin1Char
*>(asciiBytes
);
4615 AutoCheckCannotGC nogc
;
4616 return str
->hasLatin1Chars()
4617 ? PodEqual(latin1
, str
->latin1Chars(nogc
), length
)
4618 : EqualChars(latin1
, str
->twoByteChars(nogc
), length
);
4622 js_strlen(const jschar
* s
)
4626 for (t
= s
; *t
!= 0; t
++)
4628 return (size_t)(t
- s
);
4632 js_strcmp(const jschar
* lhs
, const jschar
* rhs
)
4636 return int32_t(*lhs
) - int32_t(*rhs
);
4643 UniquePtr
<char[], JS::FreePolicy
>
4644 js::DuplicateString(js::ThreadSafeContext
* cx
, const char* s
)
4646 size_t n
= strlen(s
) + 1;
4647 auto ret
= cx
->make_pod_array
<char>(n
);
4650 PodCopy(ret
.get(), s
, n
);
4654 UniquePtr
<jschar
[], JS::FreePolicy
>
4655 js::DuplicateString(js::ThreadSafeContext
* cx
, const jschar
* s
)
4657 size_t n
= js_strlen(s
) + 1;
4658 auto ret
= cx
->make_pod_array
<jschar
>(n
);
4661 PodCopy(ret
.get(), s
, n
);
4665 template <typename CharT
>
4667 js_strchr_limit(const CharT
* s
, jschar c
, const CharT
* limit
)
4677 template const Latin1Char
*
4678 js_strchr_limit(const Latin1Char
* s
, jschar c
, const Latin1Char
* limit
);
4680 template const jschar
*
4681 js_strchr_limit(const jschar
* s
, jschar c
, const jschar
* limit
);
4684 js::InflateString(ThreadSafeContext
* cx
, const char* bytes
, size_t* lengthp
)
4688 size_t nbytes
= *lengthp
;
4691 chars
= cx
->pod_malloc
<jschar
>(nchars
+ 1);
4694 for (size_t i
= 0; i
< nchars
; i
++)
4695 chars
[i
] = (unsigned char) bytes
[i
];
4701 // For compatibility with callers of JS_DecodeBytes we must zero lengthp
4707 template <typename CharT
>
4709 js::DeflateStringToBuffer(JSContext
* maybecx
, const CharT
* src
, size_t srclen
,
4710 char* dst
, size_t* dstlenp
)
4712 size_t dstlen
= *dstlenp
;
4713 if (srclen
> dstlen
) {
4714 for (size_t i
= 0; i
< dstlen
; i
++)
4715 dst
[i
] = char(src
[i
]);
4717 AutoSuppressGC
suppress(maybecx
);
4718 JS_ReportErrorNumber(maybecx
, js_GetErrorMessage
, nullptr,
4719 JSMSG_BUFFER_TOO_SMALL
);
4723 for (size_t i
= 0; i
< srclen
; i
++)
4724 dst
[i
] = char(src
[i
]);
4730 js::DeflateStringToBuffer(JSContext
* maybecx
, const Latin1Char
* src
, size_t srclen
,
4731 char* dst
, size_t* dstlenp
);
4734 js::DeflateStringToBuffer(JSContext
* maybecx
, const jschar
* src
, size_t srclen
,
4735 char* dst
, size_t* dstlenp
);
4740 * Identifier start chars:
4746 const bool js_isidstart
[] = {
4747 /* 0 1 2 3 4 5 6 7 8 9 */
4748 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4749 /* 1 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4750 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4751 /* 3 */ ____
, ____
, ____
, ____
, ____
, ____
, true, ____
, ____
, ____
,
4752 /* 4 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4753 /* 5 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4754 /* 6 */ ____
, ____
, ____
, ____
, ____
, true, true, true, true, true,
4755 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4756 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4757 /* 9 */ true, ____
, ____
, ____
, ____
, true, ____
, true, true, true,
4758 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4759 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4760 /* 12 */ true, true, true, ____
, ____
, ____
, ____
, ____
4771 const bool js_isident
[] = {
4772 /* 0 1 2 3 4 5 6 7 8 9 */
4773 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4774 /* 1 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4775 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4776 /* 3 */ ____
, ____
, ____
, ____
, ____
, ____
, true, ____
, ____
, ____
,
4777 /* 4 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, true, true,
4778 /* 5 */ true, true, true, true, true, true, true, true, ____
, ____
,
4779 /* 6 */ ____
, ____
, ____
, ____
, ____
, true, true, true, true, true,
4780 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4781 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4782 /* 9 */ true, ____
, ____
, ____
, ____
, true, ____
, true, true, true,
4783 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4784 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4785 /* 12 */ true, true, true, ____
, ____
, ____
, ____
, ____
4788 /* Whitespace chars: '\t', '\n', '\v', '\f', '\r', ' '. */
4789 const bool js_isspace
[] = {
4790 /* 0 1 2 3 4 5 6 7 8 9 */
4791 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, true,
4792 /* 1 */ true, true, true, true, ____
, ____
, ____
, ____
, ____
, ____
,
4793 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4794 /* 3 */ ____
, ____
, true, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4795 /* 4 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4796 /* 5 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4797 /* 6 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4798 /* 7 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4799 /* 8 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4800 /* 9 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4801 /* 10 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4802 /* 11 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4803 /* 12 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
4807 * Uri reserved chars + #:
4820 static const bool js_isUriReservedPlusPound
[] = {
4821 /* 0 1 2 3 4 5 6 7 8 9 */
4822 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4823 /* 1 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4824 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4825 /* 3 */ ____
, ____
, ____
, ____
, ____
, true, true, ____
, true, ____
,
4826 /* 4 */ ____
, ____
, ____
, true, true, ____
, ____
, true, ____
, ____
,
4827 /* 5 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, true, true,
4828 /* 6 */ ____
, true, ____
, true, true, ____
, ____
, ____
, ____
, ____
,
4829 /* 7 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4830 /* 8 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4831 /* 9 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4832 /* 10 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4833 /* 11 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4834 /* 12 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
4838 * Uri unescaped chars:
4852 static const bool js_isUriUnescaped
[] = {
4853 /* 0 1 2 3 4 5 6 7 8 9 */
4854 /* 0 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4855 /* 1 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4856 /* 2 */ ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
, ____
,
4857 /* 3 */ ____
, ____
, ____
, true, ____
, ____
, ____
, ____
, ____
, true,
4858 /* 4 */ true, true, true, ____
, ____
, true, true, ____
, true, true,
4859 /* 5 */ true, true, true, true, true, true, true, true, ____
, ____
,
4860 /* 6 */ ____
, ____
, ____
, ____
, ____
, true, true, true, true, true,
4861 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4862 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4863 /* 9 */ true, ____
, ____
, ____
, ____
, true, ____
, true, true, true,
4864 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4865 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4866 /* 12 */ true, true, true, ____
, ____
, ____
, true, ____
/* NOTE(review): no use of URI_CHUNK is visible in this part of the file -- confirm it is still needed. */
4871 #define URI_CHUNK 64U
4874 TransferBufferToString(StringBuffer
& sb
, MutableHandleValue rval
)
4876 JSString
* str
= sb
.finishString();
4879 rval
.setString(str
);
/*
 * ECMA 3, 15.1.3 URI Handling Function Properties
 *
 * The following are implementations of the algorithms
 * given in the ECMA specification for the hidden functions
 * 'Encode' and 'Decode'.
 */
enum EncodeResult {
    Encode_Failure,  /* appending to the output buffer failed */
    Encode_BadUri,   /* the input contained an unpaired surrogate */
    Encode_Success   /* the whole input was encoded */
};
4892 template <typename CharT
>
4894 Encode(StringBuffer
& sb
, const CharT
* chars
, size_t length
,
4895 const bool* unescapedSet
, const bool* unescapedSet2
)
4897 static const char HexDigits
[] = "0123456789ABCDEF"; /* NB: uppercase */
4903 for (size_t k
= 0; k
< length
; k
++) {
4904 jschar c
= chars
[k
];
4905 if (c
< 128 && (unescapedSet
[c
] || (unescapedSet2
&& unescapedSet2
[c
]))) {
4907 return Encode_Failure
;
4909 if (c
>= 0xDC00 && c
<= 0xDFFF)
4910 return Encode_BadUri
;
4913 if (c
< 0xD800 || c
> 0xDBFF) {
4918 return Encode_BadUri
;
4920 jschar c2
= chars
[k
];
4921 if (c2
< 0xDC00 || c2
> 0xDFFF)
4922 return Encode_BadUri
;
4924 v
= ((c
- 0xD800) << 10) + (c2
- 0xDC00) + 0x10000;
4927 size_t L
= js_OneUcs4ToUtf8Char(utf8buf
, v
);
4928 for (size_t j
= 0; j
< L
; j
++) {
4929 hexBuf
[1] = HexDigits
[utf8buf
[j
] >> 4];
4930 hexBuf
[2] = HexDigits
[utf8buf
[j
] & 0xf];
4931 if (!sb
.append(hexBuf
, 3))
4932 return Encode_Failure
;
4937 return Encode_Success
;
4941 Encode(JSContext
* cx
, HandleLinearString str
, const bool* unescapedSet
,
4942 const bool* unescapedSet2
, MutableHandleValue rval
)
4944 size_t length
= str
->length();
4946 rval
.setString(cx
->runtime()->emptyString
);
4950 StringBuffer
sb(cx
);
4951 if (!sb
.reserve(length
))
4955 if (str
->hasLatin1Chars()) {
4956 AutoCheckCannotGC nogc
;
4957 res
= Encode(sb
, str
->latin1Chars(nogc
), str
->length(), unescapedSet
, unescapedSet2
);
4959 AutoCheckCannotGC nogc
;
4960 res
= Encode(sb
, str
->twoByteChars(nogc
), str
->length(), unescapedSet
, unescapedSet2
);
4963 if (res
== Encode_Failure
)
4966 if (res
== Encode_BadUri
) {
4967 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_BAD_URI
, nullptr);
4971 MOZ_ASSERT(res
== Encode_Success
);
4972 return TransferBufferToString(sb
, rval
);
/* Outcome of the templated Decode routine. */
enum DecodeResult {
    Decode_Failure,  /* appending to the output buffer failed */
    Decode_BadUri,   /* the input was not a well-formed escaped sequence */
    Decode_Success   /* the whole input was decoded */
};
4977 template <typename CharT
>
4979 Decode(StringBuffer
& sb
, const CharT
* chars
, size_t length
, const bool* reservedSet
)
4981 for (size_t k
= 0; k
< length
; k
++) {
4982 jschar c
= chars
[k
];
4985 if ((k
+ 2) >= length
)
4986 return Decode_BadUri
;
4988 if (!JS7_ISHEX(chars
[k
+1]) || !JS7_ISHEX(chars
[k
+2]))
4989 return Decode_BadUri
;
4991 uint32_t B
= JS7_UNHEX(chars
[k
+1]) * 16 + JS7_UNHEX(chars
[k
+2]);
4997 while (B
& (0x80 >> n
))
5000 if (n
== 1 || n
> 4)
5001 return Decode_BadUri
;
5004 octets
[0] = (uint8_t)B
;
5005 if (k
+ 3 * (n
- 1) >= length
)
5006 return Decode_BadUri
;
5008 for (int j
= 1; j
< n
; j
++) {
5010 if (chars
[k
] != '%')
5011 return Decode_BadUri
;
5013 if (!JS7_ISHEX(chars
[k
+1]) || !JS7_ISHEX(chars
[k
+2]))
5014 return Decode_BadUri
;
5016 B
= JS7_UNHEX(chars
[k
+1]) * 16 + JS7_UNHEX(chars
[k
+2]);
5017 if ((B
& 0xC0) != 0x80)
5018 return Decode_BadUri
;
5021 octets
[j
] = char(B
);
5023 uint32_t v
= JS::Utf8ToOneUcs4Char(octets
, n
);
5027 return Decode_BadUri
;
5029 c
= jschar((v
& 0x3FF) + 0xDC00);
5030 jschar H
= jschar((v
>> 10) + 0xD800);
5032 return Decode_Failure
;
5037 if (c
< 128 && reservedSet
&& reservedSet
[c
]) {
5038 if (!sb
.append(chars
+ start
, k
- start
+ 1))
5039 return Decode_Failure
;
5042 return Decode_Failure
;
5046 return Decode_Failure
;
5050 return Decode_Success
;
5054 Decode(JSContext
* cx
, HandleLinearString str
, const bool* reservedSet
, MutableHandleValue rval
)
5056 size_t length
= str
->length();
5058 rval
.setString(cx
->runtime()->emptyString
);
5062 StringBuffer
sb(cx
);
5065 if (str
->hasLatin1Chars()) {
5066 AutoCheckCannotGC nogc
;
5067 res
= Decode(sb
, str
->latin1Chars(nogc
), str
->length(), reservedSet
);
5069 AutoCheckCannotGC nogc
;
5070 res
= Decode(sb
, str
->twoByteChars(nogc
), str
->length(), reservedSet
);
5073 if (res
== Decode_Failure
)
5076 if (res
== Decode_BadUri
) {
5077 JS_ReportErrorNumber(cx
, js_GetErrorMessage
, nullptr, JSMSG_BAD_URI
);
5081 MOZ_ASSERT(res
== Decode_Success
);
5082 return TransferBufferToString(sb
, rval
);
5086 str_decodeURI(JSContext
* cx
, unsigned argc
, Value
* vp
)
5088 CallArgs args
= CallArgsFromVp(argc
, vp
);
5089 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
5093 return Decode(cx
, str
, js_isUriReservedPlusPound
, args
.rval());
5097 str_decodeURI_Component(JSContext
* cx
, unsigned argc
, Value
* vp
)
5099 CallArgs args
= CallArgsFromVp(argc
, vp
);
5100 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
5104 return Decode(cx
, str
, nullptr, args
.rval());
5108 str_encodeURI(JSContext
* cx
, unsigned argc
, Value
* vp
)
5110 CallArgs args
= CallArgsFromVp(argc
, vp
);
5111 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
5115 return Encode(cx
, str
, js_isUriUnescaped
, js_isUriReservedPlusPound
, args
.rval());
5119 str_encodeURI_Component(JSContext
* cx
, unsigned argc
, Value
* vp
)
5121 CallArgs args
= CallArgsFromVp(argc
, vp
);
5122 RootedLinearString
str(cx
, ArgToRootedString(cx
, args
, 0));
5126 return Encode(cx
, str
, js_isUriUnescaped
, nullptr, args
.rval());
5130 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
5131 * least 4 bytes long. Return the number of UTF-8 bytes of data written.
5134 js_OneUcs4ToUtf8Char(uint8_t* utf8Buffer
, uint32_t ucs4Char
)
5138 JS_ASSERT(ucs4Char
<= 0x10FFFF);
5139 if (ucs4Char
< 0x80) {
5140 *utf8Buffer
= (uint8_t)ucs4Char
;
5143 uint32_t a
= ucs4Char
>> 11;
5151 utf8Buffer
[i
] = (uint8_t)((ucs4Char
& 0x3F) | 0x80);
5154 *utf8Buffer
= (uint8_t)(0x100 - (1 << (8-utf8Length
)) + ucs4Char
);
5160 js::PutEscapedStringImpl(char* buffer
, size_t bufferSize
, FILE* fp
, JSLinearString
* str
,
5163 size_t len
= str
->length();
5164 AutoCheckCannotGC nogc
;
5165 return str
->hasLatin1Chars()
5166 ? PutEscapedStringImpl(buffer
, bufferSize
, fp
, str
->latin1Chars(nogc
), len
, quote
)
5167 : PutEscapedStringImpl(buffer
, bufferSize
, fp
, str
->twoByteChars(nogc
), len
, quote
);
5170 template <typename CharT
>
5172 js::PutEscapedStringImpl(char* buffer
, size_t bufferSize
, FILE* fp
, const CharT
* chars
,
5173 size_t length
, uint32_t quote
)
5176 STOP
, FIRST_QUOTE
, LAST_QUOTE
, CHARS
, ESCAPE_START
, ESCAPE_MORE
5179 JS_ASSERT(quote
== 0 || quote
== '\'' || quote
== '"');
5180 JS_ASSERT_IF(!buffer
, bufferSize
== 0);
5181 JS_ASSERT_IF(fp
, !buffer
);
5183 if (bufferSize
== 0)
5188 const CharT
* charsEnd
= chars
+ length
;
5190 state
= FIRST_QUOTE
;
5194 char c
= 0; /* to quell GCC warnings */
5211 if (chars
== charsEnd
) {
5218 const char* escape
= strchr(js_EscapeMap
, (int)u
);
5227 if (u
== quote
|| u
== '\\')
5230 } else if (u
< 0x100) {
5245 state
= ESCAPE_START
;
5248 JS_ASSERT(' ' <= u
&& u
< 127);
5250 state
= ESCAPE_MORE
;
5258 u
= 0xF & (hex
>> shift
);
5259 c
= (char)(u
+ (u
< 10 ? '0' : 'A' - 10));
5263 JS_ASSERT(n
<= bufferSize
);
5264 if (n
!= bufferSize
) {
5271 if (fputc(c
, fp
) < 0)
5283 js::PutEscapedStringImpl(char* buffer
, size_t bufferSize
, FILE* fp
, const Latin1Char
* chars
,
5284 size_t length
, uint32_t quote
);
5287 js::PutEscapedStringImpl(char* buffer
, size_t bufferSize
, FILE* fp
, const jschar
* chars
,
5288 size_t length
, uint32_t quote
);
5291 js::PutEscapedString(char* buffer
, size_t bufferSize
, const Latin1Char
* chars
, size_t length
,
5295 js::PutEscapedString(char* buffer
, size_t bufferSize
, const jschar
* chars
, size_t length
,