Bumping gaia.json for 1 gaia revision(s) a=gaia-bump
[gecko.git] / js / src / jsstr.cpp
blob3eec9f3e2659a0c6ae363b52ae72ba78e7a3ca5b
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "jsstr.h"
9 #include "mozilla/Attributes.h"
10 #include "mozilla/Casting.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/FloatingPoint.h"
13 #include "mozilla/PodOperations.h"
14 #include "mozilla/Range.h"
15 #include "mozilla/TypeTraits.h"
16 #include "mozilla/UniquePtr.h"
18 #include <ctype.h>
19 #include <string.h>
21 #include "jsapi.h"
22 #include "jsarray.h"
23 #include "jsatom.h"
24 #include "jsbool.h"
25 #include "jscntxt.h"
26 #include "jsgc.h"
27 #include "jsnum.h"
28 #include "jsobj.h"
29 #include "jsopcode.h"
30 #include "jstypes.h"
31 #include "jsutil.h"
33 #include "builtin/Intl.h"
34 #include "builtin/RegExp.h"
35 #include "js/Conversions.h"
36 #if ENABLE_INTL_API
37 #include "unicode/unorm.h"
38 #endif
39 #include "vm/GlobalObject.h"
40 #include "vm/Interpreter.h"
41 #include "vm/Opcodes.h"
42 #include "vm/RegExpObject.h"
43 #include "vm/RegExpStatics.h"
44 #include "vm/ScopeObject.h"
45 #include "vm/StringBuffer.h"
47 #include "jsinferinlines.h"
49 #include "vm/Interpreter-inl.h"
50 #include "vm/String-inl.h"
51 #include "vm/StringObject-inl.h"
53 using namespace js;
54 using namespace js::gc;
55 using namespace js::types;
56 using namespace js::unicode;
58 using JS::Symbol;
59 using JS::SymbolCode;
60 using JS::ToInt32;
61 using JS::ToUint32;
63 using mozilla::AssertedCast;
64 using mozilla::CheckedInt;
65 using mozilla::IsNaN;
66 using mozilla::IsNegativeZero;
67 using mozilla::IsSame;
68 using mozilla::Move;
69 using mozilla::PodCopy;
70 using mozilla::PodEqual;
71 using mozilla::RangedPtr;
72 using mozilla::UniquePtr;
74 using JS::AutoCheckCannotGC;
76 static JSLinearString*
77 ArgToRootedString(JSContext* cx, CallArgs& args, unsigned argno)
79 if (argno >= args.length())
80 return cx->names().undefined;
82 JSString* str = ToString<CanGC>(cx, args[argno]);
83 if (!str)
84 return nullptr;
86 args[argno].setString(str);
87 return str->ensureLinear(cx);
91 * Forward declarations for URI encode/decode and helper routines
93 static bool
94 str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
96 static bool
97 str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
99 static bool
100 str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
102 static bool
103 str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
106 * Global string methods
110 /* ES5 B.2.1 */
111 template <typename CharT>
112 static Latin1Char*
113 Escape(JSContext* cx, const CharT* chars, uint32_t length, uint32_t* newLengthOut)
115 static const uint8_t shouldPassThrough[128] = {
116 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
117 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
118 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
119 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
120 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
121 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
122 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
123 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
126 /* Take a first pass and see how big the result string will need to be. */
127 uint32_t newLength = length;
128 for (size_t i = 0; i < length; i++) {
129 char16_t ch = chars[i];
130 if (ch < 128 && shouldPassThrough[ch])
131 continue;
133 /* The character will be encoded as %XX or %uXXXX. */
134 newLength += (ch < 256) ? 2 : 5;
137 * newlength is incremented by at most 5 on each iteration, so worst
138 * case newlength == length * 6. This can't overflow.
140 static_assert(JSString::MAX_LENGTH < UINT32_MAX / 6,
141 "newlength must not overflow");
144 Latin1Char* newChars = cx->pod_malloc<Latin1Char>(newLength + 1);
145 if (!newChars)
146 return nullptr;
148 static const char digits[] = "0123456789ABCDEF";
150 size_t i, ni;
151 for (i = 0, ni = 0; i < length; i++) {
152 char16_t ch = chars[i];
153 if (ch < 128 && shouldPassThrough[ch]) {
154 newChars[ni++] = ch;
155 } else if (ch < 256) {
156 newChars[ni++] = '%';
157 newChars[ni++] = digits[ch >> 4];
158 newChars[ni++] = digits[ch & 0xF];
159 } else {
160 newChars[ni++] = '%';
161 newChars[ni++] = 'u';
162 newChars[ni++] = digits[ch >> 12];
163 newChars[ni++] = digits[(ch & 0xF00) >> 8];
164 newChars[ni++] = digits[(ch & 0xF0) >> 4];
165 newChars[ni++] = digits[ch & 0xF];
168 MOZ_ASSERT(ni == newLength);
169 newChars[newLength] = 0;
171 *newLengthOut = newLength;
172 return newChars;
175 static bool
176 str_escape(JSContext* cx, unsigned argc, Value* vp)
178 CallArgs args = CallArgsFromVp(argc, vp);
180 JSLinearString* str = ArgToRootedString(cx, args, 0);
181 if (!str)
182 return false;
184 ScopedJSFreePtr<Latin1Char> newChars;
185 uint32_t newLength;
186 if (str->hasLatin1Chars()) {
187 AutoCheckCannotGC nogc;
188 newChars = Escape(cx, str->latin1Chars(nogc), str->length(), &newLength);
189 } else {
190 AutoCheckCannotGC nogc;
191 newChars = Escape(cx, str->twoByteChars(nogc), str->length(), &newLength);
194 if (!newChars)
195 return false;
197 JSString* res = NewString<CanGC>(cx, newChars.get(), newLength);
198 if (!res)
199 return false;
201 newChars.forget();
202 args.rval().setString(res);
203 return true;
206 template <typename CharT>
207 static inline bool
208 Unhex4(const RangedPtr<const CharT> chars, char16_t* result)
210 char16_t a = chars[0],
211 b = chars[1],
212 c = chars[2],
213 d = chars[3];
215 if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
216 return false;
218 *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
219 return true;
222 template <typename CharT>
223 static inline bool
224 Unhex2(const RangedPtr<const CharT> chars, char16_t* result)
226 char16_t a = chars[0],
227 b = chars[1];
229 if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
230 return false;
232 *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
233 return true;
236 template <typename CharT>
237 static bool
238 Unescape(StringBuffer& sb, const mozilla::Range<const CharT> chars)
241 * NB: use signed integers for length/index to allow simple length
242 * comparisons without unsigned-underflow hazards.
244 static_assert(JSString::MAX_LENGTH <= INT_MAX, "String length must fit in a signed integer");
245 int length = AssertedCast<int>(chars.length());
248 * Note that the spec algorithm has been optimized to avoid building
249 * a string in the case where no escapes are present.
252 /* Step 4. */
253 int k = 0;
254 bool building = false;
256 /* Step 5. */
257 while (k < length) {
258 /* Step 6. */
259 char16_t c = chars[k];
261 /* Step 7. */
262 if (c != '%')
263 goto step_18;
265 /* Step 8. */
266 if (k > length - 6)
267 goto step_14;
269 /* Step 9. */
270 if (chars[k + 1] != 'u')
271 goto step_14;
273 #define ENSURE_BUILDING \
274 do { \
275 if (!building) { \
276 building = true; \
277 if (!sb.reserve(length)) \
278 return false; \
279 sb.infallibleAppend(chars.start().get(), k); \
281 } while(false);
283 /* Step 10-13. */
284 if (Unhex4(chars.start() + k + 2, &c)) {
285 ENSURE_BUILDING;
286 k += 5;
287 goto step_18;
290 step_14:
291 /* Step 14. */
292 if (k > length - 3)
293 goto step_18;
295 /* Step 15-17. */
296 if (Unhex2(chars.start() + k + 1, &c)) {
297 ENSURE_BUILDING;
298 k += 2;
301 step_18:
302 if (building && !sb.append(c))
303 return false;
305 /* Step 19. */
306 k += 1;
309 return true;
310 #undef ENSURE_BUILDING
313 /* ES5 B.2.2 */
314 static bool
315 str_unescape(JSContext* cx, unsigned argc, Value* vp)
317 CallArgs args = CallArgsFromVp(argc, vp);
319 /* Step 1. */
320 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
321 if (!str)
322 return false;
324 /* Step 3. */
325 StringBuffer sb(cx);
326 if (str->hasTwoByteChars() && !sb.ensureTwoByteChars())
327 return false;
329 if (str->hasLatin1Chars()) {
330 AutoCheckCannotGC nogc;
331 if (!Unescape(sb, str->latin1Range(nogc)))
332 return false;
333 } else {
334 AutoCheckCannotGC nogc;
335 if (!Unescape(sb, str->twoByteRange(nogc)))
336 return false;
339 JSLinearString* result;
340 if (!sb.empty()) {
341 result = sb.finishString();
342 if (!result)
343 return false;
344 } else {
345 result = str;
348 args.rval().setString(result);
349 return true;
352 #if JS_HAS_UNEVAL
353 static bool
354 str_uneval(JSContext* cx, unsigned argc, Value* vp)
356 CallArgs args = CallArgsFromVp(argc, vp);
357 JSString* str = ValueToSource(cx, args.get(0));
358 if (!str)
359 return false;
361 args.rval().setString(str);
362 return true;
364 #endif
366 static const JSFunctionSpec string_functions[] = {
367 JS_FN(js_escape_str, str_escape, 1,0),
368 JS_FN(js_unescape_str, str_unescape, 1,0),
369 #if JS_HAS_UNEVAL
370 JS_FN(js_uneval_str, str_uneval, 1,0),
371 #endif
372 JS_FN(js_decodeURI_str, str_decodeURI, 1,0),
373 JS_FN(js_encodeURI_str, str_encodeURI, 1,0),
374 JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,0),
375 JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,0),
377 JS_FS_END
380 static const unsigned STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
382 static bool
383 str_enumerate(JSContext* cx, HandleObject obj)
385 RootedString str(cx, obj->as<StringObject>().unbox());
386 RootedValue value(cx);
387 for (size_t i = 0, length = str->length(); i < length; i++) {
388 JSString* str1 = NewDependentString(cx, str, i, 1);
389 if (!str1)
390 return false;
391 value.setString(str1);
392 if (!JSObject::defineElement(cx, obj, i, value, nullptr, nullptr, STRING_ELEMENT_ATTRS))
393 return false;
396 return true;
399 bool
400 js::str_resolve(JSContext* cx, HandleObject obj, HandleId id, bool* resolvedp)
402 if (!JSID_IS_INT(id))
403 return true;
405 RootedString str(cx, obj->as<StringObject>().unbox());
407 int32_t slot = JSID_TO_INT(id);
408 if ((size_t)slot < str->length()) {
409 JSString* str1 = cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
410 if (!str1)
411 return false;
412 RootedValue value(cx, StringValue(str1));
413 if (!JSObject::defineElement(cx, obj, uint32_t(slot), value, nullptr, nullptr,
414 STRING_ELEMENT_ATTRS))
416 return false;
418 *resolvedp = true;
420 return true;
423 const Class StringObject::class_ = {
424 js_String_str,
425 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
426 JSCLASS_HAS_CACHED_PROTO(JSProto_String),
427 nullptr, /* addProperty */
428 nullptr, /* delProperty */
429 nullptr, /* getProperty */
430 nullptr, /* setProperty */
431 str_enumerate,
432 str_resolve
436 * Returns a JSString * for the |this| value associated with 'call', or throws
437 * a TypeError if |this| is null or undefined. This algorithm is the same as
438 * calling CheckObjectCoercible(this), then returning ToString(this), as all
439 * String.prototype.* methods do (other than toString and valueOf).
441 static MOZ_ALWAYS_INLINE JSString*
442 ThisToStringForStringProto(JSContext* cx, CallReceiver call)
444 JS_CHECK_RECURSION(cx, return nullptr);
446 if (call.thisv().isString())
447 return call.thisv().toString();
449 if (call.thisv().isObject()) {
450 RootedObject obj(cx, &call.thisv().toObject());
451 if (obj->is<StringObject>()) {
452 StringObject* nobj = &obj->as<StringObject>();
453 Rooted<jsid> id(cx, NameToId(cx->names().toString));
454 if (ClassMethodIsNative(cx, nobj, &StringObject::class_, id, js_str_toString)) {
455 JSString* str = nobj->unbox();
456 call.setThis(StringValue(str));
457 return str;
460 } else if (call.thisv().isNullOrUndefined()) {
461 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_CANT_CONVERT_TO,
462 call.thisv().isNull() ? "null" : "undefined", "object");
463 return nullptr;
466 JSString* str = ToStringSlow<CanGC>(cx, call.thisv());
467 if (!str)
468 return nullptr;
470 call.setThis(StringValue(str));
471 return str;
474 MOZ_ALWAYS_INLINE bool
475 IsString(HandleValue v)
477 return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
480 #if JS_HAS_TOSOURCE
482 MOZ_ALWAYS_INLINE bool
483 str_toSource_impl(JSContext* cx, CallArgs args)
485 MOZ_ASSERT(IsString(args.thisv()));
487 Rooted<JSString*> str(cx, ToString<CanGC>(cx, args.thisv()));
488 if (!str)
489 return false;
491 str = js_QuoteString(cx, str, '"');
492 if (!str)
493 return false;
495 StringBuffer sb(cx);
496 if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
497 return false;
499 str = sb.finishString();
500 if (!str)
501 return false;
502 args.rval().setString(str);
503 return true;
506 static bool
507 str_toSource(JSContext* cx, unsigned argc, Value* vp)
509 CallArgs args = CallArgsFromVp(argc, vp);
510 return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
513 #endif /* JS_HAS_TOSOURCE */
515 MOZ_ALWAYS_INLINE bool
516 str_toString_impl(JSContext* cx, CallArgs args)
518 MOZ_ASSERT(IsString(args.thisv()));
520 args.rval().setString(args.thisv().isString()
521 ? args.thisv().toString()
522 : args.thisv().toObject().as<StringObject>().unbox());
523 return true;
526 bool
527 js_str_toString(JSContext* cx, unsigned argc, Value* vp)
529 CallArgs args = CallArgsFromVp(argc, vp);
530 return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
534 * Java-like string native methods.
537 JSString*
538 js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengthInt)
540 MOZ_ASSERT(0 <= beginInt);
541 MOZ_ASSERT(0 <= lengthInt);
542 MOZ_ASSERT(uint32_t(beginInt) <= str->length());
543 MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
545 uint32_t begin = beginInt;
546 uint32_t len = lengthInt;
549 * Optimization for one level deep ropes.
550 * This is common for the following pattern:
552 * while() {
553 * text = text.substr(0, x) + "bla" + text.substr(x)
554 * test.charCodeAt(x + 1)
557 if (str->isRope()) {
558 JSRope* rope = &str->asRope();
560 /* Substring is totally in leftChild of rope. */
561 if (begin + len <= rope->leftChild()->length())
562 return NewDependentString(cx, rope->leftChild(), begin, len);
564 /* Substring is totally in rightChild of rope. */
565 if (begin >= rope->leftChild()->length()) {
566 begin -= rope->leftChild()->length();
567 return NewDependentString(cx, rope->rightChild(), begin, len);
571 * Requested substring is partly in the left and partly in right child.
572 * Create a rope of substrings for both childs.
574 MOZ_ASSERT(begin < rope->leftChild()->length() &&
575 begin + len > rope->leftChild()->length());
577 size_t lhsLength = rope->leftChild()->length() - begin;
578 size_t rhsLength = begin + len - rope->leftChild()->length();
580 Rooted<JSRope*> ropeRoot(cx, rope);
581 RootedString lhs(cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
582 if (!lhs)
583 return nullptr;
585 RootedString rhs(cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
586 if (!rhs)
587 return nullptr;
589 return JSRope::new_<CanGC>(cx, lhs, rhs, len);
592 return NewDependentString(cx, str, begin, len);
595 template <typename CharT>
596 static JSString*
597 ToLowerCase(JSContext* cx, JSLinearString* str)
599 // Unlike toUpperCase, toLowerCase has the nice invariant that if the input
600 // is a Latin1 string, the output is also a Latin1 string.
601 UniquePtr<CharT[], JS::FreePolicy> newChars;
602 size_t length = str->length();
604 AutoCheckCannotGC nogc;
605 const CharT* chars = str->chars<CharT>(nogc);
607 // Look for the first upper case character.
608 size_t i = 0;
609 for (; i < length; i++) {
610 char16_t c = chars[i];
611 if (unicode::ToLowerCase(c) != c)
612 break;
615 // If all characters are lower case, return the input string.
616 if (i == length)
617 return str;
619 newChars = cx->make_pod_array<CharT>(length + 1);
620 if (!newChars)
621 return nullptr;
623 PodCopy(newChars.get(), chars, i);
625 for (; i < length; i++) {
626 char16_t c = unicode::ToLowerCase(chars[i]);
627 MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
628 newChars[i] = c;
631 newChars[length] = 0;
634 JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
635 if (!res)
636 return nullptr;
638 newChars.release();
639 return res;
642 static inline bool
643 ToLowerCaseHelper(JSContext* cx, CallReceiver call)
645 RootedString str(cx, ThisToStringForStringProto(cx, call));
646 if (!str)
647 return false;
649 JSLinearString* linear = str->ensureLinear(cx);
650 if (!linear)
651 return false;
653 if (linear->hasLatin1Chars())
654 str = ToLowerCase<Latin1Char>(cx, linear);
655 else
656 str = ToLowerCase<char16_t>(cx, linear);
657 if (!str)
658 return false;
660 call.rval().setString(str);
661 return true;
664 bool
665 js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp)
667 return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
670 static bool
671 str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
673 CallArgs args = CallArgsFromVp(argc, vp);
676 * Forcefully ignore the first (or any) argument and return toLowerCase(),
677 * ECMA has reserved that argument, presumably for defining the locale.
679 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) {
680 RootedString str(cx, ThisToStringForStringProto(cx, args));
681 if (!str)
682 return false;
684 RootedValue result(cx);
685 if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
686 return false;
688 args.rval().set(result);
689 return true;
692 return ToLowerCaseHelper(cx, args);
695 template <typename DestChar, typename SrcChar>
696 static void
697 ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
699 MOZ_ASSERT(firstLowerCase < length);
701 for (size_t i = 0; i < firstLowerCase; i++)
702 destChars[i] = srcChars[i];
704 for (size_t i = firstLowerCase; i < length; i++) {
705 char16_t c = unicode::ToUpperCase(srcChars[i]);
706 MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
707 destChars[i] = c;
710 destChars[length] = '\0';
713 template <typename CharT>
714 static JSString*
715 ToUpperCase(JSContext* cx, JSLinearString* str)
717 typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr;
718 typedef UniquePtr<char16_t[], JS::FreePolicy> TwoByteCharPtr;
720 mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
721 size_t length = str->length();
723 AutoCheckCannotGC nogc;
724 const CharT* chars = str->chars<CharT>(nogc);
726 // Look for the first lower case character.
727 size_t i = 0;
728 for (; i < length; i++) {
729 char16_t c = chars[i];
730 if (unicode::ToUpperCase(c) != c)
731 break;
734 // If all characters are upper case, return the input string.
735 if (i == length)
736 return str;
738 // If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
739 // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
740 // upper case characters are not in the Latin1 range.
741 bool resultIsLatin1;
742 if (IsSame<CharT, Latin1Char>::value) {
743 resultIsLatin1 = true;
744 for (size_t j = i; j < length; j++) {
745 Latin1Char c = chars[j];
746 if (c == 0xb5 || c == 0xff) {
747 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
748 resultIsLatin1 = false;
749 break;
750 } else {
751 MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
754 } else {
755 resultIsLatin1 = false;
758 if (resultIsLatin1) {
759 Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1);
760 if (!buf)
761 return nullptr;
763 ToUpperCaseImpl(buf.get(), chars, i, length);
764 newChars.construct<Latin1CharPtr>(buf);
765 } else {
766 TwoByteCharPtr buf = cx->make_pod_array<char16_t>(length + 1);
767 if (!buf)
768 return nullptr;
770 ToUpperCaseImpl(buf.get(), chars, i, length);
771 newChars.construct<TwoByteCharPtr>(buf);
775 JSString* res;
776 if (newChars.constructed<Latin1CharPtr>()) {
777 res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length);
778 if (!res)
779 return nullptr;
781 newChars.ref<Latin1CharPtr>().release();
782 } else {
783 res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length);
784 if (!res)
785 return nullptr;
787 newChars.ref<TwoByteCharPtr>().release();
790 return res;
793 static bool
794 ToUpperCaseHelper(JSContext* cx, CallReceiver call)
796 RootedString str(cx, ThisToStringForStringProto(cx, call));
797 if (!str)
798 return false;
800 JSLinearString* linear = str->ensureLinear(cx);
801 if (!linear)
802 return false;
804 if (linear->hasLatin1Chars())
805 str = ToUpperCase<Latin1Char>(cx, linear);
806 else
807 str = ToUpperCase<char16_t>(cx, linear);
808 if (!str)
809 return false;
811 call.rval().setString(str);
812 return true;
815 bool
816 js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp)
818 return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
821 static bool
822 str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
824 CallArgs args = CallArgsFromVp(argc, vp);
827 * Forcefully ignore the first (or any) argument and return toUpperCase(),
828 * ECMA has reserved that argument, presumably for defining the locale.
830 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) {
831 RootedString str(cx, ThisToStringForStringProto(cx, args));
832 if (!str)
833 return false;
835 RootedValue result(cx);
836 if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
837 return false;
839 args.rval().set(result);
840 return true;
843 return ToUpperCaseHelper(cx, args);
846 #if !EXPOSE_INTL_API
847 static bool
848 str_localeCompare(JSContext* cx, unsigned argc, Value* vp)
850 CallArgs args = CallArgsFromVp(argc, vp);
851 RootedString str(cx, ThisToStringForStringProto(cx, args));
852 if (!str)
853 return false;
855 RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
856 if (!thatStr)
857 return false;
859 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeCompare) {
860 RootedValue result(cx);
861 if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr, &result))
862 return false;
864 args.rval().set(result);
865 return true;
868 int32_t result;
869 if (!CompareStrings(cx, str, thatStr, &result))
870 return false;
872 args.rval().setInt32(result);
873 return true;
875 #endif
877 #if EXPOSE_INTL_API
878 /* ES6 20140210 draft 21.1.3.12. */
879 static bool
880 str_normalize(JSContext* cx, unsigned argc, Value* vp)
882 CallArgs args = CallArgsFromVp(argc, vp);
884 // Steps 1-3.
885 RootedString str(cx, ThisToStringForStringProto(cx, args));
886 if (!str)
887 return false;
889 // Step 4.
890 UNormalizationMode form;
891 if (!args.hasDefined(0)) {
892 form = UNORM_NFC;
893 } else {
894 // Steps 5-6.
895 RootedLinearString formStr(cx, ArgToRootedString(cx, args, 0));
896 if (!formStr)
897 return false;
899 // Step 7.
900 if (formStr == cx->names().NFC) {
901 form = UNORM_NFC;
902 } else if (formStr == cx->names().NFD) {
903 form = UNORM_NFD;
904 } else if (formStr == cx->names().NFKC) {
905 form = UNORM_NFKC;
906 } else if (formStr == cx->names().NFKD) {
907 form = UNORM_NFKD;
908 } else {
909 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr,
910 JSMSG_INVALID_NORMALIZE_FORM);
911 return false;
915 // Step 8.
916 AutoStableStringChars stableChars(cx);
917 if (!str->ensureFlat(cx) || !stableChars.initTwoByte(cx, str))
918 return false;
920 static const size_t INLINE_CAPACITY = 32;
922 const UChar* srcChars = Char16ToUChar(stableChars.twoByteRange().start().get());
923 int32_t srcLen = AssertedCast<int32_t>(str->length());
924 Vector<char16_t, INLINE_CAPACITY> chars(cx);
925 if (!chars.resize(INLINE_CAPACITY))
926 return false;
928 UErrorCode status = U_ZERO_ERROR;
929 int32_t size = unorm_normalize(srcChars, srcLen, form, 0,
930 Char16ToUChar(chars.begin()), INLINE_CAPACITY,
931 &status);
932 if (status == U_BUFFER_OVERFLOW_ERROR) {
933 if (!chars.resize(size))
934 return false;
935 status = U_ZERO_ERROR;
936 #ifdef DEBUG
937 int32_t finalSize =
938 #endif
939 unorm_normalize(srcChars, srcLen, form, 0,
940 Char16ToUChar(chars.begin()), size,
941 &status);
942 MOZ_ASSERT(size == finalSize || U_FAILURE(status), "unorm_normalize behaved inconsistently");
944 if (U_FAILURE(status))
945 return false;
947 JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
948 if (!ns)
949 return false;
951 // Step 9.
952 args.rval().setString(ns);
953 return true;
955 #endif
957 bool
958 js_str_charAt(JSContext* cx, unsigned argc, Value* vp)
960 CallArgs args = CallArgsFromVp(argc, vp);
962 RootedString str(cx);
963 size_t i;
964 if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
965 str = args.thisv().toString();
966 i = size_t(args[0].toInt32());
967 if (i >= str->length())
968 goto out_of_range;
969 } else {
970 str = ThisToStringForStringProto(cx, args);
971 if (!str)
972 return false;
974 double d = 0.0;
975 if (args.length() > 0 && !ToInteger(cx, args[0], &d))
976 return false;
978 if (d < 0 || str->length() <= d)
979 goto out_of_range;
980 i = size_t(d);
983 str = cx->staticStrings().getUnitStringForElement(cx, str, i);
984 if (!str)
985 return false;
986 args.rval().setString(str);
987 return true;
989 out_of_range:
990 args.rval().setString(cx->runtime()->emptyString);
991 return true;
994 bool
995 js::str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res)
997 RootedString str(cx);
998 size_t i;
999 if (index.isInt32()) {
1000 i = index.toInt32();
1001 if (i >= string->length())
1002 goto out_of_range;
1003 } else {
1004 double d = 0.0;
1005 if (!ToInteger(cx, index, &d))
1006 return false;
1007 // check whether d is negative as size_t is unsigned
1008 if (d < 0 || string->length() <= d )
1009 goto out_of_range;
1010 i = size_t(d);
1012 char16_t c;
1013 if (!string->getChar(cx, i , &c))
1014 return false;
1015 res.setInt32(c);
1016 return true;
1018 out_of_range:
1019 res.setNaN();
1020 return true;
1023 bool
1024 js_str_charCodeAt(JSContext* cx, unsigned argc, Value* vp)
1026 CallArgs args = CallArgsFromVp(argc, vp);
1027 RootedString str(cx);
1028 RootedValue index(cx);
1029 if (args.thisv().isString()) {
1030 str = args.thisv().toString();
1031 } else {
1032 str = ThisToStringForStringProto(cx, args);
1033 if (!str)
1034 return false;
1036 if (args.length() != 0)
1037 index = args[0];
1038 else
1039 index.setInt32(0);
1041 return js::str_charCodeAt_impl(cx, str, index, args.rval());
1045 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1046 * The patlen argument must be positive and no greater than sBMHPatLenMax.
1048 * Return the index of pat in text, or -1 if not found.
1050 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1051 static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */
1052 static const int sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
1054 template <typename TextChar, typename PatChar>
1055 static int
1056 BoyerMooreHorspool(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
1058 MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1060 uint8_t skip[sBMHCharSetSize];
1061 for (uint32_t i = 0; i < sBMHCharSetSize; i++)
1062 skip[i] = uint8_t(patLen);
1064 uint32_t patLast = patLen - 1;
1065 for (uint32_t i = 0; i < patLast; i++) {
1066 char16_t c = pat[i];
1067 if (c >= sBMHCharSetSize)
1068 return sBMHBadPattern;
1069 skip[c] = uint8_t(patLast - i);
1072 for (uint32_t k = patLast; k < textLen; ) {
1073 for (uint32_t i = k, j = patLast; ; i--, j--) {
1074 if (text[i] != pat[j])
1075 break;
1076 if (j == 0)
1077 return static_cast<int>(i); /* safe: max string size */
1080 char16_t c = text[k];
1081 k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1083 return -1;
1086 template <typename TextChar, typename PatChar>
1087 struct MemCmp {
1088 typedef uint32_t Extent;
1089 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*, uint32_t patLen) {
1090 return (patLen - 1) * sizeof(PatChar);
1092 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
1093 MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1094 return memcmp(p, t, extent) == 0;
1098 template <typename TextChar, typename PatChar>
1099 struct ManualCmp {
1100 typedef const PatChar* Extent;
1101 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat, uint32_t patLen) {
1102 return pat + patLen;
1104 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
1105 for (; p != extent; ++p, ++t) {
1106 if (*p != *t)
1107 return false;
1109 return true;
1113 template <typename TextChar, typename PatChar>
1114 static const TextChar*
1115 FirstCharMatcherUnrolled(const TextChar* text, uint32_t n, const PatChar pat)
1117 const TextChar* textend = text + n;
1118 const TextChar* t = text;
1120 switch ((textend - t) & 7) {
1121 case 0: if (*t++ == pat) return t - 1;
1122 case 7: if (*t++ == pat) return t - 1;
1123 case 6: if (*t++ == pat) return t - 1;
1124 case 5: if (*t++ == pat) return t - 1;
1125 case 4: if (*t++ == pat) return t - 1;
1126 case 3: if (*t++ == pat) return t - 1;
1127 case 2: if (*t++ == pat) return t - 1;
1128 case 1: if (*t++ == pat) return t - 1;
1130 while (textend != t) {
1131 if (t[0] == pat) return t;
1132 if (t[1] == pat) return t + 1;
1133 if (t[2] == pat) return t + 2;
1134 if (t[3] == pat) return t + 3;
1135 if (t[4] == pat) return t + 4;
1136 if (t[5] == pat) return t + 5;
1137 if (t[6] == pat) return t + 6;
1138 if (t[7] == pat) return t + 7;
1139 t += 8;
1141 return nullptr;
1144 static const char*
1145 FirstCharMatcher8bit(const char* text, uint32_t n, const char pat)
1147 #if defined(__clang__)
1148 return FirstCharMatcherUnrolled<char, char>(text, n, pat);
1149 #else
1150 return reinterpret_cast<const char*>(memchr(text, pat, n));
1151 #endif
1154 static const char16_t*
1155 FirstCharMatcher16bit(const char16_t* text, uint32_t n, const char16_t pat)
1157 #if defined(XP_MACOSX) || defined(XP_WIN)
1159 * Performance of memchr is horrible in OSX. Windows is better,
1160 * but it is still better to use UnrolledMatcher.
1162 return FirstCharMatcherUnrolled<char16_t, char16_t>(text, n, pat);
1163 #else
1165 * For linux the best performance is obtained by slightly hacking memchr.
1166 * memchr works only on 8bit char but char16_t is 16bit. So we treat char16_t
1167 * in blocks of 8bit and use memchr.
1170 const char* text8 = (const char*) text;
1171 const char* pat8 = reinterpret_cast<const char*>(&pat);
1173 MOZ_ASSERT(n < UINT32_MAX/2);
1174 n *= 2;
1176 uint32_t i = 0;
1177 while (i < n) {
1178 /* Find the first 8 bits of 16bit character in text. */
1179 const char* pos8 = FirstCharMatcher8bit(text8 + i, n - i, pat8[0]);
1180 if (pos8 == nullptr)
1181 return nullptr;
1182 i = static_cast<uint32_t>(pos8 - text8);
1184 /* Incorrect match if it matches the last 8 bits of 16bit char. */
1185 if (i % 2 != 0) {
1186 i++;
1187 continue;
1190 /* Test if last 8 bits match last 8 bits of 16bit char. */
1191 if (pat8[1] == text8[i + 1])
1192 return (text + (i/2));
1194 i += 2;
1196 return nullptr;
1197 #endif
1200 template <class InnerMatch, typename TextChar, typename PatChar>
1201 static int
1202 Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat, uint32_t patlen)
1204 const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
1206 uint32_t i = 0;
1207 uint32_t n = textlen - patlen + 1;
1208 while (i < n) {
1209 const TextChar* pos;
1211 if (sizeof(TextChar) == 2 && sizeof(PatChar) == 2)
1212 pos = (TextChar*) FirstCharMatcher16bit((char16_t*)text + i, n - i, pat[0]);
1213 else if (sizeof(TextChar) == 1 && sizeof(PatChar) == 1)
1214 pos = (TextChar*) FirstCharMatcher8bit((char*) text + i, n - i, pat[0]);
1215 else
1216 pos = (TextChar*) FirstCharMatcherUnrolled<TextChar, PatChar>(text + i, n - i, pat[0]);
1218 if (pos == nullptr)
1219 return -1;
1221 i = static_cast<uint32_t>(pos - text);
1222 if (InnerMatch::match(pat + 1, text + i + 1, extent))
1223 return i;
1225 i += 1;
1227 return -1;
1231 template <typename TextChar, typename PatChar>
1232 static MOZ_ALWAYS_INLINE int
1233 StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
1235 if (patLen == 0)
1236 return 0;
1237 if (textLen < patLen)
1238 return -1;
1240 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1242 * Given enough registers, the unrolled loop below is faster than the
1243 * following loop. 32-bit x86 does not have enough registers.
1245 if (patLen == 1) {
1246 const PatChar p0 = *pat;
1247 for (const TextChar* c = text, *end = text + textLen; c != end; ++c) {
1248 if (*c == p0)
1249 return c - text;
1251 return -1;
1253 #endif
1256 * If the text or pattern string is short, BMH will be more expensive than
1257 * the basic linear scan due to initialization cost and a more complex loop
1258 * body. While the correct threshold is input-dependent, we can make a few
1259 * conservative observations:
1260 * - When |textLen| is "big enough", the initialization time will be
1261 * proportionally small, so the worst-case slowdown is minimized.
1262 * - When |patLen| is "too small", even the best case for BMH will be
1263 * slower than a simple scan for large |textLen| due to the more complex
1264 * loop body of BMH.
1265 * From this, the values for "big enough" and "too small" are determined
1266 * empirically. See bug 526348.
1268 if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1269 int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1270 if (index != sBMHBadPattern)
1271 return index;
1275 * For big patterns with large potential overlap we want the SIMD-optimized
1276 * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1277 * use memcmp if one of the strings is TwoByte and the other is Latin1.
1279 * FIXME: Linux memcmp performance is sad and the manual loop is faster.
1281 return
1282 #if !defined(__linux__)
1283 (patLen > 128 && IsSame<TextChar, PatChar>::value)
1284 ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen)
1286 #endif
1287 Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen);
1290 static int32_t
1291 StringMatch(JSLinearString* text, JSLinearString* pat, uint32_t start = 0)
1293 MOZ_ASSERT(start <= text->length());
1294 uint32_t textLen = text->length() - start;
1295 uint32_t patLen = pat->length();
1297 int match;
1298 AutoCheckCannotGC nogc;
1299 if (text->hasLatin1Chars()) {
1300 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1301 if (pat->hasLatin1Chars())
1302 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1303 else
1304 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1305 } else {
1306 const char16_t* textChars = text->twoByteChars(nogc) + start;
1307 if (pat->hasLatin1Chars())
1308 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1309 else
1310 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1313 return (match == -1) ? -1 : start + match;
1316 static const size_t sRopeMatchThresholdRatioLog2 = 5;
1318 bool
1319 js::StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patLen)
1321 AutoCheckCannotGC nogc;
1322 return text->hasLatin1Chars()
1323 ? StringMatch(text->latin1Chars(nogc), text->length(), pat, patLen) != -1
1324 : StringMatch(text->twoByteChars(nogc), text->length(), pat, patLen) != -1;
1328 js::StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start)
1330 return StringMatch(text, pat, start);
1333 // When an algorithm does not need a string represented as a single linear
1334 // array of characters, this range utility may be used to traverse the string a
1335 // sequence of linear arrays of characters. This avoids flattening ropes.
1336 class StringSegmentRange
1338 // If malloc() shows up in any profiles from this vector, we can add a new
1339 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1340 AutoStringVector stack;
1341 RootedLinearString cur;
1343 bool settle(JSString* str) {
1344 while (str->isRope()) {
1345 JSRope& rope = str->asRope();
1346 if (!stack.append(rope.rightChild()))
1347 return false;
1348 str = rope.leftChild();
1350 cur = &str->asLinear();
1351 return true;
1354 public:
1355 explicit StringSegmentRange(JSContext* cx)
1356 : stack(cx), cur(cx)
1359 MOZ_WARN_UNUSED_RESULT bool init(JSString* str) {
1360 MOZ_ASSERT(stack.empty());
1361 return settle(str);
1364 bool empty() const {
1365 return cur == nullptr;
1368 JSLinearString* front() const {
1369 MOZ_ASSERT(!cur->isRope());
1370 return cur;
1373 MOZ_WARN_UNUSED_RESULT bool popFront() {
1374 MOZ_ASSERT(!empty());
1375 if (stack.empty()) {
1376 cur = nullptr;
1377 return true;
1379 return settle(stack.popCopy());
1383 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
1385 template <typename TextChar, typename PatChar>
1386 static int
1387 RopeMatchImpl(const AutoCheckCannotGC& nogc, LinearStringVector& strings,
1388 const PatChar* pat, size_t patLen)
1390 /* Absolute offset from the beginning of the logical text string. */
1391 int pos = 0;
1393 for (JSLinearString** outerp = strings.begin(); outerp != strings.end(); ++outerp) {
1394 /* Try to find a match within 'outer'. */
1395 JSLinearString* outer = *outerp;
1396 const TextChar* chars = outer->chars<TextChar>(nogc);
1397 size_t len = outer->length();
1398 int matchResult = StringMatch(chars, len, pat, patLen);
1399 if (matchResult != -1) {
1400 /* Matched! */
1401 return pos + matchResult;
1404 /* Try to find a match starting in 'outer' and running into other nodes. */
1405 const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
1406 const TextChar* const textend = chars + len;
1407 const PatChar p0 = *pat;
1408 const PatChar* const p1 = pat + 1;
1409 const PatChar* const patend = pat + patLen;
1410 for (const TextChar* t = text; t != textend; ) {
1411 if (*t++ != p0)
1412 continue;
1414 JSLinearString** innerp = outerp;
1415 const TextChar* ttend = textend;
1416 const TextChar* tt = t;
1417 for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
1418 while (tt == ttend) {
1419 if (++innerp == strings.end())
1420 return -1;
1422 JSLinearString* inner = *innerp;
1423 tt = inner->chars<TextChar>(nogc);
1424 ttend = tt + inner->length();
1426 if (*pp != *tt)
1427 goto break_continue;
1430 /* Matched! */
1431 return pos + (t - chars) - 1; /* -1 because of *t++ above */
1433 break_continue:;
1436 pos += len;
1439 return -1;
1443 * RopeMatch takes the text to search and the pattern to search for in the text.
1444 * RopeMatch returns false on OOM and otherwise returns the match index through
1445 * the 'match' outparam (-1 for not found).
1447 static bool
1448 RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match)
1450 uint32_t patLen = pat->length();
1451 if (patLen == 0) {
1452 *match = 0;
1453 return true;
1455 if (text->length() < patLen) {
1456 *match = -1;
1457 return true;
1461 * List of leaf nodes in the rope. If we run out of memory when trying to
1462 * append to this list, we can still fall back to StringMatch, so use the
1463 * system allocator so we don't report OOM in that case.
1465 LinearStringVector strings;
1468 * We don't want to do rope matching if there is a poor node-to-char ratio,
1469 * since this means spending a lot of time in the match loop below. We also
1470 * need to build the list of leaf nodes. Do both here: iterate over the
1471 * nodes so long as there are not too many.
1473 * We also don't use rope matching if the rope contains both Latin1 and
1474 * TwoByte nodes, to simplify the match algorithm.
1477 size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
1478 StringSegmentRange r(cx);
1479 if (!r.init(text))
1480 return false;
1482 bool textIsLatin1 = text->hasLatin1Chars();
1483 while (!r.empty()) {
1484 if (threshold-- == 0 ||
1485 r.front()->hasLatin1Chars() != textIsLatin1 ||
1486 !strings.append(r.front()))
1488 JSLinearString* linear = text->ensureLinear(cx);
1489 if (!linear)
1490 return false;
1492 *match = StringMatch(linear, pat);
1493 return true;
1495 if (!r.popFront())
1496 return false;
1500 AutoCheckCannotGC nogc;
1501 if (text->hasLatin1Chars()) {
1502 if (pat->hasLatin1Chars())
1503 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc), patLen);
1504 else
1505 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc), patLen);
1506 } else {
1507 if (pat->hasLatin1Chars())
1508 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc), patLen);
1509 else
1510 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc), patLen);
1513 return true;
1516 /* ES6 20121026 draft 15.5.4.24. */
1517 static bool
1518 str_contains(JSContext* cx, unsigned argc, Value* vp)
1520 CallArgs args = CallArgsFromVp(argc, vp);
1522 // Steps 1, 2, and 3
1523 RootedString str(cx, ThisToStringForStringProto(cx, args));
1524 if (!str)
1525 return false;
1527 // Steps 4 and 5
1528 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1529 if (!searchStr)
1530 return false;
1532 // Steps 6 and 7
1533 uint32_t pos = 0;
1534 if (args.hasDefined(1)) {
1535 if (args[1].isInt32()) {
1536 int i = args[1].toInt32();
1537 pos = (i < 0) ? 0U : uint32_t(i);
1538 } else {
1539 double d;
1540 if (!ToInteger(cx, args[1], &d))
1541 return false;
1542 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1546 // Step 8
1547 uint32_t textLen = str->length();
1549 // Step 9
1550 uint32_t start = Min(Max(pos, 0U), textLen);
1552 // Steps 10 and 11
1553 JSLinearString* text = str->ensureLinear(cx);
1554 if (!text)
1555 return false;
1557 args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
1558 return true;
1561 /* ES6 20120927 draft 15.5.4.7. */
1562 bool
1563 js::str_indexOf(JSContext* cx, unsigned argc, Value* vp)
1565 CallArgs args = CallArgsFromVp(argc, vp);
1567 // Steps 1, 2, and 3
1568 RootedString str(cx, ThisToStringForStringProto(cx, args));
1569 if (!str)
1570 return false;
1572 // Steps 4 and 5
1573 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1574 if (!searchStr)
1575 return false;
1577 // Steps 6 and 7
1578 uint32_t pos = 0;
1579 if (args.hasDefined(1)) {
1580 if (args[1].isInt32()) {
1581 int i = args[1].toInt32();
1582 pos = (i < 0) ? 0U : uint32_t(i);
1583 } else {
1584 double d;
1585 if (!ToInteger(cx, args[1], &d))
1586 return false;
1587 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1591 // Step 8
1592 uint32_t textLen = str->length();
1594 // Step 9
1595 uint32_t start = Min(Max(pos, 0U), textLen);
1597 // Steps 10 and 11
1598 JSLinearString* text = str->ensureLinear(cx);
1599 if (!text)
1600 return false;
1602 args.rval().setInt32(StringMatch(text, searchStr, start));
1603 return true;
1606 template <typename TextChar, typename PatChar>
1607 static int32_t
1608 LastIndexOfImpl(const TextChar* text, size_t textLen, const PatChar* pat, size_t patLen,
1609 size_t start)
1611 MOZ_ASSERT(patLen > 0);
1612 MOZ_ASSERT(patLen <= textLen);
1613 MOZ_ASSERT(start <= textLen - patLen);
1615 const PatChar p0 = *pat;
1616 const PatChar* patNext = pat + 1;
1617 const PatChar* patEnd = pat + patLen;
1619 for (const TextChar* t = text + start; t >= text; --t) {
1620 if (*t == p0) {
1621 const TextChar* t1 = t + 1;
1622 for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
1623 if (*t1 != *p1)
1624 goto break_continue;
1627 return static_cast<int32_t>(t - text);
1629 break_continue:;
1632 return -1;
1635 bool
1636 js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp)
1638 CallArgs args = CallArgsFromVp(argc, vp);
1639 RootedString textstr(cx, ThisToStringForStringProto(cx, args));
1640 if (!textstr)
1641 return false;
1643 RootedLinearString pat(cx, ArgToRootedString(cx, args, 0));
1644 if (!pat)
1645 return false;
1647 size_t textLen = textstr->length();
1648 size_t patLen = pat->length();
1649 int start = textLen - patLen; // Start searching here
1650 if (start < 0) {
1651 args.rval().setInt32(-1);
1652 return true;
1655 if (args.hasDefined(1)) {
1656 if (args[1].isInt32()) {
1657 int i = args[1].toInt32();
1658 if (i <= 0)
1659 start = 0;
1660 else if (i < start)
1661 start = i;
1662 } else {
1663 double d;
1664 if (!ToNumber(cx, args[1], &d))
1665 return false;
1666 if (!IsNaN(d)) {
1667 d = JS::ToInteger(d);
1668 if (d <= 0)
1669 start = 0;
1670 else if (d < start)
1671 start = int(d);
1676 if (patLen == 0) {
1677 args.rval().setInt32(start);
1678 return true;
1681 JSLinearString* text = textstr->ensureLinear(cx);
1682 if (!text)
1683 return false;
1685 int32_t res;
1686 AutoCheckCannotGC nogc;
1687 if (text->hasLatin1Chars()) {
1688 const Latin1Char* textChars = text->latin1Chars(nogc);
1689 if (pat->hasLatin1Chars())
1690 res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
1691 else
1692 res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
1693 } else {
1694 const char16_t* textChars = text->twoByteChars(nogc);
1695 if (pat->hasLatin1Chars())
1696 res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
1697 else
1698 res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
1701 args.rval().setInt32(res);
1702 return true;
1705 static bool
1706 HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
1708 MOZ_ASSERT(start + pat->length() <= text->length());
1710 size_t patLen = pat->length();
1712 AutoCheckCannotGC nogc;
1713 if (text->hasLatin1Chars()) {
1714 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1715 if (pat->hasLatin1Chars())
1716 return PodEqual(textChars, pat->latin1Chars(nogc), patLen);
1718 return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
1721 const char16_t* textChars = text->twoByteChars(nogc) + start;
1722 if (pat->hasTwoByteChars())
1723 return PodEqual(textChars, pat->twoByteChars(nogc), patLen);
1725 return EqualChars(pat->latin1Chars(nogc), textChars, patLen);
1728 /* ES6 20131108 draft 21.1.3.18. */
1729 bool
1730 js::str_startsWith(JSContext* cx, unsigned argc, Value* vp)
1732 CallArgs args = CallArgsFromVp(argc, vp);
1734 // Steps 1, 2, and 3
1735 RootedString str(cx, ThisToStringForStringProto(cx, args));
1736 if (!str)
1737 return false;
1739 // Step 4
1740 if (args.get(0).isObject() && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
1741 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1742 "first", "", "Regular Expression");
1743 return false;
1746 // Steps 5 and 6
1747 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1748 if (!searchStr)
1749 return false;
1751 // Steps 7 and 8
1752 uint32_t pos = 0;
1753 if (args.hasDefined(1)) {
1754 if (args[1].isInt32()) {
1755 int i = args[1].toInt32();
1756 pos = (i < 0) ? 0U : uint32_t(i);
1757 } else {
1758 double d;
1759 if (!ToInteger(cx, args[1], &d))
1760 return false;
1761 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1765 // Step 9
1766 uint32_t textLen = str->length();
1768 // Step 10
1769 uint32_t start = Min(Max(pos, 0U), textLen);
1771 // Step 11
1772 uint32_t searchLen = searchStr->length();
1774 // Step 12
1775 if (searchLen + start < searchLen || searchLen + start > textLen) {
1776 args.rval().setBoolean(false);
1777 return true;
1780 // Steps 13 and 14
1781 JSLinearString* text = str->ensureLinear(cx);
1782 if (!text)
1783 return false;
1785 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
1786 return true;
1789 /* ES6 20131108 draft 21.1.3.7. */
1790 static bool
1791 str_endsWith(JSContext* cx, unsigned argc, Value* vp)
1793 CallArgs args = CallArgsFromVp(argc, vp);
1795 // Steps 1, 2, and 3
1796 RootedString str(cx, ThisToStringForStringProto(cx, args));
1797 if (!str)
1798 return false;
1800 // Step 4
1801 if (args.get(0).isObject() && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
1802 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1803 "first", "", "Regular Expression");
1804 return false;
1807 // Steps 5 and 6
1808 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1809 if (!searchStr)
1810 return false;
1812 // Step 7
1813 uint32_t textLen = str->length();
1815 // Steps 8 and 9
1816 uint32_t pos = textLen;
1817 if (args.hasDefined(1)) {
1818 if (args[1].isInt32()) {
1819 int i = args[1].toInt32();
1820 pos = (i < 0) ? 0U : uint32_t(i);
1821 } else {
1822 double d;
1823 if (!ToInteger(cx, args[1], &d))
1824 return false;
1825 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1829 // Step 10
1830 uint32_t end = Min(Max(pos, 0U), textLen);
1832 // Step 11
1833 uint32_t searchLen = searchStr->length();
1835 // Step 13 (reordered)
1836 if (searchLen > end) {
1837 args.rval().setBoolean(false);
1838 return true;
1841 // Step 12
1842 uint32_t start = end - searchLen;
1844 // Steps 14 and 15
1845 JSLinearString* text = str->ensureLinear(cx);
1846 if (!text)
1847 return false;
1849 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
1850 return true;
1853 template <typename CharT>
1854 static void
1855 TrimString(const CharT* chars, bool trimLeft, bool trimRight, size_t length,
1856 size_t* pBegin, size_t* pEnd)
1858 size_t begin = 0, end = length;
1860 if (trimLeft) {
1861 while (begin < length && unicode::IsSpace(chars[begin]))
1862 ++begin;
1865 if (trimRight) {
1866 while (end > begin && unicode::IsSpace(chars[end - 1]))
1867 --end;
1870 *pBegin = begin;
1871 *pEnd = end;
1874 static bool
1875 TrimString(JSContext* cx, Value* vp, bool trimLeft, bool trimRight)
1877 CallReceiver call = CallReceiverFromVp(vp);
1878 RootedString str(cx, ThisToStringForStringProto(cx, call));
1879 if (!str)
1880 return false;
1882 JSLinearString* linear = str->ensureLinear(cx);
1883 if (!linear)
1884 return false;
1886 size_t length = linear->length();
1887 size_t begin, end;
1888 if (linear->hasLatin1Chars()) {
1889 AutoCheckCannotGC nogc;
1890 TrimString(linear->latin1Chars(nogc), trimLeft, trimRight, length, &begin, &end);
1891 } else {
1892 AutoCheckCannotGC nogc;
1893 TrimString(linear->twoByteChars(nogc), trimLeft, trimRight, length, &begin, &end);
1896 str = NewDependentString(cx, str, begin, end - begin);
1897 if (!str)
1898 return false;
1900 call.rval().setString(str);
1901 return true;
1904 static bool
1905 str_trim(JSContext* cx, unsigned argc, Value* vp)
1907 return TrimString(cx, vp, true, true);
1910 static bool
1911 str_trimLeft(JSContext* cx, unsigned argc, Value* vp)
1913 return TrimString(cx, vp, true, false);
1916 static bool
1917 str_trimRight(JSContext* cx, unsigned argc, Value* vp)
1919 return TrimString(cx, vp, false, true);
1923 * Perl-inspired string functions.
1926 namespace {
1928 /* Result of a successfully performed flat match. */
1929 class FlatMatch
1931 RootedAtom pat_;
1932 int32_t match_;
1934 friend class StringRegExpGuard;
1936 public:
1937 explicit FlatMatch(JSContext* cx) : pat_(cx) {}
1938 JSLinearString* pattern() const { return pat_; }
1939 size_t patternLength() const { return pat_->length(); }
1942 * Note: The match is -1 when the match is performed successfully,
1943 * but no match is found.
1945 int32_t match() const { return match_; }
1948 } /* anonymous namespace */
1950 static inline bool
1951 IsRegExpMetaChar(char16_t c)
1953 switch (c) {
1954 /* Taken from the PatternCharacter production in 15.10.1. */
1955 case '^': case '$': case '\\': case '.': case '*': case '+':
1956 case '?': case '(': case ')': case '[': case ']': case '{':
1957 case '}': case '|':
1958 return true;
1959 default:
1960 return false;
1964 template <typename CharT>
1965 bool
1966 js::HasRegExpMetaChars(const CharT* chars, size_t length)
1968 for (size_t i = 0; i < length; ++i) {
1969 if (IsRegExpMetaChar(chars[i]))
1970 return true;
1972 return false;
1975 template bool
1976 js::HasRegExpMetaChars<Latin1Char>(const Latin1Char* chars, size_t length);
1978 template bool
1979 js::HasRegExpMetaChars<char16_t>(const char16_t* chars, size_t length);
1981 bool
1982 js::StringHasRegExpMetaChars(JSLinearString* str)
1984 AutoCheckCannotGC nogc;
1985 if (str->hasLatin1Chars())
1986 return HasRegExpMetaChars(str->latin1Chars(nogc), str->length());
1988 return HasRegExpMetaChars(str->twoByteChars(nogc), str->length());
1991 namespace {
1994 * StringRegExpGuard factors logic out of String regexp operations.
1996 * |optarg| indicates in which argument position RegExp flags will be found, if
1997 * present. This is a Mozilla extension and not part of any ECMA spec.
1999 class MOZ_STACK_CLASS StringRegExpGuard
2001 RegExpGuard re_;
2002 FlatMatch fm;
2003 RootedObject obj_;
2006 * Upper bound on the number of characters we are willing to potentially
2007 * waste on searching for RegExp meta-characters.
2009 static const size_t MAX_FLAT_PAT_LEN = 256;
2011 template <typename CharT>
2012 static bool
2013 flattenPattern(StringBuffer& sb, const CharT* chars, size_t len)
2015 static const char ESCAPE_CHAR = '\\';
2016 for (const CharT* it = chars; it < chars + len; ++it) {
2017 if (IsRegExpMetaChar(*it)) {
2018 if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
2019 return false;
2020 } else {
2021 if (!sb.append(*it))
2022 return false;
2025 return true;
2028 static JSAtom*
2029 flattenPattern(JSContext* cx, JSAtom* pat)
2031 StringBuffer sb(cx);
2032 if (!sb.reserve(pat->length()))
2033 return nullptr;
2035 if (pat->hasLatin1Chars()) {
2036 AutoCheckCannotGC nogc;
2037 if (!flattenPattern(sb, pat->latin1Chars(nogc), pat->length()))
2038 return nullptr;
2039 } else {
2040 AutoCheckCannotGC nogc;
2041 if (!flattenPattern(sb, pat->twoByteChars(nogc), pat->length()))
2042 return nullptr;
2045 return sb.finishAtom();
2048 public:
2049 explicit StringRegExpGuard(JSContext* cx)
2050 : re_(cx), fm(cx), obj_(cx)
2053 /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
2054 bool init(JSContext* cx, CallArgs args, bool convertVoid = false)
2056 if (args.length() != 0 && IsObjectWithClass(args[0], ESClass_RegExp, cx))
2057 return init(cx, &args[0].toObject());
2059 if (convertVoid && !args.hasDefined(0)) {
2060 fm.pat_ = cx->runtime()->emptyString;
2061 return true;
2064 JSString* arg = ArgToRootedString(cx, args, 0);
2065 if (!arg)
2066 return false;
2068 fm.pat_ = AtomizeString(cx, arg);
2069 if (!fm.pat_)
2070 return false;
2072 return true;
2075 bool init(JSContext* cx, JSObject* regexp) {
2076 obj_ = regexp;
2078 MOZ_ASSERT(ObjectClassIs(obj_, ESClass_RegExp, cx));
2080 if (!RegExpToShared(cx, obj_, &re_))
2081 return false;
2082 return true;
2085 bool init(JSContext* cx, HandleString pattern) {
2086 fm.pat_ = AtomizeString(cx, pattern);
2087 if (!fm.pat_)
2088 return false;
2089 return true;
2093 * Attempt to match |patstr| to |textstr|. A flags argument, metachars in
2094 * the pattern string, or a lengthy pattern string can thwart this process.
2096 * |checkMetaChars| looks for regexp metachars in the pattern string.
2098 * Return whether flat matching could be used.
2100 * N.B. tryFlatMatch returns nullptr on OOM, so the caller must check
2101 * cx->isExceptionPending().
2103 const FlatMatch*
2104 tryFlatMatch(JSContext* cx, JSString* text, unsigned optarg, unsigned argc,
2105 bool checkMetaChars = true)
2107 if (re_.initialized())
2108 return nullptr;
2110 if (optarg < argc)
2111 return nullptr;
2113 size_t patLen = fm.pat_->length();
2114 if (checkMetaChars && (patLen > MAX_FLAT_PAT_LEN || StringHasRegExpMetaChars(fm.pat_)))
2115 return nullptr;
2118 * |text| could be a rope, so we want to avoid flattening it for as
2119 * long as possible.
2121 if (text->isRope()) {
2122 if (!RopeMatch(cx, &text->asRope(), fm.pat_, &fm.match_))
2123 return nullptr;
2124 } else {
2125 fm.match_ = StringMatch(&text->asLinear(), fm.pat_, 0);
2128 return &fm;
2131 /* If the pattern is not already a regular expression, make it so. */
2132 bool normalizeRegExp(JSContext* cx, bool flat, unsigned optarg, CallArgs args)
2134 if (re_.initialized())
2135 return true;
2137 /* Build RegExp from pattern string. */
2138 RootedString opt(cx);
2139 if (optarg < args.length()) {
2140 opt = ToString<CanGC>(cx, args[optarg]);
2141 if (!opt)
2142 return false;
2143 } else {
2144 opt = nullptr;
2147 Rooted<JSAtom*> pat(cx);
2148 if (flat) {
2149 pat = flattenPattern(cx, fm.pat_);
2150 if (!pat)
2151 return false;
2152 } else {
2153 pat = fm.pat_;
2155 MOZ_ASSERT(pat);
2157 return cx->compartment()->regExps.get(cx, pat, opt, &re_);
2160 bool zeroLastIndex(JSContext* cx) {
2161 if (!regExpIsObject())
2162 return true;
2164 // Use a fast path for same-global RegExp objects with writable
2165 // lastIndex.
2166 if (obj_->is<RegExpObject>()) {
2167 RegExpObject* nobj = &obj_->as<RegExpObject>();
2168 if (nobj->lookup(cx, cx->names().lastIndex)->writable()) {
2169 nobj->zeroLastIndex();
2170 return true;
2174 // Handle everything else generically (including throwing if .lastIndex is non-writable).
2175 RootedValue zero(cx, Int32Value(0));
2176 return JSObject::setProperty(cx, obj_, obj_, cx->names().lastIndex, &zero, true);
2179 RegExpShared& regExp() { return *re_; }
2181 bool regExpIsObject() { return obj_ != nullptr; }
2182 HandleObject regExpObject() {
2183 MOZ_ASSERT(regExpIsObject());
2184 return obj_;
2187 private:
2188 StringRegExpGuard(const StringRegExpGuard&) = delete;
2189 void operator=(const StringRegExpGuard&) = delete;
2192 } /* anonymous namespace */
2194 static bool
2195 DoMatchLocal(JSContext* cx, CallArgs args, RegExpStatics* res, HandleLinearString input,
2196 RegExpShared& re)
2198 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2199 RegExpRunStatus status = re.execute(cx, input, 0, &matches);
2200 if (status == RegExpRunStatus_Error)
2201 return false;
2203 if (status == RegExpRunStatus_Success_NotFound) {
2204 args.rval().setNull();
2205 return true;
2208 if (!res->updateFromMatchPairs(cx, input, matches))
2209 return false;
2211 RootedValue rval(cx);
2212 if (!CreateRegExpMatchResult(cx, input, matches, &rval))
2213 return false;
2215 args.rval().set(rval);
2216 return true;
2219 /* ES5 15.5.4.10 step 8. */
2220 static bool
2221 DoMatchGlobal(JSContext* cx, CallArgs args, RegExpStatics* res, HandleLinearString input,
2222 StringRegExpGuard& g)
2224 // Step 8a.
2226 // This single zeroing of "lastIndex" covers all "lastIndex" changes in the
2227 // rest of String.prototype.match, particularly in steps 8f(i) and
2228 // 8f(iii)(2)(a). Here's why.
2230 // The inputs to the calls to RegExp.prototype.exec are a RegExp object
2231 // whose .global is true and a string. The only side effect of a call in
2232 // these circumstances is that the RegExp's .lastIndex will be modified to
2233 // the next starting index after the discovered match (or to 0 if there's
2234 // no remaining match). Because .lastIndex is a non-configurable data
2235 // property and no script-controllable code executes after step 8a, passing
2236 // step 8a implies *every* .lastIndex set succeeds. String.prototype.match
2237 // calls RegExp.prototype.exec repeatedly, and the last call doesn't match,
2238 // so the final value of .lastIndex is 0: exactly the state after step 8a
2239 // succeeds. No spec step lets script observe intermediate .lastIndex
2240 // values.
2242 // The arrays returned by RegExp.prototype.exec always have a string at
2243 // index 0, for which [[Get]]s have no side effects.
2245 // Filling in a new array using [[DefineOwnProperty]] is unobservable.
2247 // This is a tricky point, because after this set, our implementation *can*
2248 // fail. The key is that script can't distinguish these failure modes from
2249 // one where, in spec terms, we fail immediately after step 8a. That *in
2250 // reality* we might have done extra matching work, or created a partial
2251 // results array to return, or hit an interrupt, is irrelevant. The
2252 // script can't tell we did any of those things but didn't update
2253 // .lastIndex. Thus we can optimize steps 8b onward however we want,
2254 // including eliminating intermediate .lastIndex sets, as long as we don't
2255 // add ways for script to observe the intermediate states.
2257 // In short: it's okay to cheat (by setting .lastIndex to 0, once) because
2258 // we can't get caught.
2259 if (!g.zeroLastIndex(cx))
2260 return false;
2262 // Step 8b.
2263 AutoValueVector elements(cx);
2265 size_t lastSuccessfulStart = 0;
2267 // The loop variables from steps 8c-e aren't needed, as we use different
2268 // techniques from the spec to implement step 8f's loop.
2270 // Step 8f.
2271 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2272 size_t charsLen = input->length();
2273 RegExpShared& re = g.regExp();
2274 for (size_t searchIndex = 0; searchIndex <= charsLen; ) {
2275 if (!CheckForInterrupt(cx))
2276 return false;
2278 // Steps 8f(i-ii), minus "lastIndex" updates (see above).
2279 RegExpRunStatus status = re.execute(cx, input, searchIndex, &matches);
2280 if (status == RegExpRunStatus_Error)
2281 return false;
2283 // Step 8f(ii).
2284 if (status == RegExpRunStatus_Success_NotFound)
2285 break;
2287 lastSuccessfulStart = searchIndex;
2288 MatchPair& match = matches[0];
2290 // Steps 8f(iii)(1-3).
2291 searchIndex = match.isEmpty() ? match.limit + 1 : match.limit;
2293 // Step 8f(iii)(4-5).
2294 JSLinearString* str = NewDependentString(cx, input, match.start, match.length());
2295 if (!str)
2296 return false;
2297 if (!elements.append(StringValue(str)))
2298 return false;
2301 // Step 8g.
2302 if (elements.empty()) {
2303 args.rval().setNull();
2304 return true;
2307 // The last *successful* match updates the RegExpStatics. (Interestingly,
2308 // this implies that String.prototype.match's semantics aren't those
2309 // implied by the RegExp.prototype.exec calls in the ES5 algorithm.)
2310 res->updateLazily(cx, input, &re, lastSuccessfulStart);
2312 // Steps 8b, 8f(iii)(5-6), 8h.
2313 JSObject* array = NewDenseCopiedArray(cx, elements.length(), elements.begin());
2314 if (!array)
2315 return false;
2317 args.rval().setObject(*array);
2318 return true;
2321 static bool
2322 BuildFlatMatchArray(JSContext* cx, HandleString textstr, const FlatMatch& fm, CallArgs* args)
2324 if (fm.match() < 0) {
2325 args->rval().setNull();
2326 return true;
2329 /* For this non-global match, produce a RegExp.exec-style array. */
2330 RootedObject obj(cx, NewDenseEmptyArray(cx));
2331 if (!obj)
2332 return false;
2334 RootedValue patternVal(cx, StringValue(fm.pattern()));
2335 RootedValue matchVal(cx, Int32Value(fm.match()));
2336 RootedValue textVal(cx, StringValue(textstr));
2338 if (!JSObject::defineElement(cx, obj, 0, patternVal) ||
2339 !JSObject::defineProperty(cx, obj, cx->names().index, matchVal) ||
2340 !JSObject::defineProperty(cx, obj, cx->names().input, textVal))
2342 return false;
2345 args->rval().setObject(*obj);
2346 return true;
2349 /* ES5 15.5.4.10. */
2350 bool
2351 js::str_match(JSContext* cx, unsigned argc, Value* vp)
2353 CallArgs args = CallArgsFromVp(argc, vp);
2355 /* Steps 1-2. */
2356 RootedString str(cx, ThisToStringForStringProto(cx, args));
2357 if (!str)
2358 return false;
2360 /* Steps 3-4, plus the trailing-argument "flags" extension. */
2361 StringRegExpGuard g(cx);
2362 if (!g.init(cx, args, true))
2363 return false;
2365 /* Fast path when the search pattern can be searched for as a string. */
2366 if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length()))
2367 return BuildFlatMatchArray(cx, str, *fm, &args);
2369 /* Return if there was an error in tryFlatMatch. */
2370 if (cx->isExceptionPending())
2371 return false;
2373 /* Create regular-expression internals as needed to perform the match. */
2374 if (!g.normalizeRegExp(cx, false, 1, args))
2375 return false;
2377 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
2378 if (!res)
2379 return false;
2381 RootedLinearString linearStr(cx, str->ensureLinear(cx));
2382 if (!linearStr)
2383 return false;
2385 /* Steps 5-6, 7. */
2386 if (!g.regExp().global())
2387 return DoMatchLocal(cx, args, res, linearStr, g.regExp());
2389 /* Steps 6, 8. */
2390 return DoMatchGlobal(cx, args, res, linearStr, g);
2393 bool
2394 js::str_search(JSContext* cx, unsigned argc, Value* vp)
2396 CallArgs args = CallArgsFromVp(argc, vp);
2397 RootedString str(cx, ThisToStringForStringProto(cx, args));
2398 if (!str)
2399 return false;
2401 StringRegExpGuard g(cx);
2402 if (!g.init(cx, args, true))
2403 return false;
2404 if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length())) {
2405 args.rval().setInt32(fm->match());
2406 return true;
2409 if (cx->isExceptionPending()) /* from tryFlatMatch */
2410 return false;
2412 if (!g.normalizeRegExp(cx, false, 1, args))
2413 return false;
2415 RootedLinearString linearStr(cx, str->ensureLinear(cx));
2416 if (!linearStr)
2417 return false;
2419 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
2420 if (!res)
2421 return false;
2423 /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
2424 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2425 RegExpRunStatus status = g.regExp().execute(cx, linearStr, 0, &matches);
2426 if (status == RegExpRunStatus_Error)
2427 return false;
2429 if (status == RegExpRunStatus_Success)
2430 res->updateLazily(cx, linearStr, &g.regExp(), 0);
2432 args.rval().setInt32(status == RegExpRunStatus_Success_NotFound ? -1 : matches[0].start);
2433 return true;
2436 // Utility for building a rope (lazy concatenation) of strings.
2437 class RopeBuilder {
2438 JSContext* cx;
2439 RootedString res;
2441 RopeBuilder(const RopeBuilder& other) = delete;
2442 void operator=(const RopeBuilder& other) = delete;
2444 public:
2445 explicit RopeBuilder(JSContext* cx)
2446 : cx(cx), res(cx, cx->runtime()->emptyString)
2449 inline bool append(HandleString str) {
2450 res = ConcatStrings<CanGC>(cx, res, str);
2451 return !!res;
2454 inline JSString* result() {
2455 return res;
2459 namespace {
2461 template <typename CharT>
2462 static uint32_t
2463 FindDollarIndex(const CharT* chars, size_t length)
2465 if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2466 uint32_t dollarIndex = p - chars;
2467 MOZ_ASSERT(dollarIndex < length);
2468 return dollarIndex;
2470 return UINT32_MAX;
2473 struct ReplaceData
2475 explicit ReplaceData(JSContext* cx)
2476 : str(cx), g(cx), lambda(cx), elembase(cx), repstr(cx),
2477 fig(cx, NullValue()), sb(cx)
2480 inline void setReplacementString(JSLinearString* string) {
2481 MOZ_ASSERT(string);
2482 lambda = nullptr;
2483 elembase = nullptr;
2484 repstr = string;
2486 AutoCheckCannotGC nogc;
2487 dollarIndex = string->hasLatin1Chars()
2488 ? FindDollarIndex(string->latin1Chars(nogc), string->length())
2489 : FindDollarIndex(string->twoByteChars(nogc), string->length());
2492 inline void setReplacementFunction(JSObject* func) {
2493 MOZ_ASSERT(func);
2494 lambda = func;
2495 elembase = nullptr;
2496 repstr = nullptr;
2497 dollarIndex = UINT32_MAX;
2500 RootedString str; /* 'this' parameter object as a string */
2501 StringRegExpGuard g; /* regexp parameter object and private data */
2502 RootedObject lambda; /* replacement function object or null */
2503 RootedNativeObject elembase; /* object for function(a){return b[a]} replace */
2504 RootedLinearString repstr; /* replacement string */
2505 uint32_t dollarIndex; /* index of first $ in repstr, or UINT32_MAX */
2506 int leftIndex; /* left context index in str->chars */
2507 bool calledBack; /* record whether callback has been called */
2508 FastInvokeGuard fig; /* used for lambda calls, also holds arguments */
2509 StringBuffer sb; /* buffer built during DoMatch */
2512 } /* anonymous namespace */
2514 static bool
2515 ReplaceRegExp(JSContext* cx, RegExpStatics* res, ReplaceData& rdata);
2517 static bool
2518 DoMatchForReplaceLocal(JSContext* cx, RegExpStatics* res, HandleLinearString linearStr,
2519 RegExpShared& re, ReplaceData& rdata)
2521 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2522 RegExpRunStatus status = re.execute(cx, linearStr, 0, &matches);
2523 if (status == RegExpRunStatus_Error)
2524 return false;
2526 if (status == RegExpRunStatus_Success_NotFound)
2527 return true;
2529 if (!res->updateFromMatchPairs(cx, linearStr, matches))
2530 return false;
2532 return ReplaceRegExp(cx, res, rdata);
2535 static bool
2536 DoMatchForReplaceGlobal(JSContext* cx, RegExpStatics* res, HandleLinearString linearStr,
2537 RegExpShared& re, ReplaceData& rdata)
2539 size_t charsLen = linearStr->length();
2540 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2541 for (size_t count = 0, searchIndex = 0; searchIndex <= charsLen; ++count) {
2542 if (!CheckForInterrupt(cx))
2543 return false;
2545 RegExpRunStatus status = re.execute(cx, linearStr, searchIndex, &matches);
2546 if (status == RegExpRunStatus_Error)
2547 return false;
2549 if (status == RegExpRunStatus_Success_NotFound)
2550 break;
2552 MatchPair& match = matches[0];
2553 searchIndex = match.isEmpty() ? match.limit + 1 : match.limit;
2555 if (!res->updateFromMatchPairs(cx, linearStr, matches))
2556 return false;
2558 if (!ReplaceRegExp(cx, res, rdata))
2559 return false;
2562 return true;
2565 template <typename CharT>
2566 static bool
2567 InterpretDollar(RegExpStatics* res, const CharT* bp, const CharT* dp, const CharT* ep,
2568 ReplaceData& rdata, JSSubString* out, size_t* skip)
2570 MOZ_ASSERT(*dp == '$');
2572 /* If there is only a dollar, bail now */
2573 if (dp + 1 >= ep)
2574 return false;
2576 /* Interpret all Perl match-induced dollar variables. */
2577 char16_t dc = dp[1];
2578 if (JS7_ISDEC(dc)) {
2579 /* ECMA-262 Edition 3: 1-9 or 01-99 */
2580 unsigned num = JS7_UNDEC(dc);
2581 if (num > res->getMatches().parenCount())
2582 return false;
2584 const CharT* cp = dp + 2;
2585 if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
2586 unsigned tmp = 10 * num + JS7_UNDEC(dc);
2587 if (tmp <= res->getMatches().parenCount()) {
2588 cp++;
2589 num = tmp;
2592 if (num == 0)
2593 return false;
2595 *skip = cp - dp;
2597 MOZ_ASSERT(num <= res->getMatches().parenCount());
2600 * Note: we index to get the paren with the (1-indexed) pair
2601 * number, as opposed to a (0-indexed) paren number.
2603 res->getParen(num, out);
2604 return true;
2607 *skip = 2;
2608 switch (dc) {
2609 case '$':
2610 out->init(rdata.repstr, dp - bp, 1);
2611 return true;
2612 case '&':
2613 res->getLastMatch(out);
2614 return true;
2615 case '+':
2616 res->getLastParen(out);
2617 return true;
2618 case '`':
2619 res->getLeftContext(out);
2620 return true;
2621 case '\'':
2622 res->getRightContext(out);
2623 return true;
2625 return false;
2628 template <typename CharT>
2629 static bool
2630 FindReplaceLengthString(JSContext* cx, RegExpStatics* res, ReplaceData& rdata, size_t* sizep)
2632 JSLinearString* repstr = rdata.repstr;
2633 CheckedInt<uint32_t> replen = repstr->length();
2635 if (rdata.dollarIndex != UINT32_MAX) {
2636 AutoCheckCannotGC nogc;
2637 MOZ_ASSERT(rdata.dollarIndex < repstr->length());
2638 const CharT* bp = repstr->chars<CharT>(nogc);
2639 const CharT* dp = bp + rdata.dollarIndex;
2640 const CharT* ep = bp + repstr->length();
2641 do {
2642 JSSubString sub;
2643 size_t skip;
2644 if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
2645 if (sub.length > skip)
2646 replen += sub.length - skip;
2647 else
2648 replen -= skip - sub.length;
2649 dp += skip;
2650 } else {
2651 dp++;
2654 dp = js_strchr_limit(dp, '$', ep);
2655 } while (dp);
2658 if (!replen.isValid()) {
2659 js_ReportAllocationOverflow(cx);
2660 return false;
2663 *sizep = replen.value();
2664 return true;
2667 static bool
2668 FindReplaceLength(JSContext* cx, RegExpStatics* res, ReplaceData& rdata, size_t* sizep)
2670 if (rdata.elembase) {
2672 * The base object is used when replace was passed a lambda which looks like
2673 * 'function(a) { return b[a]; }' for the base object b. b will not change
2674 * in the course of the replace unless we end up making a scripted call due
2675 * to accessing a scripted getter or a value with a scripted toString.
2677 MOZ_ASSERT(rdata.lambda);
2678 MOZ_ASSERT(!rdata.elembase->getOps()->lookupProperty);
2679 MOZ_ASSERT(!rdata.elembase->getOps()->getProperty);
2681 RootedValue match(cx);
2682 if (!res->createLastMatch(cx, &match))
2683 return false;
2684 JSAtom* atom = ToAtom<CanGC>(cx, match);
2685 if (!atom)
2686 return false;
2688 RootedValue v(cx);
2689 if (HasDataProperty(cx, rdata.elembase, AtomToId(atom), v.address()) && v.isString()) {
2690 rdata.repstr = v.toString()->ensureLinear(cx);
2691 if (!rdata.repstr)
2692 return false;
2693 *sizep = rdata.repstr->length();
2694 return true;
2698 * Couldn't handle this property, fall through and despecialize to the
2699 * general lambda case.
2701 rdata.elembase = nullptr;
2704 if (rdata.lambda) {
2705 RootedObject lambda(cx, rdata.lambda);
2706 PreserveRegExpStatics staticsGuard(cx, res);
2707 if (!staticsGuard.init(cx))
2708 return false;
2711 * In the lambda case, not only do we find the replacement string's
2712 * length, we compute repstr and return it via rdata for use within
2713 * DoReplace. The lambda is called with arguments ($&, $1, $2, ...,
2714 * index, input), i.e., all the properties of a regexp match array.
2715 * For $&, etc., we must create string jsvals from cx->regExpStatics.
2716 * We grab up stack space to keep the newborn strings GC-rooted.
2718 unsigned p = res->getMatches().parenCount();
2719 unsigned argc = 1 + p + 2;
2721 InvokeArgs& args = rdata.fig.args();
2722 if (!args.init(argc))
2723 return false;
2725 args.setCallee(ObjectValue(*lambda));
2726 args.setThis(UndefinedValue());
2728 /* Push $&, $1, $2, ... */
2729 unsigned argi = 0;
2730 if (!res->createLastMatch(cx, args[argi++]))
2731 return false;
2733 for (size_t i = 0; i < res->getMatches().parenCount(); ++i) {
2734 if (!res->createParen(cx, i + 1, args[argi++]))
2735 return false;
2738 /* Push match index and input string. */
2739 args[argi++].setInt32(res->getMatches()[0].start);
2740 args[argi].setString(rdata.str);
2742 if (!rdata.fig.invoke(cx))
2743 return false;
2745 /* root repstr: rdata is on the stack, so scanned by conservative gc. */
2746 JSString* repstr = ToString<CanGC>(cx, args.rval());
2747 if (!repstr)
2748 return false;
2749 rdata.repstr = repstr->ensureLinear(cx);
2750 if (!rdata.repstr)
2751 return false;
2752 *sizep = rdata.repstr->length();
2753 return true;
2756 return rdata.repstr->hasLatin1Chars()
2757 ? FindReplaceLengthString<Latin1Char>(cx, res, rdata, sizep)
2758 : FindReplaceLengthString<char16_t>(cx, res, rdata, sizep);
2762 * Precondition: |rdata.sb| already has necessary growth space reserved (as
2763 * derived from FindReplaceLength), and has been inflated to TwoByte if
2764 * necessary.
2766 template <typename CharT>
2767 static void
2768 DoReplace(RegExpStatics* res, ReplaceData& rdata)
2770 AutoCheckCannotGC nogc;
2771 JSLinearString* repstr = rdata.repstr;
2772 const CharT* bp = repstr->chars<CharT>(nogc);
2773 const CharT* cp = bp;
2775 if (rdata.dollarIndex != UINT32_MAX) {
2776 MOZ_ASSERT(rdata.dollarIndex < repstr->length());
2777 const CharT* dp = bp + rdata.dollarIndex;
2778 const CharT* ep = bp + repstr->length();
2779 do {
2780 /* Move one of the constant portions of the replacement value. */
2781 size_t len = dp - cp;
2782 rdata.sb.infallibleAppend(cp, len);
2783 cp = dp;
2785 JSSubString sub;
2786 size_t skip;
2787 if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
2788 rdata.sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
2789 cp += skip;
2790 dp += skip;
2791 } else {
2792 dp++;
2795 dp = js_strchr_limit(dp, '$', ep);
2796 } while (dp);
2798 rdata.sb.infallibleAppend(cp, repstr->length() - (cp - bp));
2801 static bool
2802 ReplaceRegExp(JSContext* cx, RegExpStatics* res, ReplaceData& rdata)
2805 const MatchPair& match = res->getMatches()[0];
2806 MOZ_ASSERT(!match.isUndefined());
2807 MOZ_ASSERT(match.limit >= match.start && match.limit >= 0);
2809 rdata.calledBack = true;
2810 size_t leftoff = rdata.leftIndex;
2811 size_t leftlen = match.start - leftoff;
2812 rdata.leftIndex = match.limit;
2814 size_t replen = 0; /* silence 'unused' warning */
2815 if (!FindReplaceLength(cx, res, rdata, &replen))
2816 return false;
2818 CheckedInt<uint32_t> newlen(rdata.sb.length());
2819 newlen += leftlen;
2820 newlen += replen;
2821 if (!newlen.isValid()) {
2822 js_ReportAllocationOverflow(cx);
2823 return false;
2827 * Inflate the buffer now if needed, to avoid (fallible) Latin1 to TwoByte
2828 * inflation later on.
2830 JSLinearString& str = rdata.str->asLinear(); /* flattened for regexp */
2831 if (str.hasTwoByteChars() || rdata.repstr->hasTwoByteChars()) {
2832 if (!rdata.sb.ensureTwoByteChars())
2833 return false;
2836 if (!rdata.sb.reserve(newlen.value()))
2837 return false;
2839 /* Append skipped-over portion of the search value. */
2840 rdata.sb.infallibleAppendSubstring(&str, leftoff, leftlen);
2842 if (rdata.repstr->hasLatin1Chars())
2843 DoReplace<Latin1Char>(res, rdata);
2844 else
2845 DoReplace<char16_t>(res, rdata);
2846 return true;
2849 static bool
2850 BuildFlatReplacement(JSContext* cx, HandleString textstr, HandleString repstr,
2851 const FlatMatch& fm, MutableHandleValue rval)
2853 RopeBuilder builder(cx);
2854 size_t match = fm.match();
2855 size_t matchEnd = match + fm.patternLength();
2857 if (textstr->isRope()) {
2859 * If we are replacing over a rope, avoid flattening it by iterating
2860 * through it, building a new rope.
2862 StringSegmentRange r(cx);
2863 if (!r.init(textstr))
2864 return false;
2865 size_t pos = 0;
2866 while (!r.empty()) {
2867 RootedString str(cx, r.front());
2868 size_t len = str->length();
2869 size_t strEnd = pos + len;
2870 if (pos < matchEnd && strEnd > match) {
2872 * We need to special-case any part of the rope that overlaps
2873 * with the replacement string.
2875 if (match >= pos) {
2877 * If this part of the rope overlaps with the left side of
2878 * the pattern, then it must be the only one to overlap with
2879 * the first character in the pattern, so we include the
2880 * replacement string here.
2882 RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2883 if (!leftSide ||
2884 !builder.append(leftSide) ||
2885 !builder.append(repstr)) {
2886 return false;
2891 * If str runs off the end of the matched string, append the
2892 * last part of str.
2894 if (strEnd > matchEnd) {
2895 RootedString rightSide(cx, NewDependentString(cx, str, matchEnd - pos,
2896 strEnd - matchEnd));
2897 if (!rightSide || !builder.append(rightSide))
2898 return false;
2900 } else {
2901 if (!builder.append(str))
2902 return false;
2904 pos += str->length();
2905 if (!r.popFront())
2906 return false;
2908 } else {
2909 RootedString leftSide(cx, NewDependentString(cx, textstr, 0, match));
2910 if (!leftSide)
2911 return false;
2912 RootedString rightSide(cx);
2913 rightSide = NewDependentString(cx, textstr, match + fm.patternLength(),
2914 textstr->length() - match - fm.patternLength());
2915 if (!rightSide ||
2916 !builder.append(leftSide) ||
2917 !builder.append(repstr) ||
2918 !builder.append(rightSide)) {
2919 return false;
2923 rval.setString(builder.result());
2924 return true;
2927 template <typename CharT>
2928 static bool
2929 AppendDollarReplacement(StringBuffer& newReplaceChars, size_t firstDollarIndex,
2930 const FlatMatch& fm, JSLinearString* text,
2931 const CharT* repChars, size_t repLength)
2933 MOZ_ASSERT(firstDollarIndex < repLength);
2935 size_t matchStart = fm.match();
2936 size_t matchLimit = matchStart + fm.patternLength();
2938 /* Move the pre-dollar chunk in bulk. */
2939 newReplaceChars.infallibleAppend(repChars, firstDollarIndex);
2941 /* Move the rest char-by-char, interpreting dollars as we encounter them. */
2942 const CharT* repLimit = repChars + repLength;
2943 for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
2944 if (*it != '$' || it == repLimit - 1) {
2945 if (!newReplaceChars.append(*it))
2946 return false;
2947 continue;
2950 switch (*(it + 1)) {
2951 case '$': /* Eat one of the dollars. */
2952 if (!newReplaceChars.append(*it))
2953 return false;
2954 break;
2955 case '&':
2956 if (!newReplaceChars.appendSubstring(text, matchStart, matchLimit - matchStart))
2957 return false;
2958 break;
2959 case '`':
2960 if (!newReplaceChars.appendSubstring(text, 0, matchStart))
2961 return false;
2962 break;
2963 case '\'':
2964 if (!newReplaceChars.appendSubstring(text, matchLimit, text->length() - matchLimit))
2965 return false;
2966 break;
2967 default: /* The dollar we saw was not special (no matter what its mother told it). */
2968 if (!newReplaceChars.append(*it))
2969 return false;
2970 continue;
2972 ++it; /* We always eat an extra char in the above switch. */
2975 return true;
2979 * Perform a linear-scan dollar substitution on the replacement text,
2980 * constructing a result string that looks like:
2982 * newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:]
2984 static inline bool
2985 BuildDollarReplacement(JSContext* cx, JSString* textstrArg, JSLinearString* repstr,
2986 uint32_t firstDollarIndex, const FlatMatch& fm, MutableHandleValue rval)
2988 RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
2989 if (!textstr)
2990 return false;
2992 size_t matchStart = fm.match();
2993 size_t matchLimit = matchStart + fm.patternLength();
2996 * Most probably:
2998 * len(newstr) >= len(orig) - len(match) + len(replacement)
3000 * Note that dollar vars _could_ make the resulting text smaller than this.
3002 StringBuffer newReplaceChars(cx);
3003 if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars())
3004 return false;
3006 if (!newReplaceChars.reserve(textstr->length() - fm.patternLength() + repstr->length()))
3007 return false;
3009 bool res;
3010 if (repstr->hasLatin1Chars()) {
3011 AutoCheckCannotGC nogc;
3012 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, fm, textstr,
3013 repstr->latin1Chars(nogc), repstr->length());
3014 } else {
3015 AutoCheckCannotGC nogc;
3016 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, fm, textstr,
3017 repstr->twoByteChars(nogc), repstr->length());
3019 if (!res)
3020 return false;
3022 RootedString leftSide(cx, NewDependentString(cx, textstr, 0, matchStart));
3023 if (!leftSide)
3024 return false;
3026 RootedString newReplace(cx, newReplaceChars.finishString());
3027 if (!newReplace)
3028 return false;
3030 MOZ_ASSERT(textstr->length() >= matchLimit);
3031 RootedString rightSide(cx, NewDependentString(cx, textstr, matchLimit,
3032 textstr->length() - matchLimit));
3033 if (!rightSide)
3034 return false;
3036 RopeBuilder builder(cx);
3037 if (!builder.append(leftSide) || !builder.append(newReplace) || !builder.append(rightSide))
3038 return false;
3040 rval.setString(builder.result());
3041 return true;
3044 struct StringRange
3046 size_t start;
3047 size_t length;
3049 StringRange(size_t s, size_t l)
3050 : start(s), length(l)
3054 template <typename CharT>
3055 static void
3056 CopySubstringsToFatInline(JSFatInlineString* dest, const CharT* src, const StringRange* ranges,
3057 size_t rangesLen, size_t outputLen)
3059 CharT* buf = dest->init<CharT>(outputLen);
3060 size_t pos = 0;
3061 for (size_t i = 0; i < rangesLen; i++) {
3062 PodCopy(buf + pos, src + ranges[i].start, ranges[i].length);
3063 pos += ranges[i].length;
3066 MOZ_ASSERT(pos == outputLen);
3067 buf[outputLen] = 0;
3070 static inline JSFatInlineString*
3071 FlattenSubstrings(JSContext* cx, Handle<JSFlatString*> flatStr, const StringRange* ranges,
3072 size_t rangesLen, size_t outputLen)
3074 JSFatInlineString* str = NewGCFatInlineString<CanGC>(cx);
3075 if (!str)
3076 return nullptr;
3078 AutoCheckCannotGC nogc;
3079 if (flatStr->hasLatin1Chars())
3080 CopySubstringsToFatInline(str, flatStr->latin1Chars(nogc), ranges, rangesLen, outputLen);
3081 else
3082 CopySubstringsToFatInline(str, flatStr->twoByteChars(nogc), ranges, rangesLen, outputLen);
3083 return str;
3086 static JSString*
3087 AppendSubstrings(JSContext* cx, Handle<JSFlatString*> flatStr,
3088 const StringRange* ranges, size_t rangesLen)
3090 MOZ_ASSERT(rangesLen);
3092 /* For single substrings, construct a dependent string. */
3093 if (rangesLen == 1)
3094 return NewDependentString(cx, flatStr, ranges[0].start, ranges[0].length);
3096 bool isLatin1 = flatStr->hasLatin1Chars();
3097 uint32_t fatInlineMaxLength = JSFatInlineString::MAX_LENGTH_TWO_BYTE;
3098 if (isLatin1)
3099 fatInlineMaxLength = JSFatInlineString::MAX_LENGTH_LATIN1;
3101 /* Collect substrings into a rope */
3102 size_t i = 0;
3103 RopeBuilder rope(cx);
3104 RootedString part(cx, nullptr);
3105 while (i < rangesLen) {
3107 /* Find maximum range that fits in JSFatInlineString */
3108 size_t substrLen = 0;
3109 size_t end = i;
3110 for (; end < rangesLen; end++) {
3111 if (substrLen + ranges[end].length > fatInlineMaxLength)
3112 break;
3113 substrLen += ranges[end].length;
3116 if (i == end) {
3117 /* Not even one range fits JSFatInlineString, use DependentString */
3118 const StringRange& sr = ranges[i++];
3119 part = NewDependentString(cx, flatStr, sr.start, sr.length);
3120 } else {
3121 /* Copy the ranges (linearly) into a JSFatInlineString */
3122 part = FlattenSubstrings(cx, flatStr, ranges + i, end - i, substrLen);
3123 i = end;
3126 if (!part)
3127 return nullptr;
3129 /* Appending to the rope permanently roots the substring. */
3130 if (!rope.append(part))
3131 return nullptr;
3134 return rope.result();
3137 static bool
3138 StrReplaceRegexpRemove(JSContext* cx, HandleString str, RegExpShared& re, MutableHandleValue rval)
3140 Rooted<JSFlatString*> flatStr(cx, str->ensureFlat(cx));
3141 if (!flatStr)
3142 return false;
3144 Vector<StringRange, 16, SystemAllocPolicy> ranges;
3146 size_t charsLen = flatStr->length();
3148 ScopedMatchPairs matches(&cx->tempLifoAlloc());
3149 size_t startIndex = 0; /* Index used for iterating through the string. */
3150 size_t lastIndex = 0; /* Index after last successful match. */
3151 size_t lazyIndex = 0; /* Index before last successful match. */
3153 /* Accumulate StringRanges for unmatched substrings. */
3154 while (startIndex <= charsLen) {
3155 if (!CheckForInterrupt(cx))
3156 return false;
3158 RegExpRunStatus status = re.execute(cx, flatStr, startIndex, &matches);
3159 if (status == RegExpRunStatus_Error)
3160 return false;
3161 if (status == RegExpRunStatus_Success_NotFound)
3162 break;
3163 MatchPair& match = matches[0];
3165 /* Include the latest unmatched substring. */
3166 if (size_t(match.start) > lastIndex) {
3167 if (!ranges.append(StringRange(lastIndex, match.start - lastIndex)))
3168 return false;
3171 lazyIndex = lastIndex;
3172 lastIndex = match.limit;
3174 startIndex = match.isEmpty() ? match.limit + 1 : match.limit;
3176 /* Non-global removal executes at most once. */
3177 if (!re.global())
3178 break;
3181 RegExpStatics* res;
3183 /* If unmatched, return the input string. */
3184 if (!lastIndex) {
3185 if (startIndex > 0) {
3186 res = cx->global()->getRegExpStatics(cx);
3187 if (!res)
3188 return false;
3189 res->updateLazily(cx, flatStr, &re, lazyIndex);
3191 rval.setString(str);
3192 return true;
3195 /* The last successful match updates the RegExpStatics. */
3196 res = cx->global()->getRegExpStatics(cx);
3197 if (!res)
3198 return false;
3200 res->updateLazily(cx, flatStr, &re, lazyIndex);
3202 /* Include any remaining part of the string. */
3203 if (lastIndex < charsLen) {
3204 if (!ranges.append(StringRange(lastIndex, charsLen - lastIndex)))
3205 return false;
3208 /* Handle the empty string before calling .begin(). */
3209 if (ranges.empty()) {
3210 rval.setString(cx->runtime()->emptyString);
3211 return true;
3214 JSString* result = AppendSubstrings(cx, flatStr, ranges.begin(), ranges.length());
3215 if (!result)
3216 return false;
3218 rval.setString(result);
3219 return true;
3222 static inline bool
3223 StrReplaceRegExp(JSContext* cx, ReplaceData& rdata, MutableHandleValue rval)
3225 rdata.leftIndex = 0;
3226 rdata.calledBack = false;
3228 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3229 if (!res)
3230 return false;
3232 RegExpShared& re = rdata.g.regExp();
3234 // The spec doesn't describe this function very clearly, so we go ahead and
3235 // assume that when the input to String.prototype.replace is a global
3236 // RegExp, calling the replacer function (assuming one was provided) takes
3237 // place only after the matching is done. See the comment at the beginning
3238 // of DoMatchGlobal explaining why we can zero the the RegExp object's
3239 // lastIndex property here.
3240 if (re.global() && !rdata.g.zeroLastIndex(cx))
3241 return false;
3243 /* Optimize removal. */
3244 if (rdata.repstr && rdata.repstr->length() == 0) {
3245 MOZ_ASSERT(!rdata.lambda && !rdata.elembase && rdata.dollarIndex == UINT32_MAX);
3246 return StrReplaceRegexpRemove(cx, rdata.str, re, rval);
3249 RootedLinearString linearStr(cx, rdata.str->ensureLinear(cx));
3250 if (!linearStr)
3251 return false;
3253 if (re.global()) {
3254 if (!DoMatchForReplaceGlobal(cx, res, linearStr, re, rdata))
3255 return false;
3256 } else {
3257 if (!DoMatchForReplaceLocal(cx, res, linearStr, re, rdata))
3258 return false;
3261 if (!rdata.calledBack) {
3262 /* Didn't match, so the string is unmodified. */
3263 rval.setString(rdata.str);
3264 return true;
3267 JSSubString sub;
3268 res->getRightContext(&sub);
3269 if (!rdata.sb.appendSubstring(sub.base, sub.offset, sub.length))
3270 return false;
3272 JSString* retstr = rdata.sb.finishString();
3273 if (!retstr)
3274 return false;
3276 rval.setString(retstr);
3277 return true;
3280 static inline bool
3281 str_replace_regexp(JSContext* cx, CallArgs args, ReplaceData& rdata)
3283 if (!rdata.g.normalizeRegExp(cx, true, 2, args))
3284 return false;
3286 return StrReplaceRegExp(cx, rdata, args.rval());
3289 bool
3290 js::str_replace_regexp_raw(JSContext* cx, HandleString string, HandleObject regexp,
3291 HandleString replacement, MutableHandleValue rval)
3293 /* Optimize removal, so we don't have to create ReplaceData */
3294 if (replacement->length() == 0) {
3295 StringRegExpGuard guard(cx);
3296 if (!guard.init(cx, regexp))
3297 return false;
3299 RegExpShared& re = guard.regExp();
3300 return StrReplaceRegexpRemove(cx, string, re, rval);
3303 ReplaceData rdata(cx);
3304 rdata.str = string;
3306 JSLinearString* repl = replacement->ensureLinear(cx);
3307 if (!repl)
3308 return false;
3310 rdata.setReplacementString(repl);
3312 if (!rdata.g.init(cx, regexp))
3313 return false;
3315 return StrReplaceRegExp(cx, rdata, rval);
3318 static inline bool
3319 StrReplaceString(JSContext* cx, ReplaceData& rdata, const FlatMatch& fm, MutableHandleValue rval)
3322 * Note: we could optimize the text.length == pattern.length case if we wanted,
3323 * even in the presence of dollar metachars.
3325 if (rdata.dollarIndex != UINT32_MAX)
3326 return BuildDollarReplacement(cx, rdata.str, rdata.repstr, rdata.dollarIndex, fm, rval);
3327 return BuildFlatReplacement(cx, rdata.str, rdata.repstr, fm, rval);
3330 static const uint32_t ReplaceOptArg = 2;
3332 bool
3333 js::str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern,
3334 HandleString replacement, MutableHandleValue rval)
3336 ReplaceData rdata(cx);
3338 rdata.str = string;
3339 JSLinearString* repl = replacement->ensureLinear(cx);
3340 if (!repl)
3341 return false;
3342 rdata.setReplacementString(repl);
3344 if (!rdata.g.init(cx, pattern))
3345 return false;
3346 const FlatMatch* fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, ReplaceOptArg, false);
3348 if (fm->match() < 0) {
3349 rval.setString(string);
3350 return true;
3353 return StrReplaceString(cx, rdata, *fm, rval);
3356 static inline bool
3357 str_replace_flat_lambda(JSContext* cx, CallArgs outerArgs, ReplaceData& rdata, const FlatMatch& fm)
3359 RootedString matchStr(cx, NewDependentString(cx, rdata.str, fm.match(), fm.patternLength()));
3360 if (!matchStr)
3361 return false;
3363 /* lambda(matchStr, matchStart, textstr) */
3364 static const uint32_t lambdaArgc = 3;
3365 if (!rdata.fig.args().init(lambdaArgc))
3366 return false;
3368 CallArgs& args = rdata.fig.args();
3369 args.setCallee(ObjectValue(*rdata.lambda));
3370 args.setThis(UndefinedValue());
3372 Value* sp = args.array();
3373 sp[0].setString(matchStr);
3374 sp[1].setInt32(fm.match());
3375 sp[2].setString(rdata.str);
3377 if (!rdata.fig.invoke(cx))
3378 return false;
3380 RootedString repstr(cx, ToString<CanGC>(cx, args.rval()));
3381 if (!repstr)
3382 return false;
3384 RootedString leftSide(cx, NewDependentString(cx, rdata.str, 0, fm.match()));
3385 if (!leftSide)
3386 return false;
3388 size_t matchLimit = fm.match() + fm.patternLength();
3389 RootedString rightSide(cx, NewDependentString(cx, rdata.str, matchLimit,
3390 rdata.str->length() - matchLimit));
3391 if (!rightSide)
3392 return false;
3394 RopeBuilder builder(cx);
3395 if (!(builder.append(leftSide) &&
3396 builder.append(repstr) &&
3397 builder.append(rightSide))) {
3398 return false;
3401 outerArgs.rval().setString(builder.result());
3402 return true;
3406 * Pattern match the script to check if it is is indexing into a particular
3407 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
3408 * such cases, which are used by javascript packers (particularly the popular
3409 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
3410 * code patterns generated by such packers here.
3412 static bool
3413 LambdaIsGetElem(JSContext* cx, JSObject& lambda, MutableHandleNativeObject pobj)
3415 if (!lambda.is<JSFunction>())
3416 return true;
3418 RootedFunction fun(cx, &lambda.as<JSFunction>());
3419 if (!fun->isInterpreted())
3420 return true;
3422 JSScript* script = fun->getOrCreateScript(cx);
3423 if (!script)
3424 return false;
3426 jsbytecode* pc = script->code();
3429 * JSOP_GETALIASEDVAR tells us exactly where to find the base object 'b'.
3430 * Rule out the (unlikely) possibility of a heavyweight function since it
3431 * would make our scope walk off by 1.
3433 if (JSOp(*pc) != JSOP_GETALIASEDVAR || fun->isHeavyweight())
3434 return true;
3435 ScopeCoordinate sc(pc);
3436 ScopeObject* scope = &fun->environment()->as<ScopeObject>();
3437 for (unsigned i = 0; i < sc.hops(); ++i)
3438 scope = &scope->enclosingScope().as<ScopeObject>();
3439 Value b = scope->aliasedVar(sc);
3440 pc += JSOP_GETALIASEDVAR_LENGTH;
3442 /* Look for 'a' to be the lambda's first argument. */
3443 if (JSOp(*pc) != JSOP_GETARG || GET_ARGNO(pc) != 0)
3444 return true;
3445 pc += JSOP_GETARG_LENGTH;
3447 /* 'b[a]' */
3448 if (JSOp(*pc) != JSOP_GETELEM)
3449 return true;
3450 pc += JSOP_GETELEM_LENGTH;
3452 /* 'return b[a]' */
3453 if (JSOp(*pc) != JSOP_RETURN)
3454 return true;
3456 /* 'b' must behave like a normal object. */
3457 if (!b.isObject())
3458 return true;
3460 JSObject& bobj = b.toObject();
3461 const Class* clasp = bobj.getClass();
3462 if (!clasp->isNative() || clasp->ops.lookupProperty || clasp->ops.getProperty)
3463 return true;
3465 pobj.set(&bobj.as<NativeObject>());
3466 return true;
3469 bool
3470 js::str_replace(JSContext* cx, unsigned argc, Value* vp)
3472 CallArgs args = CallArgsFromVp(argc, vp);
3474 ReplaceData rdata(cx);
3475 rdata.str = ThisToStringForStringProto(cx, args);
3476 if (!rdata.str)
3477 return false;
3479 if (!rdata.g.init(cx, args))
3480 return false;
3482 /* Extract replacement string/function. */
3483 if (args.length() >= ReplaceOptArg && IsCallable(args[1])) {
3484 rdata.setReplacementFunction(&args[1].toObject());
3486 if (!LambdaIsGetElem(cx, *rdata.lambda, &rdata.elembase))
3487 return false;
3488 } else {
3489 JSLinearString* string = ArgToRootedString(cx, args, 1);
3490 if (!string)
3491 return false;
3493 rdata.setReplacementString(string);
3496 rdata.fig.initFunction(ObjectOrNullValue(rdata.lambda));
3499 * Unlike its |String.prototype| brethren, |replace| doesn't convert
3500 * its input to a regular expression. (Even if it contains metachars.)
3502 * However, if the user invokes our (non-standard) |flags| argument
3503 * extension then we revert to creating a regular expression. Note that
3504 * this is observable behavior through the side-effect mutation of the
3505 * |RegExp| statics.
3508 const FlatMatch* fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, args.length(), false);
3510 if (!fm) {
3511 if (cx->isExceptionPending()) /* oom in RopeMatch in tryFlatMatch */
3512 return false;
3513 return str_replace_regexp(cx, args, rdata);
3516 if (fm->match() < 0) {
3517 args.rval().setString(rdata.str);
3518 return true;
3521 if (rdata.lambda)
3522 return str_replace_flat_lambda(cx, args, rdata, *fm);
3523 return StrReplaceString(cx, rdata, *fm, args.rval());
3526 namespace {
3528 class SplitMatchResult {
3529 size_t endIndex_;
3530 size_t length_;
3532 public:
3533 void setFailure() {
3534 JS_STATIC_ASSERT(SIZE_MAX > JSString::MAX_LENGTH);
3535 endIndex_ = SIZE_MAX;
3537 bool isFailure() const {
3538 return endIndex_ == SIZE_MAX;
3540 size_t endIndex() const {
3541 MOZ_ASSERT(!isFailure());
3542 return endIndex_;
3544 size_t length() const {
3545 MOZ_ASSERT(!isFailure());
3546 return length_;
3548 void setResult(size_t length, size_t endIndex) {
3549 length_ = length;
3550 endIndex_ = endIndex;
3554 } /* anonymous namespace */
3556 template<class Matcher>
3557 static ArrayObject*
3558 SplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit, const Matcher& splitMatch,
3559 Handle<TypeObject*> type)
3561 size_t strLength = str->length();
3562 SplitMatchResult result;
3564 /* Step 11. */
3565 if (strLength == 0) {
3566 if (!splitMatch(cx, str, 0, &result))
3567 return nullptr;
3570 * NB: Unlike in the non-empty string case, it's perfectly fine
3571 * (indeed the spec requires it) if we match at the end of the
3572 * string. Thus these cases should hold:
3574 * var a = "".split("");
3575 * assertEq(a.length, 0);
3576 * var b = "".split(/.?/);
3577 * assertEq(b.length, 0);
3579 if (!result.isFailure())
3580 return NewDenseEmptyArray(cx);
3582 RootedValue v(cx, StringValue(str));
3583 return NewDenseCopiedArray(cx, 1, v.address());
3586 /* Step 12. */
3587 size_t lastEndIndex = 0;
3588 size_t index = 0;
3590 /* Step 13. */
3591 AutoValueVector splits(cx);
3593 while (index < strLength) {
3594 /* Step 13(a). */
3595 if (!splitMatch(cx, str, index, &result))
3596 return nullptr;
3599 * Step 13(b).
3601 * Our match algorithm differs from the spec in that it returns the
3602 * next index at which a match happens. If no match happens we're
3603 * done.
3605 * But what if the match is at the end of the string (and the string is
3606 * not empty)? Per 13(c)(ii) this shouldn't be a match, so we have to
3607 * specially exclude it. Thus this case should hold:
3609 * var a = "abc".split(/\b/);
3610 * assertEq(a.length, 1);
3611 * assertEq(a[0], "abc");
3613 if (result.isFailure())
3614 break;
3616 /* Step 13(c)(i). */
3617 size_t sepLength = result.length();
3618 size_t endIndex = result.endIndex();
3619 if (sepLength == 0 && endIndex == strLength)
3620 break;
3622 /* Step 13(c)(ii). */
3623 if (endIndex == lastEndIndex) {
3624 index++;
3625 continue;
3628 /* Step 13(c)(iii). */
3629 MOZ_ASSERT(lastEndIndex < endIndex);
3630 MOZ_ASSERT(sepLength <= strLength);
3631 MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
3633 /* Steps 13(c)(iii)(1-3). */
3634 size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
3635 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3636 if (!sub || !splits.append(StringValue(sub)))
3637 return nullptr;
3639 /* Step 13(c)(iii)(4). */
3640 if (splits.length() == limit)
3641 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3643 /* Step 13(c)(iii)(5). */
3644 lastEndIndex = endIndex;
3646 /* Step 13(c)(iii)(6-7). */
3647 if (Matcher::returnsCaptures) {
3648 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3649 if (!res)
3650 return nullptr;
3652 const MatchPairs& matches = res->getMatches();
3653 for (size_t i = 0; i < matches.parenCount(); i++) {
3654 /* Steps 13(c)(iii)(7)(a-c). */
3655 if (!matches[i + 1].isUndefined()) {
3656 JSSubString parsub;
3657 res->getParen(i + 1, &parsub);
3658 sub = NewDependentString(cx, parsub.base, parsub.offset, parsub.length);
3659 if (!sub || !splits.append(StringValue(sub)))
3660 return nullptr;
3661 } else {
3662 /* Only string entries have been accounted for so far. */
3663 AddTypePropertyId(cx, type, JSID_VOID, UndefinedValue());
3664 if (!splits.append(UndefinedValue()))
3665 return nullptr;
3668 /* Step 13(c)(iii)(7)(d). */
3669 if (splits.length() == limit)
3670 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3674 /* Step 13(c)(iii)(8). */
3675 index = lastEndIndex;
3678 /* Steps 14-15. */
3679 JSString* sub = NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
3680 if (!sub || !splits.append(StringValue(sub)))
3681 return nullptr;
3683 /* Step 16. */
3684 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3687 // Fast-path for splitting a string into a character array via split("").
3688 static ArrayObject*
3689 CharSplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit)
3691 size_t strLength = str->length();
3692 if (strLength == 0)
3693 return NewDenseEmptyArray(cx);
3695 js::StaticStrings& staticStrings = cx->staticStrings();
3696 uint32_t resultlen = (limit < strLength ? limit : strLength);
3698 AutoValueVector splits(cx);
3699 if (!splits.reserve(resultlen))
3700 return nullptr;
3702 for (size_t i = 0; i < resultlen; ++i) {
3703 JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
3704 if (!sub)
3705 return nullptr;
3706 splits.infallibleAppend(StringValue(sub));
3709 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3712 namespace {
3715 * The SplitMatch operation from ES5 15.5.4.14 is implemented using different
3716 * paths for regular expression and string separators.
3718 * The algorithm differs from the spec in that the we return the next index at
3719 * which a match happens.
3721 class SplitRegExpMatcher
3723 RegExpShared& re;
3724 RegExpStatics* res;
3726 public:
3727 SplitRegExpMatcher(RegExpShared& re, RegExpStatics* res) : re(re), res(res) {}
3729 static const bool returnsCaptures = true;
3731 bool operator()(JSContext* cx, HandleLinearString str, size_t index,
3732 SplitMatchResult* result) const
3734 ScopedMatchPairs matches(&cx->tempLifoAlloc());
3735 RegExpRunStatus status = re.execute(cx, str, index, &matches);
3736 if (status == RegExpRunStatus_Error)
3737 return false;
3739 if (status == RegExpRunStatus_Success_NotFound) {
3740 result->setFailure();
3741 return true;
3744 if (!res->updateFromMatchPairs(cx, str, matches))
3745 return false;
3747 JSSubString sep;
3748 res->getLastMatch(&sep);
3750 result->setResult(sep.length, matches[0].limit);
3751 return true;
3755 class SplitStringMatcher
3757 RootedLinearString sep;
3759 public:
3760 SplitStringMatcher(JSContext* cx, HandleLinearString sep)
3761 : sep(cx, sep)
3764 static const bool returnsCaptures = false;
3766 bool operator()(JSContext* cx, JSLinearString* str, size_t index, SplitMatchResult* res) const
3768 MOZ_ASSERT(index == 0 || index < str->length());
3769 int match = StringMatch(str, sep, index);
3770 if (match == -1)
3771 res->setFailure();
3772 else
3773 res->setResult(sep->length(), match + sep->length());
3774 return true;
3778 } /* anonymous namespace */
3780 /* ES5 15.5.4.14 */
3781 bool
3782 js::str_split(JSContext* cx, unsigned argc, Value* vp)
3784 CallArgs args = CallArgsFromVp(argc, vp);
3786 /* Steps 1-2. */
3787 RootedString str(cx, ThisToStringForStringProto(cx, args));
3788 if (!str)
3789 return false;
3791 RootedTypeObject type(cx, GetTypeCallerInitObject(cx, JSProto_Array));
3792 if (!type)
3793 return false;
3794 AddTypePropertyId(cx, type, JSID_VOID, Type::StringType());
3796 /* Step 5: Use the second argument as the split limit, if given. */
3797 uint32_t limit;
3798 if (args.hasDefined(1)) {
3799 double d;
3800 if (!ToNumber(cx, args[1], &d))
3801 return false;
3802 limit = ToUint32(d);
3803 } else {
3804 limit = UINT32_MAX;
3807 /* Step 8. */
3808 RegExpGuard re(cx);
3809 RootedLinearString sepstr(cx);
3810 bool sepDefined = args.hasDefined(0);
3811 if (sepDefined) {
3812 if (IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
3813 RootedObject obj(cx, &args[0].toObject());
3814 if (!RegExpToShared(cx, obj, &re))
3815 return false;
3816 } else {
3817 sepstr = ArgToRootedString(cx, args, 0);
3818 if (!sepstr)
3819 return false;
3823 /* Step 9. */
3824 if (limit == 0) {
3825 JSObject* aobj = NewDenseEmptyArray(cx);
3826 if (!aobj)
3827 return false;
3828 aobj->setType(type);
3829 args.rval().setObject(*aobj);
3830 return true;
3833 /* Step 10. */
3834 if (!sepDefined) {
3835 RootedValue v(cx, StringValue(str));
3836 JSObject* aobj = NewDenseCopiedArray(cx, 1, v.address());
3837 if (!aobj)
3838 return false;
3839 aobj->setType(type);
3840 args.rval().setObject(*aobj);
3841 return true;
3843 RootedLinearString linearStr(cx, str->ensureLinear(cx));
3844 if (!linearStr)
3845 return false;
3847 /* Steps 11-15. */
3848 RootedObject aobj(cx);
3849 if (!re.initialized()) {
3850 if (sepstr->length() == 0) {
3851 aobj = CharSplitHelper(cx, linearStr, limit);
3852 } else {
3853 SplitStringMatcher matcher(cx, sepstr);
3854 aobj = SplitHelper(cx, linearStr, limit, matcher, type);
3856 } else {
3857 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3858 if (!res)
3859 return false;
3860 SplitRegExpMatcher matcher(*re, res);
3861 aobj = SplitHelper(cx, linearStr, limit, matcher, type);
3863 if (!aobj)
3864 return false;
3866 /* Step 16. */
3867 aobj->setType(type);
3868 args.rval().setObject(*aobj);
3869 return true;
3872 JSObject*
3873 js::str_split_string(JSContext* cx, HandleTypeObject type, HandleString str, HandleString sep)
3875 RootedLinearString linearStr(cx, str->ensureLinear(cx));
3876 if (!linearStr)
3877 return nullptr;
3879 RootedLinearString linearSep(cx, sep->ensureLinear(cx));
3880 if (!linearSep)
3881 return nullptr;
3883 uint32_t limit = UINT32_MAX;
3885 RootedObject aobj(cx);
3886 if (linearSep->length() == 0) {
3887 aobj = CharSplitHelper(cx, linearStr, limit);
3888 } else {
3889 SplitStringMatcher matcher(cx, linearSep);
3890 aobj = SplitHelper(cx, linearStr, limit, matcher, type);
3893 if (!aobj)
3894 return nullptr;
3896 aobj->setType(type);
3897 return aobj;
3901 * Python-esque sequence operations.
3903 static bool
3904 str_concat(JSContext* cx, unsigned argc, Value* vp)
3906 CallArgs args = CallArgsFromVp(argc, vp);
3907 JSString* str = ThisToStringForStringProto(cx, args);
3908 if (!str)
3909 return false;
3911 for (unsigned i = 0; i < args.length(); i++) {
3912 JSString* argStr = ToString<NoGC>(cx, args[i]);
3913 if (!argStr) {
3914 RootedString strRoot(cx, str);
3915 argStr = ToString<CanGC>(cx, args[i]);
3916 if (!argStr)
3917 return false;
3918 str = strRoot;
3921 JSString* next = ConcatStrings<NoGC>(cx, str, argStr);
3922 if (next) {
3923 str = next;
3924 } else {
3925 RootedString strRoot(cx, str), argStrRoot(cx, argStr);
3926 str = ConcatStrings<CanGC>(cx, strRoot, argStrRoot);
3927 if (!str)
3928 return false;
3932 args.rval().setString(str);
3933 return true;
3936 static const JSFunctionSpec string_methods[] = {
3937 #if JS_HAS_TOSOURCE
3938 JS_FN(js_toSource_str, str_toSource, 0,0),
3939 #endif
3941 /* Java-like methods. */
3942 JS_FN(js_toString_str, js_str_toString, 0,0),
3943 JS_FN(js_valueOf_str, js_str_toString, 0,0),
3944 JS_FN("toLowerCase", str_toLowerCase, 0,JSFUN_GENERIC_NATIVE),
3945 JS_FN("toUpperCase", str_toUpperCase, 0,JSFUN_GENERIC_NATIVE),
3946 JS_FN("charAt", js_str_charAt, 1,JSFUN_GENERIC_NATIVE),
3947 JS_FN("charCodeAt", js_str_charCodeAt, 1,JSFUN_GENERIC_NATIVE),
3948 JS_SELF_HOSTED_FN("substring", "String_substring", 2,0),
3949 JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1,0),
3950 JS_FN("contains", str_contains, 1,JSFUN_GENERIC_NATIVE),
3951 JS_FN("indexOf", str_indexOf, 1,JSFUN_GENERIC_NATIVE),
3952 JS_FN("lastIndexOf", str_lastIndexOf, 1,JSFUN_GENERIC_NATIVE),
3953 JS_FN("startsWith", str_startsWith, 1,JSFUN_GENERIC_NATIVE),
3954 JS_FN("endsWith", str_endsWith, 1,JSFUN_GENERIC_NATIVE),
3955 JS_FN("trim", str_trim, 0,JSFUN_GENERIC_NATIVE),
3956 JS_FN("trimLeft", str_trimLeft, 0,JSFUN_GENERIC_NATIVE),
3957 JS_FN("trimRight", str_trimRight, 0,JSFUN_GENERIC_NATIVE),
3958 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,JSFUN_GENERIC_NATIVE),
3959 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,JSFUN_GENERIC_NATIVE),
3960 #if EXPOSE_INTL_API
3961 JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0),
3962 #else
3963 JS_FN("localeCompare", str_localeCompare, 1,JSFUN_GENERIC_NATIVE),
3964 #endif
3965 JS_SELF_HOSTED_FN("repeat", "String_repeat", 1,0),
3966 #if EXPOSE_INTL_API
3967 JS_FN("normalize", str_normalize, 0,JSFUN_GENERIC_NATIVE),
3968 #endif
3970 /* Perl-ish methods (search is actually Python-esque). */
3971 JS_FN("match", str_match, 1,JSFUN_GENERIC_NATIVE),
3972 JS_FN("search", str_search, 1,JSFUN_GENERIC_NATIVE),
3973 JS_FN("replace", str_replace, 2,JSFUN_GENERIC_NATIVE),
3974 JS_FN("split", str_split, 2,JSFUN_GENERIC_NATIVE),
3975 JS_SELF_HOSTED_FN("substr", "String_substr", 2,0),
3977 /* Python-esque sequence methods. */
3978 JS_FN("concat", str_concat, 1,JSFUN_GENERIC_NATIVE),
3979 JS_SELF_HOSTED_FN("slice", "String_slice", 2,0),
3981 /* HTML string methods. */
3982 JS_SELF_HOSTED_FN("bold", "String_bold", 0,0),
3983 JS_SELF_HOSTED_FN("italics", "String_italics", 0,0),
3984 JS_SELF_HOSTED_FN("fixed", "String_fixed", 0,0),
3985 JS_SELF_HOSTED_FN("strike", "String_strike", 0,0),
3986 JS_SELF_HOSTED_FN("small", "String_small", 0,0),
3987 JS_SELF_HOSTED_FN("big", "String_big", 0,0),
3988 JS_SELF_HOSTED_FN("blink", "String_blink", 0,0),
3989 JS_SELF_HOSTED_FN("sup", "String_sup", 0,0),
3990 JS_SELF_HOSTED_FN("sub", "String_sub", 0,0),
3991 JS_SELF_HOSTED_FN("anchor", "String_anchor", 1,0),
3992 JS_SELF_HOSTED_FN("link", "String_link", 1,0),
3993 JS_SELF_HOSTED_FN("fontcolor","String_fontcolor", 1,0),
3994 JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1,0),
3996 JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0,0),
3997 JS_FS_END
4000 // ES6 rev 27 (2014 Aug 24) 21.1.1
4001 bool
4002 js_String(JSContext* cx, unsigned argc, Value* vp)
4004 CallArgs args = CallArgsFromVp(argc, vp);
4006 RootedString str(cx);
4007 if (args.length() > 0) {
4008 if (!args.isConstructing() && args[0].isSymbol())
4009 return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
4011 str = ToString<CanGC>(cx, args[0]);
4012 if (!str)
4013 return false;
4014 } else {
4015 str = cx->runtime()->emptyString;
4018 if (args.isConstructing()) {
4019 StringObject* strobj = StringObject::create(cx, str);
4020 if (!strobj)
4021 return false;
4022 args.rval().setObject(*strobj);
4023 return true;
4026 args.rval().setString(str);
4027 return true;
4030 bool
4031 js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp)
4033 CallArgs args = CallArgsFromVp(argc, vp);
4035 MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
4036 if (args.length() == 1)
4037 return str_fromCharCode_one_arg(cx, args[0], args.rval());
4039 char16_t* chars = cx->pod_malloc<char16_t>(args.length() + 1);
4040 if (!chars)
4041 return false;
4042 for (unsigned i = 0; i < args.length(); i++) {
4043 uint16_t code;
4044 if (!ToUint16(cx, args[i], &code)) {
4045 js_free(chars);
4046 return false;
4048 chars[i] = char16_t(code);
4050 chars[args.length()] = 0;
4051 JSString* str = NewString<CanGC>(cx, chars, args.length());
4052 if (!str) {
4053 js_free(chars);
4054 return false;
4057 args.rval().setString(str);
4058 return true;
4061 bool
4062 js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval)
4064 uint16_t ucode;
4066 if (!ToUint16(cx, code, &ucode))
4067 return false;
4069 if (StaticStrings::hasUnit(ucode)) {
4070 rval.setString(cx->staticStrings().getUnit(ucode));
4071 return true;
4074 char16_t c = char16_t(ucode);
4075 JSString* str = NewStringCopyN<CanGC>(cx, &c, 1);
4076 if (!str)
4077 return false;
4079 rval.setString(str);
4080 return true;
4083 static const JSFunctionSpec string_static_methods[] = {
4084 JS_FN("fromCharCode", js::str_fromCharCode, 1, 0),
4086 JS_SELF_HOSTED_FN("fromCodePoint", "String_static_fromCodePoint", 1,0),
4087 JS_SELF_HOSTED_FN("raw", "String_static_raw", 2,0),
4088 JS_SELF_HOSTED_FN("substring", "String_static_substring", 3,0),
4089 JS_SELF_HOSTED_FN("substr", "String_static_substr", 3,0),
4090 JS_SELF_HOSTED_FN("slice", "String_static_slice", 3,0),
4092 // This must be at the end because of bug 853075: functions listed after
4093 // self-hosted methods aren't available in self-hosted code.
4094 #if EXPOSE_INTL_API
4095 JS_SELF_HOSTED_FN("localeCompare", "String_static_localeCompare", 2,0),
4096 #endif
4097 JS_FS_END
4100 /* static */ Shape*
4101 StringObject::assignInitialShape(ExclusiveContext* cx, Handle<StringObject*> obj)
4103 MOZ_ASSERT(obj->empty());
4105 return obj->addDataProperty(cx, cx->names().length, LENGTH_SLOT,
4106 JSPROP_PERMANENT | JSPROP_READONLY);
4109 JSObject*
4110 js_InitStringClass(JSContext* cx, HandleObject obj)
4112 MOZ_ASSERT(obj->isNative());
4114 Rooted<GlobalObject*> global(cx, &obj->as<GlobalObject>());
4116 Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
4117 RootedObject proto(cx, global->createBlankPrototype(cx, &StringObject::class_));
4118 if (!proto || !proto->as<StringObject>().init(cx, empty))
4119 return nullptr;
4121 /* Now create the String function. */
4122 RootedFunction ctor(cx);
4123 ctor = global->createConstructor(cx, js_String, cx->names().String, 1);
4124 if (!ctor)
4125 return nullptr;
4127 if (!GlobalObject::initBuiltinConstructor(cx, global, JSProto_String, ctor, proto))
4128 return nullptr;
4130 if (!LinkConstructorAndPrototype(cx, ctor, proto))
4131 return nullptr;
4133 if (!DefinePropertiesAndFunctions(cx, proto, nullptr, string_methods) ||
4134 !DefinePropertiesAndFunctions(cx, ctor, nullptr, string_static_methods))
4136 return nullptr;
4140 * Define escape/unescape, the URI encode/decode functions, and maybe
4141 * uneval on the global object.
4143 if (!JS_DefineFunctions(cx, global, string_functions))
4144 return nullptr;
4146 return proto;
4149 const char*
4150 js_ValueToPrintable(JSContext* cx, const Value& vArg, JSAutoByteString* bytes, bool asSource)
4152 RootedValue v(cx, vArg);
4153 JSString* str;
4154 if (asSource)
4155 str = ValueToSource(cx, v);
4156 else
4157 str = ToString<CanGC>(cx, v);
4158 if (!str)
4159 return nullptr;
4160 str = js_QuoteString(cx, str, 0);
4161 if (!str)
4162 return nullptr;
4163 return bytes->encodeLatin1(cx, str);
4166 template <AllowGC allowGC>
4167 JSString*
4168 js::ToStringSlow(ExclusiveContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg)
4170 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
4171 MOZ_ASSERT(!arg.isString());
4173 Value v = arg;
4174 if (!v.isPrimitive()) {
4175 if (!cx->shouldBeJSContext() || !allowGC)
4176 return nullptr;
4177 RootedValue v2(cx, v);
4178 if (!ToPrimitive(cx->asJSContext(), JSTYPE_STRING, &v2))
4179 return nullptr;
4180 v = v2;
4183 JSString* str;
4184 if (v.isString()) {
4185 str = v.toString();
4186 } else if (v.isInt32()) {
4187 str = Int32ToString<allowGC>(cx, v.toInt32());
4188 } else if (v.isDouble()) {
4189 str = NumberToString<allowGC>(cx, v.toDouble());
4190 } else if (v.isBoolean()) {
4191 str = js_BooleanToString(cx, v.toBoolean());
4192 } else if (v.isNull()) {
4193 str = cx->names().null;
4194 } else if (v.isSymbol()) {
4195 if (cx->shouldBeJSContext() && allowGC) {
4196 JS_ReportErrorNumber(cx->asJSContext(), js_GetErrorMessage, nullptr,
4197 JSMSG_SYMBOL_TO_STRING);
4199 return nullptr;
4200 } else {
4201 MOZ_ASSERT(v.isUndefined());
4202 str = cx->names().undefined;
4204 return str;
4207 template JSString*
4208 js::ToStringSlow<CanGC>(ExclusiveContext* cx, HandleValue arg);
4210 template JSString*
4211 js::ToStringSlow<NoGC>(ExclusiveContext* cx, Value arg);
4213 JS_PUBLIC_API(JSString*)
4214 js::ToStringSlow(JSContext* cx, HandleValue v)
4216 return ToStringSlow<CanGC>(cx, v);
4219 static JSString*
4220 SymbolToSource(JSContext* cx, Symbol* symbol)
4222 RootedString desc(cx, symbol->description());
4223 SymbolCode code = symbol->code();
4224 if (code != SymbolCode::InSymbolRegistry && code != SymbolCode::UniqueSymbol) {
4225 // Well-known symbol.
4226 MOZ_ASSERT(uint32_t(code) < JS::WellKnownSymbolLimit);
4227 return desc;
4230 StringBuffer buf(cx);
4231 if (code == SymbolCode::InSymbolRegistry ? !buf.append("Symbol.for(") : !buf.append("Symbol("))
4232 return nullptr;
4233 if (desc) {
4234 desc = StringToSource(cx, desc);
4235 if (!desc || !buf.append(desc))
4236 return nullptr;
4238 if (!buf.append(')'))
4239 return nullptr;
4240 return buf.finishString();
4243 JSString*
4244 js::ValueToSource(JSContext* cx, HandleValue v)
4246 JS_CHECK_RECURSION(cx, return nullptr);
4247 assertSameCompartment(cx, v);
4249 if (v.isUndefined())
4250 return cx->names().void0;
4251 if (v.isString())
4252 return StringToSource(cx, v.toString());
4253 if (v.isSymbol())
4254 return SymbolToSource(cx, v.toSymbol());
4255 if (v.isPrimitive()) {
4256 /* Special case to preserve negative zero, _contra_ toString. */
4257 if (v.isDouble() && IsNegativeZero(v.toDouble())) {
4258 /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
4259 static const char16_t js_negzero_ucNstr[] = {'-', '0'};
4261 return NewStringCopyN<CanGC>(cx, js_negzero_ucNstr, 2);
4263 return ToString<CanGC>(cx, v);
4266 RootedValue fval(cx);
4267 RootedObject obj(cx, &v.toObject());
4268 if (!JSObject::getProperty(cx, obj, obj, cx->names().toSource, &fval))
4269 return nullptr;
4270 if (IsCallable(fval)) {
4271 RootedValue rval(cx);
4272 if (!Invoke(cx, ObjectValue(*obj), fval, 0, nullptr, &rval))
4273 return nullptr;
4274 return ToString<CanGC>(cx, rval);
4277 return ObjectToSource(cx, obj);
4280 JSString*
4281 js::StringToSource(JSContext* cx, JSString* str)
4283 return js_QuoteString(cx, str, '"');
4286 bool
4287 js::EqualChars(JSLinearString* str1, JSLinearString* str2)
4289 MOZ_ASSERT(str1->length() == str2->length());
4291 size_t len = str1->length();
4293 AutoCheckCannotGC nogc;
4294 if (str1->hasTwoByteChars()) {
4295 if (str2->hasTwoByteChars())
4296 return PodEqual(str1->twoByteChars(nogc), str2->twoByteChars(nogc), len);
4298 return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len);
4301 if (str2->hasLatin1Chars())
4302 return PodEqual(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len);
4304 return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len);
4307 bool
4308 js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result)
4310 if (str1 == str2) {
4311 *result = true;
4312 return true;
4315 size_t length1 = str1->length();
4316 if (length1 != str2->length()) {
4317 *result = false;
4318 return true;
4321 JSLinearString* linear1 = str1->ensureLinear(cx);
4322 if (!linear1)
4323 return false;
4324 JSLinearString* linear2 = str2->ensureLinear(cx);
4325 if (!linear2)
4326 return false;
4328 *result = EqualChars(linear1, linear2);
4329 return true;
4332 bool
4333 js::EqualStrings(JSLinearString* str1, JSLinearString* str2)
4335 if (str1 == str2)
4336 return true;
4338 size_t length1 = str1->length();
4339 if (length1 != str2->length())
4340 return false;
4342 return EqualChars(str1, str2);
4345 static int32_t
4346 CompareStringsImpl(JSLinearString* str1, JSLinearString* str2)
4348 size_t len1 = str1->length();
4349 size_t len2 = str2->length();
4351 AutoCheckCannotGC nogc;
4352 if (str1->hasLatin1Chars()) {
4353 const Latin1Char* chars1 = str1->latin1Chars(nogc);
4354 return str2->hasLatin1Chars()
4355 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
4356 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
4359 const char16_t* chars1 = str1->twoByteChars(nogc);
4360 return str2->hasLatin1Chars()
4361 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
4362 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
4365 int32_t
4366 js::CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2)
4368 AutoCheckCannotGC nogc;
4369 return s2->hasLatin1Chars()
4370 ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length())
4371 : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length());
4374 bool
4375 js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result)
4377 MOZ_ASSERT(str1);
4378 MOZ_ASSERT(str2);
4380 if (str1 == str2) {
4381 *result = 0;
4382 return true;
4385 JSLinearString* linear1 = str1->ensureLinear(cx);
4386 if (!linear1)
4387 return false;
4389 JSLinearString* linear2 = str2->ensureLinear(cx);
4390 if (!linear2)
4391 return false;
4393 *result = CompareStringsImpl(linear1, linear2);
4394 return true;
4397 int32_t
4398 js::CompareAtoms(JSAtom* atom1, JSAtom* atom2)
4400 return CompareStringsImpl(atom1, atom2);
4403 bool
4404 js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes)
4406 size_t length = strlen(asciiBytes);
4407 #ifdef DEBUG
4408 for (size_t i = 0; i != length; ++i)
4409 MOZ_ASSERT(unsigned(asciiBytes[i]) <= 127);
4410 #endif
4411 if (length != str->length())
4412 return false;
4414 const Latin1Char* latin1 = reinterpret_cast<const Latin1Char*>(asciiBytes);
4416 AutoCheckCannotGC nogc;
4417 return str->hasLatin1Chars()
4418 ? PodEqual(latin1, str->latin1Chars(nogc), length)
4419 : EqualChars(latin1, str->twoByteChars(nogc), length);
4422 size_t
4423 js_strlen(const char16_t* s)
4425 const char16_t* t;
4427 for (t = s; *t != 0; t++)
4428 continue;
4429 return (size_t)(t - s);
4432 int32_t
4433 js_strcmp(const char16_t* lhs, const char16_t* rhs)
4435 while (true) {
4436 if (*lhs != *rhs)
4437 return int32_t(*lhs) - int32_t(*rhs);
4438 if (*lhs == 0)
4439 return 0;
4440 ++lhs, ++rhs;
4444 UniquePtr<char[], JS::FreePolicy>
4445 js::DuplicateString(js::ExclusiveContext* cx, const char* s)
4447 size_t n = strlen(s) + 1;
4448 auto ret = cx->make_pod_array<char>(n);
4449 if (!ret)
4450 return ret;
4451 PodCopy(ret.get(), s, n);
4452 return ret;
4455 UniquePtr<char16_t[], JS::FreePolicy>
4456 js::DuplicateString(js::ExclusiveContext* cx, const char16_t* s)
4458 size_t n = js_strlen(s) + 1;
4459 auto ret = cx->make_pod_array<char16_t>(n);
4460 if (!ret)
4461 return ret;
4462 PodCopy(ret.get(), s, n);
4463 return ret;
4466 template <typename CharT>
4467 const CharT*
4468 js_strchr_limit(const CharT* s, char16_t c, const CharT* limit)
4470 while (s < limit) {
4471 if (*s == c)
4472 return s;
4473 s++;
4475 return nullptr;
4478 template const Latin1Char*
4479 js_strchr_limit(const Latin1Char* s, char16_t c, const Latin1Char* limit);
4481 template const char16_t*
4482 js_strchr_limit(const char16_t* s, char16_t c, const char16_t* limit);
4484 char16_t*
4485 js::InflateString(ExclusiveContext* cx, const char* bytes, size_t* lengthp)
4487 size_t nchars;
4488 char16_t* chars;
4489 size_t nbytes = *lengthp;
4491 nchars = nbytes;
4492 chars = cx->pod_malloc<char16_t>(nchars + 1);
4493 if (!chars)
4494 goto bad;
4495 for (size_t i = 0; i < nchars; i++)
4496 chars[i] = (unsigned char) bytes[i];
4497 *lengthp = nchars;
4498 chars[nchars] = 0;
4499 return chars;
4501 bad:
4502 // For compatibility with callers of JS_DecodeBytes we must zero lengthp
4503 // on errors.
4504 *lengthp = 0;
4505 return nullptr;
4508 template <typename CharT>
4509 bool
4510 js::DeflateStringToBuffer(JSContext* maybecx, const CharT* src, size_t srclen,
4511 char* dst, size_t* dstlenp)
4513 size_t dstlen = *dstlenp;
4514 if (srclen > dstlen) {
4515 for (size_t i = 0; i < dstlen; i++)
4516 dst[i] = char(src[i]);
4517 if (maybecx) {
4518 AutoSuppressGC suppress(maybecx);
4519 JS_ReportErrorNumber(maybecx, js_GetErrorMessage, nullptr,
4520 JSMSG_BUFFER_TOO_SMALL);
4522 return false;
4524 for (size_t i = 0; i < srclen; i++)
4525 dst[i] = char(src[i]);
4526 *dstlenp = srclen;
4527 return true;
4530 template bool
4531 js::DeflateStringToBuffer(JSContext* maybecx, const Latin1Char* src, size_t srclen,
4532 char* dst, size_t* dstlenp);
4534 template bool
4535 js::DeflateStringToBuffer(JSContext* maybecx, const char16_t* src, size_t srclen,
4536 char* dst, size_t* dstlenp);
4538 #define ____ false
4541 * Identifier start chars:
4542 * - 36: $
4543 * - 65..90: A..Z
4544 * - 95: _
4545 * - 97..122: a..z
4547 const bool js_isidstart[] = {
4548 /* 0 1 2 3 4 5 6 7 8 9 */
4549 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4550 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4551 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4552 /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4553 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4554 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4555 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4556 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4557 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4558 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4559 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4560 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4561 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4565 * Identifier chars:
4566 * - 36: $
4567 * - 48..57: 0..9
4568 * - 65..90: A..Z
4569 * - 95: _
4570 * - 97..122: a..z
4572 const bool js_isident[] = {
4573 /* 0 1 2 3 4 5 6 7 8 9 */
4574 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4575 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4576 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4577 /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4578 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4579 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
4580 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4581 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4582 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4583 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4584 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4585 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4586 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4589 /* Whitespace chars: '\t', '\n', '\v', '\f', '\r', ' '. */
4590 const bool js_isspace[] = {
4591 /* 0 1 2 3 4 5 6 7 8 9 */
4592 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, true,
4593 /* 1 */ true, true, true, true, ____, ____, ____, ____, ____, ____,
4594 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4595 /* 3 */ ____, ____, true, ____, ____, ____, ____, ____, ____, ____,
4596 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4597 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4598 /* 6 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4599 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4600 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4601 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4602 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4603 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4604 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4608 * Uri reserved chars + #:
4609 * - 35: #
4610 * - 36: $
4611 * - 38: &
4612 * - 43: +
4613 * - 44: ,
4614 * - 47: /
4615 * - 58: :
4616 * - 59: ;
4617 * - 61: =
4618 * - 63: ?
4619 * - 64: @
4621 static const bool js_isUriReservedPlusPound[] = {
4622 /* 0 1 2 3 4 5 6 7 8 9 */
4623 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4624 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4625 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4626 /* 3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
4627 /* 4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
4628 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4629 /* 6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
4630 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4631 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4632 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4633 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4634 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4635 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4639 * Uri unescaped chars:
4640 * - 33: !
4641 * - 39: '
4642 * - 40: (
4643 * - 41: )
4644 * - 42: *
4645 * - 45: -
4646 * - 46: .
4647 * - 48..57: 0-9
4648 * - 65..90: A-Z
4649 * - 95: _
4650 * - 97..122: a-z
4651 * - 126: ~
4653 static const bool js_isUriUnescaped[] = {
4654 /* 0 1 2 3 4 5 6 7 8 9 */
4655 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4656 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4657 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4658 /* 3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
4659 /* 4 */ true, true, true, ____, ____, true, true, ____, true, true,
4660 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
4661 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4662 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4663 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4664 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4665 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4666 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4667 /* 12 */ true, true, true, ____, ____, ____, true, ____
4670 #undef ____
4672 #define URI_CHUNK 64U
4674 static inline bool
4675 TransferBufferToString(StringBuffer& sb, MutableHandleValue rval)
4677 JSString* str = sb.finishString();
4678 if (!str)
4679 return false;
4680 rval.setString(str);
4681 return true;
4685 * ECMA 3, 15.1.3 URI Handling Function Properties
4687 * The following are implementations of the algorithms
4688 * given in the ECMA specification for the hidden functions
4689 * 'Encode' and 'Decode'.
4691 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
4693 template <typename CharT>
4694 static EncodeResult
4695 Encode(StringBuffer& sb, const CharT* chars, size_t length,
4696 const bool* unescapedSet, const bool* unescapedSet2)
4698 static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
4700 char16_t hexBuf[4];
4701 hexBuf[0] = '%';
4702 hexBuf[3] = 0;
4704 for (size_t k = 0; k < length; k++) {
4705 char16_t c = chars[k];
4706 if (c < 128 && (unescapedSet[c] || (unescapedSet2 && unescapedSet2[c]))) {
4707 if (!sb.append(c))
4708 return Encode_Failure;
4709 } else {
4710 if (c >= 0xDC00 && c <= 0xDFFF)
4711 return Encode_BadUri;
4713 uint32_t v;
4714 if (c < 0xD800 || c > 0xDBFF) {
4715 v = c;
4716 } else {
4717 k++;
4718 if (k == length)
4719 return Encode_BadUri;
4721 char16_t c2 = chars[k];
4722 if (c2 < 0xDC00 || c2 > 0xDFFF)
4723 return Encode_BadUri;
4725 v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
4727 uint8_t utf8buf[4];
4728 size_t L = js_OneUcs4ToUtf8Char(utf8buf, v);
4729 for (size_t j = 0; j < L; j++) {
4730 hexBuf[1] = HexDigits[utf8buf[j] >> 4];
4731 hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
4732 if (!sb.append(hexBuf, 3))
4733 return Encode_Failure;
4738 return Encode_Success;
4741 static bool
4742 Encode(JSContext* cx, HandleLinearString str, const bool* unescapedSet,
4743 const bool* unescapedSet2, MutableHandleValue rval)
4745 size_t length = str->length();
4746 if (length == 0) {
4747 rval.setString(cx->runtime()->emptyString);
4748 return true;
4751 StringBuffer sb(cx);
4752 if (!sb.reserve(length))
4753 return false;
4755 EncodeResult res;
4756 if (str->hasLatin1Chars()) {
4757 AutoCheckCannotGC nogc;
4758 res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet, unescapedSet2);
4759 } else {
4760 AutoCheckCannotGC nogc;
4761 res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet, unescapedSet2);
4764 if (res == Encode_Failure)
4765 return false;
4767 if (res == Encode_BadUri) {
4768 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_BAD_URI, nullptr);
4769 return false;
4772 MOZ_ASSERT(res == Encode_Success);
4773 return TransferBufferToString(sb, rval);
4776 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
4778 template <typename CharT>
4779 static DecodeResult
4780 Decode(StringBuffer& sb, const CharT* chars, size_t length, const bool* reservedSet)
4782 for (size_t k = 0; k < length; k++) {
4783 char16_t c = chars[k];
4784 if (c == '%') {
4785 size_t start = k;
4786 if ((k + 2) >= length)
4787 return Decode_BadUri;
4789 if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4790 return Decode_BadUri;
4792 uint32_t B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4793 k += 2;
4794 if (!(B & 0x80)) {
4795 c = char16_t(B);
4796 } else {
4797 int n = 1;
4798 while (B & (0x80 >> n))
4799 n++;
4801 if (n == 1 || n > 4)
4802 return Decode_BadUri;
4804 uint8_t octets[4];
4805 octets[0] = (uint8_t)B;
4806 if (k + 3 * (n - 1) >= length)
4807 return Decode_BadUri;
4809 for (int j = 1; j < n; j++) {
4810 k++;
4811 if (chars[k] != '%')
4812 return Decode_BadUri;
4814 if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4815 return Decode_BadUri;
4817 B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4818 if ((B & 0xC0) != 0x80)
4819 return Decode_BadUri;
4821 k += 2;
4822 octets[j] = char(B);
4824 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
4825 if (v >= 0x10000) {
4826 v -= 0x10000;
4827 if (v > 0xFFFFF)
4828 return Decode_BadUri;
4830 c = char16_t((v & 0x3FF) + 0xDC00);
4831 char16_t H = char16_t((v >> 10) + 0xD800);
4832 if (!sb.append(H))
4833 return Decode_Failure;
4834 } else {
4835 c = char16_t(v);
4838 if (c < 128 && reservedSet && reservedSet[c]) {
4839 if (!sb.append(chars + start, k - start + 1))
4840 return Decode_Failure;
4841 } else {
4842 if (!sb.append(c))
4843 return Decode_Failure;
4845 } else {
4846 if (!sb.append(c))
4847 return Decode_Failure;
4851 return Decode_Success;
4854 static bool
4855 Decode(JSContext* cx, HandleLinearString str, const bool* reservedSet, MutableHandleValue rval)
4857 size_t length = str->length();
4858 if (length == 0) {
4859 rval.setString(cx->runtime()->emptyString);
4860 return true;
4863 StringBuffer sb(cx);
4865 DecodeResult res;
4866 if (str->hasLatin1Chars()) {
4867 AutoCheckCannotGC nogc;
4868 res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
4869 } else {
4870 AutoCheckCannotGC nogc;
4871 res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
4874 if (res == Decode_Failure)
4875 return false;
4877 if (res == Decode_BadUri) {
4878 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_BAD_URI);
4879 return false;
4882 MOZ_ASSERT(res == Decode_Success);
4883 return TransferBufferToString(sb, rval);
4886 static bool
4887 str_decodeURI(JSContext* cx, unsigned argc, Value* vp)
4889 CallArgs args = CallArgsFromVp(argc, vp);
4890 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
4891 if (!str)
4892 return false;
4894 return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
4897 static bool
4898 str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp)
4900 CallArgs args = CallArgsFromVp(argc, vp);
4901 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
4902 if (!str)
4903 return false;
4905 return Decode(cx, str, nullptr, args.rval());
4908 static bool
4909 str_encodeURI(JSContext* cx, unsigned argc, Value* vp)
4911 CallArgs args = CallArgsFromVp(argc, vp);
4912 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
4913 if (!str)
4914 return false;
4916 return Encode(cx, str, js_isUriUnescaped, js_isUriReservedPlusPound, args.rval());
4919 static bool
4920 str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp)
4922 CallArgs args = CallArgsFromVp(argc, vp);
4923 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
4924 if (!str)
4925 return false;
4927 return Encode(cx, str, js_isUriUnescaped, nullptr, args.rval());
4931 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
4932 * least 4 bytes long. Return the number of UTF-8 bytes of data written.
4935 js_OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char)
4937 int utf8Length = 1;
4939 MOZ_ASSERT(ucs4Char <= 0x10FFFF);
4940 if (ucs4Char < 0x80) {
4941 *utf8Buffer = (uint8_t)ucs4Char;
4942 } else {
4943 int i;
4944 uint32_t a = ucs4Char >> 11;
4945 utf8Length = 2;
4946 while (a) {
4947 a >>= 5;
4948 utf8Length++;
4950 i = utf8Length;
4951 while (--i) {
4952 utf8Buffer[i] = (uint8_t)((ucs4Char & 0x3F) | 0x80);
4953 ucs4Char >>= 6;
4955 *utf8Buffer = (uint8_t)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
4957 return utf8Length;
4960 size_t
4961 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, FILE* fp, JSLinearString* str,
4962 uint32_t quote)
4964 size_t len = str->length();
4965 AutoCheckCannotGC nogc;
4966 return str->hasLatin1Chars()
4967 ? PutEscapedStringImpl(buffer, bufferSize, fp, str->latin1Chars(nogc), len, quote)
4968 : PutEscapedStringImpl(buffer, bufferSize, fp, str->twoByteChars(nogc), len, quote);
4971 template <typename CharT>
4972 size_t
4973 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, FILE* fp, const CharT* chars,
4974 size_t length, uint32_t quote)
4976 enum {
4977 STOP, FIRST_QUOTE, LAST_QUOTE, CHARS, ESCAPE_START, ESCAPE_MORE
4978 } state;
4980 MOZ_ASSERT(quote == 0 || quote == '\'' || quote == '"');
4981 MOZ_ASSERT_IF(!buffer, bufferSize == 0);
4982 MOZ_ASSERT_IF(fp, !buffer);
4984 if (bufferSize == 0)
4985 buffer = nullptr;
4986 else
4987 bufferSize--;
4989 const CharT* charsEnd = chars + length;
4990 size_t n = 0;
4991 state = FIRST_QUOTE;
4992 unsigned shift = 0;
4993 unsigned hex = 0;
4994 unsigned u = 0;
4995 char c = 0; /* to quell GCC warnings */
4997 for (;;) {
4998 switch (state) {
4999 case STOP:
5000 goto stop;
5001 case FIRST_QUOTE:
5002 state = CHARS;
5003 goto do_quote;
5004 case LAST_QUOTE:
5005 state = STOP;
5006 do_quote:
5007 if (quote == 0)
5008 continue;
5009 c = (char)quote;
5010 break;
5011 case CHARS:
5012 if (chars == charsEnd) {
5013 state = LAST_QUOTE;
5014 continue;
5016 u = *chars++;
5017 if (u < ' ') {
5018 if (u != 0) {
5019 const char* escape = strchr(js_EscapeMap, (int)u);
5020 if (escape) {
5021 u = escape[1];
5022 goto do_escape;
5025 goto do_hex_escape;
5027 if (u < 127) {
5028 if (u == quote || u == '\\')
5029 goto do_escape;
5030 c = (char)u;
5031 } else if (u < 0x100) {
5032 goto do_hex_escape;
5033 } else {
5034 shift = 16;
5035 hex = u;
5036 u = 'u';
5037 goto do_escape;
5039 break;
5040 do_hex_escape:
5041 shift = 8;
5042 hex = u;
5043 u = 'x';
5044 do_escape:
5045 c = '\\';
5046 state = ESCAPE_START;
5047 break;
5048 case ESCAPE_START:
5049 MOZ_ASSERT(' ' <= u && u < 127);
5050 c = (char)u;
5051 state = ESCAPE_MORE;
5052 break;
5053 case ESCAPE_MORE:
5054 if (shift == 0) {
5055 state = CHARS;
5056 continue;
5058 shift -= 4;
5059 u = 0xF & (hex >> shift);
5060 c = (char)(u + (u < 10 ? '0' : 'A' - 10));
5061 break;
5063 if (buffer) {
5064 MOZ_ASSERT(n <= bufferSize);
5065 if (n != bufferSize) {
5066 buffer[n] = c;
5067 } else {
5068 buffer[n] = '\0';
5069 buffer = nullptr;
5071 } else if (fp) {
5072 if (fputc(c, fp) < 0)
5073 return size_t(-1);
5075 n++;
5077 stop:
5078 if (buffer)
5079 buffer[n] = '\0';
5080 return n;
5083 template size_t
5084 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, FILE* fp, const Latin1Char* chars,
5085 size_t length, uint32_t quote);
5087 template size_t
5088 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, FILE* fp, const char* chars,
5089 size_t length, uint32_t quote);
5091 template size_t
5092 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, FILE* fp, const char16_t* chars,
5093 size_t length, uint32_t quote);
5095 template size_t
5096 js::PutEscapedString(char* buffer, size_t bufferSize, const Latin1Char* chars, size_t length,
5097 uint32_t quote);
5099 template size_t
5100 js::PutEscapedString(char* buffer, size_t bufferSize, const char16_t* chars, size_t length,
5101 uint32_t quote);