Bumping manifests a=b2g-bump
[gecko.git] / js / src / jsstr.cpp
blob30013168156310b80d9f68c504a6677b71ae3f03
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "jsstr.h"
9 #include "mozilla/Attributes.h"
10 #include "mozilla/Casting.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/FloatingPoint.h"
13 #include "mozilla/PodOperations.h"
14 #include "mozilla/Range.h"
15 #include "mozilla/TypeTraits.h"
16 #include "mozilla/UniquePtr.h"
18 #include <ctype.h>
19 #include <string.h>
21 #include "jsapi.h"
22 #include "jsarray.h"
23 #include "jsatom.h"
24 #include "jsbool.h"
25 #include "jscntxt.h"
26 #include "jsgc.h"
27 #include "jsnum.h"
28 #include "jsobj.h"
29 #include "jsopcode.h"
30 #include "jstypes.h"
31 #include "jsutil.h"
33 #include "builtin/Intl.h"
34 #include "builtin/RegExp.h"
35 #if ENABLE_INTL_API
36 #include "unicode/unorm.h"
37 #endif
38 #include "vm/GlobalObject.h"
39 #include "vm/Interpreter.h"
40 #include "vm/NumericConversions.h"
41 #include "vm/Opcodes.h"
42 #include "vm/RegExpObject.h"
43 #include "vm/RegExpStatics.h"
44 #include "vm/ScopeObject.h"
45 #include "vm/StringBuffer.h"
47 #include "jsinferinlines.h"
49 #include "vm/Interpreter-inl.h"
50 #include "vm/String-inl.h"
51 #include "vm/StringObject-inl.h"
53 using namespace js;
54 using namespace js::gc;
55 using namespace js::types;
56 using namespace js::unicode;
58 using JS::Symbol;
59 using JS::SymbolCode;
61 using mozilla::AssertedCast;
62 using mozilla::CheckedInt;
63 using mozilla::IsNaN;
64 using mozilla::IsNegativeZero;
65 using mozilla::IsSame;
66 using mozilla::Move;
67 using mozilla::PodCopy;
68 using mozilla::PodEqual;
69 using mozilla::RangedPtr;
70 using mozilla::UniquePtr;
72 using JS::AutoCheckCannotGC;
74 static JSLinearString*
75 ArgToRootedString(JSContext* cx, CallArgs& args, unsigned argno)
77 if (argno >= args.length())
78 return cx->names().undefined;
80 JSString* str = ToString<CanGC>(cx, args[argno]);
81 if (!str)
82 return nullptr;
84 args[argno].setString(str);
85 return str->ensureLinear(cx);
89 * Forward declarations for URI encode/decode and helper routines
91 static bool
92 str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
94 static bool
95 str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
97 static bool
98 str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
100 static bool
101 str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
104 * Global string methods
108 /* ES5 B.2.1 */
109 template <typename CharT>
110 static Latin1Char*
111 Escape(JSContext* cx, const CharT* chars, uint32_t length, uint32_t* newLengthOut)
113 static const uint8_t shouldPassThrough[128] = {
114 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
115 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
116 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
117 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
118 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
119 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
120 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
121 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
124 /* Take a first pass and see how big the result string will need to be. */
125 uint32_t newLength = length;
126 for (size_t i = 0; i < length; i++) {
127 jschar ch = chars[i];
128 if (ch < 128 && shouldPassThrough[ch])
129 continue;
131 /* The character will be encoded as %XX or %uXXXX. */
132 newLength += (ch < 256) ? 2 : 5;
135 * newlength is incremented by at most 5 on each iteration, so worst
136 * case newlength == length * 6. This can't overflow.
138 static_assert(JSString::MAX_LENGTH < UINT32_MAX / 6,
139 "newlength must not overflow");
142 Latin1Char* newChars = cx->pod_malloc<Latin1Char>(newLength + 1);
143 if (!newChars)
144 return nullptr;
146 static const char digits[] = "0123456789ABCDEF";
148 size_t i, ni;
149 for (i = 0, ni = 0; i < length; i++) {
150 jschar ch = chars[i];
151 if (ch < 128 && shouldPassThrough[ch]) {
152 newChars[ni++] = ch;
153 } else if (ch < 256) {
154 newChars[ni++] = '%';
155 newChars[ni++] = digits[ch >> 4];
156 newChars[ni++] = digits[ch & 0xF];
157 } else {
158 newChars[ni++] = '%';
159 newChars[ni++] = 'u';
160 newChars[ni++] = digits[ch >> 12];
161 newChars[ni++] = digits[(ch & 0xF00) >> 8];
162 newChars[ni++] = digits[(ch & 0xF0) >> 4];
163 newChars[ni++] = digits[ch & 0xF];
166 JS_ASSERT(ni == newLength);
167 newChars[newLength] = 0;
169 *newLengthOut = newLength;
170 return newChars;
173 static bool
174 str_escape(JSContext* cx, unsigned argc, Value* vp)
176 CallArgs args = CallArgsFromVp(argc, vp);
178 JSLinearString* str = ArgToRootedString(cx, args, 0);
179 if (!str)
180 return false;
182 ScopedJSFreePtr<Latin1Char> newChars;
183 uint32_t newLength;
184 if (str->hasLatin1Chars()) {
185 AutoCheckCannotGC nogc;
186 newChars = Escape(cx, str->latin1Chars(nogc), str->length(), &newLength);
187 } else {
188 AutoCheckCannotGC nogc;
189 newChars = Escape(cx, str->twoByteChars(nogc), str->length(), &newLength);
192 if (!newChars)
193 return false;
195 JSString* res = NewString<CanGC>(cx, newChars.get(), newLength);
196 if (!res)
197 return false;
199 newChars.forget();
200 args.rval().setString(res);
201 return true;
204 template <typename CharT>
205 static inline bool
206 Unhex4(const RangedPtr<const CharT> chars, jschar* result)
208 jschar a = chars[0],
209 b = chars[1],
210 c = chars[2],
211 d = chars[3];
213 if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
214 return false;
216 *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
217 return true;
220 template <typename CharT>
221 static inline bool
222 Unhex2(const RangedPtr<const CharT> chars, jschar* result)
224 jschar a = chars[0],
225 b = chars[1];
227 if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
228 return false;
230 *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
231 return true;
234 template <typename CharT>
235 static bool
236 Unescape(StringBuffer& sb, const mozilla::Range<const CharT> chars)
239 * NB: use signed integers for length/index to allow simple length
240 * comparisons without unsigned-underflow hazards.
242 static_assert(JSString::MAX_LENGTH <= INT_MAX, "String length must fit in a signed integer");
243 int length = AssertedCast<int>(chars.length());
246 * Note that the spec algorithm has been optimized to avoid building
247 * a string in the case where no escapes are present.
250 /* Step 4. */
251 int k = 0;
252 bool building = false;
254 /* Step 5. */
255 while (k < length) {
256 /* Step 6. */
257 jschar c = chars[k];
259 /* Step 7. */
260 if (c != '%')
261 goto step_18;
263 /* Step 8. */
264 if (k > length - 6)
265 goto step_14;
267 /* Step 9. */
268 if (chars[k + 1] != 'u')
269 goto step_14;
271 #define ENSURE_BUILDING \
272 do { \
273 if (!building) { \
274 building = true; \
275 if (!sb.reserve(length)) \
276 return false; \
277 sb.infallibleAppend(chars.start().get(), k); \
279 } while(false);
281 /* Step 10-13. */
282 if (Unhex4(chars.start() + k + 2, &c)) {
283 ENSURE_BUILDING;
284 k += 5;
285 goto step_18;
288 step_14:
289 /* Step 14. */
290 if (k > length - 3)
291 goto step_18;
293 /* Step 15-17. */
294 if (Unhex2(chars.start() + k + 1, &c)) {
295 ENSURE_BUILDING;
296 k += 2;
299 step_18:
300 if (building && !sb.append(c))
301 return false;
303 /* Step 19. */
304 k += 1;
307 return true;
308 #undef ENSURE_BUILDING
311 /* ES5 B.2.2 */
312 static bool
313 str_unescape(JSContext* cx, unsigned argc, Value* vp)
315 CallArgs args = CallArgsFromVp(argc, vp);
317 /* Step 1. */
318 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
319 if (!str)
320 return false;
322 /* Step 3. */
323 StringBuffer sb(cx);
324 if (str->hasTwoByteChars() && !sb.ensureTwoByteChars())
325 return false;
327 if (str->hasLatin1Chars()) {
328 AutoCheckCannotGC nogc;
329 if (!Unescape(sb, str->latin1Range(nogc)))
330 return false;
331 } else {
332 AutoCheckCannotGC nogc;
333 if (!Unescape(sb, str->twoByteRange(nogc)))
334 return false;
337 JSLinearString* result;
338 if (!sb.empty()) {
339 result = sb.finishString();
340 if (!result)
341 return false;
342 } else {
343 result = str;
346 args.rval().setString(result);
347 return true;
#if JS_HAS_UNEVAL
/*
 * Native for uneval(): returns ValueToSource() of the first argument
 * (|undefined| when no argument was passed).
 */
static bool
str_uneval(JSContext* cx, unsigned argc, Value* vp)
{
    CallArgs args = CallArgsFromVp(argc, vp);

    if (JSString* source = ValueToSource(cx, args.get(0))) {
        args.rval().setString(source);
        return true;
    }

    return false;
}
#endif
364 static const JSFunctionSpec string_functions[] = {
365 JS_FN(js_escape_str, str_escape, 1,0),
366 JS_FN(js_unescape_str, str_unescape, 1,0),
367 #if JS_HAS_UNEVAL
368 JS_FN(js_uneval_str, str_uneval, 1,0),
369 #endif
370 JS_FN(js_decodeURI_str, str_decodeURI, 1,0),
371 JS_FN(js_encodeURI_str, str_encodeURI, 1,0),
372 JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,0),
373 JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,0),
375 JS_FS_END
378 static const unsigned STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
380 static bool
381 str_enumerate(JSContext* cx, HandleObject obj)
383 RootedString str(cx, obj->as<StringObject>().unbox());
384 RootedValue value(cx);
385 for (size_t i = 0, length = str->length(); i < length; i++) {
386 JSString* str1 = NewDependentString(cx, str, i, 1);
387 if (!str1)
388 return false;
389 value.setString(str1);
390 if (!JSObject::defineElement(cx, obj, i, value,
391 JS_PropertyStub, JS_StrictPropertyStub,
392 STRING_ELEMENT_ATTRS))
394 return false;
398 return true;
401 bool
402 js::str_resolve(JSContext* cx, HandleObject obj, HandleId id, MutableHandleObject objp)
404 if (!JSID_IS_INT(id))
405 return true;
407 RootedString str(cx, obj->as<StringObject>().unbox());
409 int32_t slot = JSID_TO_INT(id);
410 if ((size_t)slot < str->length()) {
411 JSString* str1 = cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
412 if (!str1)
413 return false;
414 RootedValue value(cx, StringValue(str1));
415 if (!JSObject::defineElement(cx, obj, uint32_t(slot), value, nullptr, nullptr,
416 STRING_ELEMENT_ATTRS))
418 return false;
420 objp.set(obj);
422 return true;
425 const Class StringObject::class_ = {
426 js_String_str,
427 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
428 JSCLASS_NEW_RESOLVE | JSCLASS_HAS_CACHED_PROTO(JSProto_String),
429 JS_PropertyStub, /* addProperty */
430 JS_DeletePropertyStub, /* delProperty */
431 JS_PropertyStub, /* getProperty */
432 JS_StrictPropertyStub, /* setProperty */
433 str_enumerate,
434 (JSResolveOp)str_resolve,
435 JS_ConvertStub
439 * Returns a JSString * for the |this| value associated with 'call', or throws
440 * a TypeError if |this| is null or undefined. This algorithm is the same as
441 * calling CheckObjectCoercible(this), then returning ToString(this), as all
442 * String.prototype.* methods do (other than toString and valueOf).
444 static MOZ_ALWAYS_INLINE JSString*
445 ThisToStringForStringProto(JSContext* cx, CallReceiver call)
447 JS_CHECK_RECURSION(cx, return nullptr);
449 if (call.thisv().isString())
450 return call.thisv().toString();
452 if (call.thisv().isObject()) {
453 RootedObject obj(cx, &call.thisv().toObject());
454 if (obj->is<StringObject>()) {
455 Rooted<jsid> id(cx, NameToId(cx->names().toString));
456 if (ClassMethodIsNative(cx, obj, &StringObject::class_, id, js_str_toString)) {
457 JSString* str = obj->as<StringObject>().unbox();
458 call.setThis(StringValue(str));
459 return str;
462 } else if (call.thisv().isNullOrUndefined()) {
463 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_CANT_CONVERT_TO,
464 call.thisv().isNull() ? "null" : "undefined", "object");
465 return nullptr;
468 JSString* str = ToStringSlow<CanGC>(cx, call.thisv());
469 if (!str)
470 return nullptr;
472 call.setThis(StringValue(str));
473 return str;
476 MOZ_ALWAYS_INLINE bool
477 IsString(HandleValue v)
479 return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
482 #if JS_HAS_TOSOURCE
485 * String.prototype.quote is generic (as are most string methods), unlike
486 * toSource, toString, and valueOf.
488 static bool
489 str_quote(JSContext* cx, unsigned argc, Value* vp)
491 CallArgs args = CallArgsFromVp(argc, vp);
492 RootedString str(cx, ThisToStringForStringProto(cx, args));
493 if (!str)
494 return false;
495 str = js_QuoteString(cx, str, '"');
496 if (!str)
497 return false;
498 args.rval().setString(str);
499 return true;
502 MOZ_ALWAYS_INLINE bool
503 str_toSource_impl(JSContext* cx, CallArgs args)
505 JS_ASSERT(IsString(args.thisv()));
507 Rooted<JSString*> str(cx, ToString<CanGC>(cx, args.thisv()));
508 if (!str)
509 return false;
511 str = js_QuoteString(cx, str, '"');
512 if (!str)
513 return false;
515 StringBuffer sb(cx);
516 if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
517 return false;
519 str = sb.finishString();
520 if (!str)
521 return false;
522 args.rval().setString(str);
523 return true;
526 static bool
527 str_toSource(JSContext* cx, unsigned argc, Value* vp)
529 CallArgs args = CallArgsFromVp(argc, vp);
530 return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
533 #endif /* JS_HAS_TOSOURCE */
535 MOZ_ALWAYS_INLINE bool
536 str_toString_impl(JSContext* cx, CallArgs args)
538 JS_ASSERT(IsString(args.thisv()));
540 args.rval().setString(args.thisv().isString()
541 ? args.thisv().toString()
542 : args.thisv().toObject().as<StringObject>().unbox());
543 return true;
546 bool
547 js_str_toString(JSContext* cx, unsigned argc, Value* vp)
549 CallArgs args = CallArgsFromVp(argc, vp);
550 return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
554 * Java-like string native methods.
557 static MOZ_ALWAYS_INLINE bool
558 ValueToIntegerRange(JSContext* cx, HandleValue v, int32_t* out)
560 if (v.isInt32()) {
561 *out = v.toInt32();
562 } else {
563 double d;
564 if (!ToInteger(cx, v, &d))
565 return false;
566 if (d > INT32_MAX)
567 *out = INT32_MAX;
568 else if (d < INT32_MIN)
569 *out = INT32_MIN;
570 else
571 *out = int32_t(d);
574 return true;
577 static JSString*
578 DoSubstr(JSContext* cx, JSString* str, size_t begin, size_t len)
581 * Optimization for one level deep ropes.
582 * This is common for the following pattern:
584 * while() {
585 * text = text.substr(0, x) + "bla" + text.substr(x)
586 * test.charCodeAt(x + 1)
589 if (str->isRope()) {
590 JSRope* rope = &str->asRope();
592 /* Substring is totally in leftChild of rope. */
593 if (begin + len <= rope->leftChild()->length()) {
594 str = rope->leftChild();
595 return NewDependentString(cx, str, begin, len);
598 /* Substring is totally in rightChild of rope. */
599 if (begin >= rope->leftChild()->length()) {
600 str = rope->rightChild();
601 begin -= rope->leftChild()->length();
602 return NewDependentString(cx, str, begin, len);
606 * Requested substring is partly in the left and partly in right child.
607 * Create a rope of substrings for both childs.
609 JS_ASSERT (begin < rope->leftChild()->length() &&
610 begin + len > rope->leftChild()->length());
612 size_t lhsLength = rope->leftChild()->length() - begin;
613 size_t rhsLength = begin + len - rope->leftChild()->length();
615 Rooted<JSRope*> ropeRoot(cx, rope);
616 RootedString lhs(cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
617 if (!lhs)
618 return nullptr;
620 RootedString rhs(cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
621 if (!rhs)
622 return nullptr;
624 return JSRope::new_<CanGC>(cx, lhs, rhs, len);
627 return NewDependentString(cx, str, begin, len);
630 static bool
631 str_substring(JSContext* cx, unsigned argc, Value* vp)
633 CallArgs args = CallArgsFromVp(argc, vp);
635 JSString* str = ThisToStringForStringProto(cx, args);
636 if (!str)
637 return false;
639 int32_t length, begin, end;
640 if (args.length() > 0) {
641 end = length = int32_t(str->length());
643 if (args[0].isInt32()) {
644 begin = args[0].toInt32();
645 } else {
646 RootedString strRoot(cx, str);
647 if (!ValueToIntegerRange(cx, args[0], &begin))
648 return false;
649 str = strRoot;
652 if (begin < 0)
653 begin = 0;
654 else if (begin > length)
655 begin = length;
657 if (args.hasDefined(1)) {
658 if (args[1].isInt32()) {
659 end = args[1].toInt32();
660 } else {
661 RootedString strRoot(cx, str);
662 if (!ValueToIntegerRange(cx, args[1], &end))
663 return false;
664 str = strRoot;
667 if (end > length) {
668 end = length;
669 } else {
670 if (end < 0)
671 end = 0;
672 if (end < begin) {
673 int32_t tmp = begin;
674 begin = end;
675 end = tmp;
680 str = DoSubstr(cx, str, size_t(begin), size_t(end - begin));
681 if (!str)
682 return false;
685 args.rval().setString(str);
686 return true;
689 template <typename CharT>
690 static JSString*
691 ToLowerCase(JSContext* cx, JSLinearString* str)
693 // Unlike toUpperCase, toLowerCase has the nice invariant that if the input
694 // is a Latin1 string, the output is also a Latin1 string.
695 UniquePtr<CharT[], JS::FreePolicy> newChars;
696 size_t length = str->length();
698 AutoCheckCannotGC nogc;
699 const CharT* chars = str->chars<CharT>(nogc);
701 // Look for the first upper case character.
702 size_t i = 0;
703 for (; i < length; i++) {
704 jschar c = chars[i];
705 if (unicode::ToLowerCase(c) != c)
706 break;
709 // If all characters are lower case, return the input string.
710 if (i == length)
711 return str;
713 newChars = cx->make_pod_array<CharT>(length + 1);
714 if (!newChars)
715 return nullptr;
717 PodCopy(newChars.get(), chars, i);
719 for (; i < length; i++) {
720 jschar c = unicode::ToLowerCase(chars[i]);
721 MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
722 newChars[i] = c;
725 newChars[length] = 0;
728 JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
729 if (!res)
730 return nullptr;
732 newChars.release();
733 return res;
736 static inline bool
737 ToLowerCaseHelper(JSContext* cx, CallReceiver call)
739 RootedString str(cx, ThisToStringForStringProto(cx, call));
740 if (!str)
741 return false;
743 JSLinearString* linear = str->ensureLinear(cx);
744 if (!linear)
745 return false;
747 if (linear->hasLatin1Chars())
748 str = ToLowerCase<Latin1Char>(cx, linear);
749 else
750 str = ToLowerCase<jschar>(cx, linear);
751 if (!str)
752 return false;
754 call.rval().setString(str);
755 return true;
758 static bool
759 str_toLowerCase(JSContext* cx, unsigned argc, Value* vp)
761 return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
764 static bool
765 str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp)
767 CallArgs args = CallArgsFromVp(argc, vp);
770 * Forcefully ignore the first (or any) argument and return toLowerCase(),
771 * ECMA has reserved that argument, presumably for defining the locale.
773 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToLowerCase) {
774 RootedString str(cx, ThisToStringForStringProto(cx, args));
775 if (!str)
776 return false;
778 RootedValue result(cx);
779 if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
780 return false;
782 args.rval().set(result);
783 return true;
786 return ToLowerCaseHelper(cx, args);
789 template <typename DestChar, typename SrcChar>
790 static void
791 ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
793 MOZ_ASSERT(firstLowerCase < length);
795 for (size_t i = 0; i < firstLowerCase; i++)
796 destChars[i] = srcChars[i];
798 for (size_t i = firstLowerCase; i < length; i++) {
799 jschar c = unicode::ToUpperCase(srcChars[i]);
800 MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
801 destChars[i] = c;
804 destChars[length] = '\0';
807 template <typename CharT>
808 static JSString*
809 ToUpperCase(JSContext* cx, JSLinearString* str)
811 typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr;
812 typedef UniquePtr<jschar[], JS::FreePolicy> TwoByteCharPtr;
814 mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
815 size_t length = str->length();
817 AutoCheckCannotGC nogc;
818 const CharT* chars = str->chars<CharT>(nogc);
820 // Look for the first lower case character.
821 size_t i = 0;
822 for (; i < length; i++) {
823 jschar c = chars[i];
824 if (unicode::ToUpperCase(c) != c)
825 break;
828 // If all characters are upper case, return the input string.
829 if (i == length)
830 return str;
832 // If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
833 // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
834 // upper case characters are not in the Latin1 range.
835 bool resultIsLatin1;
836 if (IsSame<CharT, Latin1Char>::value) {
837 resultIsLatin1 = true;
838 for (size_t j = i; j < length; j++) {
839 Latin1Char c = chars[j];
840 if (c == 0xb5 || c == 0xff) {
841 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
842 resultIsLatin1 = false;
843 break;
844 } else {
845 MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
848 } else {
849 resultIsLatin1 = false;
852 if (resultIsLatin1) {
853 Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1);
854 if (!buf)
855 return nullptr;
857 ToUpperCaseImpl(buf.get(), chars, i, length);
858 newChars.construct<Latin1CharPtr>(buf);
859 } else {
860 TwoByteCharPtr buf = cx->make_pod_array<jschar>(length + 1);
861 if (!buf)
862 return nullptr;
864 ToUpperCaseImpl(buf.get(), chars, i, length);
865 newChars.construct<TwoByteCharPtr>(buf);
869 JSString* res;
870 if (newChars.constructed<Latin1CharPtr>()) {
871 res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length);
872 if (!res)
873 return nullptr;
875 newChars.ref<Latin1CharPtr>().release();
876 } else {
877 res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length);
878 if (!res)
879 return nullptr;
881 newChars.ref<TwoByteCharPtr>().release();
884 return res;
887 static bool
888 ToUpperCaseHelper(JSContext* cx, CallReceiver call)
890 RootedString str(cx, ThisToStringForStringProto(cx, call));
891 if (!str)
892 return false;
894 JSLinearString* linear = str->ensureLinear(cx);
895 if (!linear)
896 return false;
898 if (linear->hasLatin1Chars())
899 str = ToUpperCase<Latin1Char>(cx, linear);
900 else
901 str = ToUpperCase<jschar>(cx, linear);
902 if (!str)
903 return false;
905 call.rval().setString(str);
906 return true;
909 static bool
910 str_toUpperCase(JSContext* cx, unsigned argc, Value* vp)
912 return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
915 static bool
916 str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp)
918 CallArgs args = CallArgsFromVp(argc, vp);
921 * Forcefully ignore the first (or any) argument and return toUpperCase(),
922 * ECMA has reserved that argument, presumably for defining the locale.
924 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeToUpperCase) {
925 RootedString str(cx, ThisToStringForStringProto(cx, args));
926 if (!str)
927 return false;
929 RootedValue result(cx);
930 if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
931 return false;
933 args.rval().set(result);
934 return true;
937 return ToUpperCaseHelper(cx, args);
940 #if !EXPOSE_INTL_API
941 static bool
942 str_localeCompare(JSContext* cx, unsigned argc, Value* vp)
944 CallArgs args = CallArgsFromVp(argc, vp);
945 RootedString str(cx, ThisToStringForStringProto(cx, args));
946 if (!str)
947 return false;
949 RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
950 if (!thatStr)
951 return false;
953 if (cx->runtime()->localeCallbacks && cx->runtime()->localeCallbacks->localeCompare) {
954 RootedValue result(cx);
955 if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr, &result))
956 return false;
958 args.rval().set(result);
959 return true;
962 int32_t result;
963 if (!CompareStrings(cx, str, thatStr, &result))
964 return false;
966 args.rval().setInt32(result);
967 return true;
969 #endif
#if EXPOSE_INTL_API
/*
 * ES6 20140210 draft 21.1.3.12.
 *
 * Native for String.prototype.normalize. The optional first argument selects
 * the normalization form ("NFC", "NFD", "NFKC" or "NFKD"; NFC when undefined);
 * any other value reports JSMSG_INVALID_NORMALIZE_FORM. The normalization
 * itself is done by ICU's unorm_normalize.
 */
static bool
str_normalize(JSContext* cx, unsigned argc, Value* vp)
{
    CallArgs args = CallArgsFromVp(argc, vp);

    // Steps 1-3.
    RootedString str(cx, ThisToStringForStringProto(cx, args));
    if (!str)
        return false;

    // Step 4.
    UNormalizationMode form;
    if (!args.hasDefined(0)) {
        form = UNORM_NFC;
    } else {
        // Steps 5-6.
        RootedLinearString formStr(cx, ArgToRootedString(cx, args, 0));
        if (!formStr)
            return false;

        // Step 7.
        //
        // Compare by contents, not by pointer: |formStr| is only linearized,
        // not atomized, so a string built at runtime (e.g. "NF" + "C") is a
        // different object from the atom even though it names a valid form.
        if (EqualStrings(formStr, cx->names().NFC)) {
            form = UNORM_NFC;
        } else if (EqualStrings(formStr, cx->names().NFD)) {
            form = UNORM_NFD;
        } else if (EqualStrings(formStr, cx->names().NFKC)) {
            form = UNORM_NFKC;
        } else if (EqualStrings(formStr, cx->names().NFKD)) {
            form = UNORM_NFKD;
        } else {
            JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr,
                                 JSMSG_INVALID_NORMALIZE_FORM);
            return false;
        }
    }

    // Step 8.
    AutoStableStringChars stableChars(cx);
    if (!str->ensureFlat(cx) || !stableChars.initTwoByte(cx, str))
        return false;

    static const size_t INLINE_CAPACITY = 32;

    const UChar* srcChars = JSCharToUChar(stableChars.twoByteRange().start().get());
    int32_t srcLen = AssertedCast<int32_t>(str->length());
    Vector<jschar, INLINE_CAPACITY> chars(cx);
    if (!chars.resize(INLINE_CAPACITY))
        return false;

    // First attempt with the inline buffer; on overflow ICU reports the
    // required size, and the call is repeated with a buffer of that size.
    UErrorCode status = U_ZERO_ERROR;
    int32_t size = unorm_normalize(srcChars, srcLen, form, 0,
                                   JSCharToUChar(chars.begin()), INLINE_CAPACITY,
                                   &status);
    if (status == U_BUFFER_OVERFLOW_ERROR) {
        if (!chars.resize(size))
            return false;
        status = U_ZERO_ERROR;
#ifdef DEBUG
        int32_t finalSize =
#endif
        unorm_normalize(srcChars, srcLen, form, 0,
                        JSCharToUChar(chars.begin()), size,
                        &status);
        MOZ_ASSERT(size == finalSize || U_FAILURE(status), "unorm_normalize behaved inconsistently");
    }
    if (U_FAILURE(status)) {
        // Returning false without a pending exception would surface as an
        // uncatchable error, so report the ICU failure explicitly.
        JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INTERNAL_INTL_ERROR);
        return false;
    }

    JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
    if (!ns)
        return false;

    // Step 9.
    args.rval().setString(ns);
    return true;
}
#endif
1051 bool
1052 js_str_charAt(JSContext* cx, unsigned argc, Value* vp)
1054 CallArgs args = CallArgsFromVp(argc, vp);
1056 RootedString str(cx);
1057 size_t i;
1058 if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
1059 str = args.thisv().toString();
1060 i = size_t(args[0].toInt32());
1061 if (i >= str->length())
1062 goto out_of_range;
1063 } else {
1064 str = ThisToStringForStringProto(cx, args);
1065 if (!str)
1066 return false;
1068 double d = 0.0;
1069 if (args.length() > 0 && !ToInteger(cx, args[0], &d))
1070 return false;
1072 if (d < 0 || str->length() <= d)
1073 goto out_of_range;
1074 i = size_t(d);
1077 str = cx->staticStrings().getUnitStringForElement(cx, str, i);
1078 if (!str)
1079 return false;
1080 args.rval().setString(str);
1081 return true;
1083 out_of_range:
1084 args.rval().setString(cx->runtime()->emptyString);
1085 return true;
1088 bool
1089 js::str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res)
1091 RootedString str(cx);
1092 size_t i;
1093 if (index.isInt32()) {
1094 i = index.toInt32();
1095 if (i >= string->length())
1096 goto out_of_range;
1097 } else {
1098 double d = 0.0;
1099 if (!ToInteger(cx, index, &d))
1100 return false;
1101 // check whether d is negative as size_t is unsigned
1102 if (d < 0 || string->length() <= d )
1103 goto out_of_range;
1104 i = size_t(d);
1106 jschar c;
1107 if (!string->getChar(cx, i , &c))
1108 return false;
1109 res.setInt32(c);
1110 return true;
1112 out_of_range:
1113 res.setNaN();
1114 return true;
1117 bool
1118 js_str_charCodeAt(JSContext* cx, unsigned argc, Value* vp)
1120 CallArgs args = CallArgsFromVp(argc, vp);
1121 RootedString str(cx);
1122 RootedValue index(cx);
1123 if (args.thisv().isString()) {
1124 str = args.thisv().toString();
1125 } else {
1126 str = ThisToStringForStringProto(cx, args);
1127 if (!str)
1128 return false;
1130 if (args.length() != 0)
1131 index = args[0];
1132 else
1133 index.setInt32(0);
1135 return js::str_charCodeAt_impl(cx, str, index, args.rval());
1139 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1140 * The patlen argument must be positive and no greater than sBMHPatLenMax.
1142 * Return the index of pat in text, or -1 if not found.
1144 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1145 static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */
1146 static const int sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
1148 template <typename TextChar, typename PatChar>
1149 static int
1150 BoyerMooreHorspool(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
1152 JS_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1154 uint8_t skip[sBMHCharSetSize];
1155 for (uint32_t i = 0; i < sBMHCharSetSize; i++)
1156 skip[i] = uint8_t(patLen);
1158 uint32_t patLast = patLen - 1;
1159 for (uint32_t i = 0; i < patLast; i++) {
1160 jschar c = pat[i];
1161 if (c >= sBMHCharSetSize)
1162 return sBMHBadPattern;
1163 skip[c] = uint8_t(patLast - i);
1166 for (uint32_t k = patLast; k < textLen; ) {
1167 for (uint32_t i = k, j = patLast; ; i--, j--) {
1168 if (text[i] != pat[j])
1169 break;
1170 if (j == 0)
1171 return static_cast<int>(i); /* safe: max string size */
1174 jschar c = text[k];
1175 k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1177 return -1;
1180 template <typename TextChar, typename PatChar>
1181 struct MemCmp {
1182 typedef uint32_t Extent;
1183 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*, uint32_t patLen) {
1184 return (patLen - 1) * sizeof(PatChar);
1186 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
1187 MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1188 return memcmp(p, t, extent) == 0;
1192 template <typename TextChar, typename PatChar>
1193 struct ManualCmp {
1194 typedef const PatChar* Extent;
1195 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat, uint32_t patLen) {
1196 return pat + patLen;
1198 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t, Extent extent) {
1199 for (; p != extent; ++p, ++t) {
1200 if (*p != *t)
1201 return false;
1203 return true;
/*
 * Return a pointer to the first element of |text[0..n)| equal to |pat|, or
 * nullptr if there is none.
 *
 * The scan is unrolled eight ways: the switch consumes the leading
 * (n mod 8) elements -- or a full group of eight when n is a non-zero
 * multiple of eight -- and the loop then handles the rest in groups of eight.
 */
template <typename TextChar, typename PatChar>
static const TextChar*
FirstCharMatcherUnrolled(const TextChar* text, uint32_t n, const PatChar pat)
{
    /*
     * An empty range must not enter the switch: (0 & 7) selects case 0, which
     * would read eight elements past the range and could report a match that
     * lies outside it.
     */
    if (n == 0)
        return nullptr;

    const TextChar* textend = text + n;
    const TextChar* t = text;

    switch ((textend - t) & 7) {
        case 0: if (*t++ == pat) return t - 1; /* fall through */
        case 7: if (*t++ == pat) return t - 1; /* fall through */
        case 6: if (*t++ == pat) return t - 1; /* fall through */
        case 5: if (*t++ == pat) return t - 1; /* fall through */
        case 4: if (*t++ == pat) return t - 1; /* fall through */
        case 3: if (*t++ == pat) return t - 1; /* fall through */
        case 2: if (*t++ == pat) return t - 1; /* fall through */
        case 1: if (*t++ == pat) return t - 1;
    }
    while (textend != t) {
        if (t[0] == pat) return t;
        if (t[1] == pat) return t + 1;
        if (t[2] == pat) return t + 2;
        if (t[3] == pat) return t + 3;
        if (t[4] == pat) return t + 4;
        if (t[5] == pat) return t + 5;
        if (t[6] == pat) return t + 6;
        if (t[7] == pat) return t + 7;
        t += 8;
    }
    return nullptr;
}
/*
 * Find the first occurrence of |pat| in the |n| bytes at |text|. Uses the
 * unrolled matcher when built with clang and memchr otherwise.
 */
static const char*
FirstCharMatcher8bit(const char* text, uint32_t n, const char pat)
{
#if defined(__clang__)
    return FirstCharMatcherUnrolled<char, char>(text, n, pat);
#else
    const void* found = memchr(text, pat, n);
    return static_cast<const char*>(found);
#endif
}
1248 static const jschar*
1249 FirstCharMatcher16bit(const jschar* text, uint32_t n, const jschar pat)
1251 #if defined(XP_MACOSX) || defined(XP_WIN)
1253 * Performance of memchr is horrible in OSX. Windows is better,
1254 * but it is still better to use UnrolledMatcher.
1256 return FirstCharMatcherUnrolled<jschar, jschar>(text, n, pat);
1257 #else
1259 * For linux the best performance is obtained by slightly hacking memchr.
1260 * memchr works only on 8bit char but jschar is 16bit. So we treat jschar
1261 * in blocks of 8bit and use memchr.
1264 const char* text8 = (const char*) text;
1265 const char* pat8 = reinterpret_cast<const char*>(&pat);
1267 JS_ASSERT(n < UINT32_MAX/2);
1268 n *= 2;
1270 uint32_t i = 0;
1271 while (i < n) {
1272 /* Find the first 8 bits of 16bit character in text. */
1273 const char* pos8 = FirstCharMatcher8bit(text8 + i, n - i, pat8[0]);
1274 if (pos8 == nullptr)
1275 return nullptr;
1276 i = static_cast<uint32_t>(pos8 - text8);
1278 /* Incorrect match if it matches the last 8 bits of 16bit char. */
1279 if (i % 2 != 0) {
1280 i++;
1281 continue;
1284 /* Test if last 8 bits match last 8 bits of 16bit char. */
1285 if (pat8[1] == text8[i + 1])
1286 return (text + (i/2));
1288 i += 2;
1290 return nullptr;
1291 #endif
1294 template <class InnerMatch, typename TextChar, typename PatChar>
1295 static int
1296 Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat, uint32_t patlen)
1298 const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
1300 uint32_t i = 0;
1301 uint32_t n = textlen - patlen + 1;
1302 while (i < n) {
1303 const TextChar* pos;
1305 if (sizeof(TextChar) == 2 && sizeof(PatChar) == 2)
1306 pos = (TextChar*) FirstCharMatcher16bit((jschar*)text + i, n - i, pat[0]);
1307 else if (sizeof(TextChar) == 1 && sizeof(PatChar) == 1)
1308 pos = (TextChar*) FirstCharMatcher8bit((char*) text + i, n - i, pat[0]);
1309 else
1310 pos = (TextChar*) FirstCharMatcherUnrolled<TextChar, PatChar>(text + i, n - i, pat[0]);
1312 if (pos == nullptr)
1313 return -1;
1315 i = static_cast<uint32_t>(pos - text);
1316 if (InnerMatch::match(pat + 1, text + i + 1, extent))
1317 return i;
1319 i += 1;
1321 return -1;
1325 template <typename TextChar, typename PatChar>
1326 static MOZ_ALWAYS_INLINE int
1327 StringMatch(const TextChar* text, uint32_t textLen, const PatChar* pat, uint32_t patLen)
1329 if (patLen == 0)
1330 return 0;
1331 if (textLen < patLen)
1332 return -1;
1334 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1336 * Given enough registers, the unrolled loop below is faster than the
1337 * following loop. 32-bit x86 does not have enough registers.
1339 if (patLen == 1) {
1340 const PatChar p0 = *pat;
1341 for (const TextChar* c = text, *end = text + textLen; c != end; ++c) {
1342 if (*c == p0)
1343 return c - text;
1345 return -1;
1347 #endif
1350 * If the text or pattern string is short, BMH will be more expensive than
1351 * the basic linear scan due to initialization cost and a more complex loop
1352 * body. While the correct threshold is input-dependent, we can make a few
1353 * conservative observations:
1354 * - When |textLen| is "big enough", the initialization time will be
1355 * proportionally small, so the worst-case slowdown is minimized.
1356 * - When |patLen| is "too small", even the best case for BMH will be
1357 * slower than a simple scan for large |textLen| due to the more complex
1358 * loop body of BMH.
1359 * From this, the values for "big enough" and "too small" are determined
1360 * empirically. See bug 526348.
1362 if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1363 int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1364 if (index != sBMHBadPattern)
1365 return index;
1369 * For big patterns with large potential overlap we want the SIMD-optimized
1370 * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1371 * use memcmp if one of the strings is TwoByte and the other is Latin1.
1373 * FIXME: Linux memcmp performance is sad and the manual loop is faster.
1375 return
1376 #if !defined(__linux__)
1377 (patLen > 128 && IsSame<TextChar, PatChar>::value)
1378 ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen)
1380 #endif
1381 Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(text, textLen, pat, patLen);
1384 static int32_t
1385 StringMatch(JSLinearString* text, JSLinearString* pat, uint32_t start = 0)
1387 MOZ_ASSERT(start <= text->length());
1388 uint32_t textLen = text->length() - start;
1389 uint32_t patLen = pat->length();
1391 int match;
1392 AutoCheckCannotGC nogc;
1393 if (text->hasLatin1Chars()) {
1394 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1395 if (pat->hasLatin1Chars())
1396 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1397 else
1398 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1399 } else {
1400 const jschar* textChars = text->twoByteChars(nogc) + start;
1401 if (pat->hasLatin1Chars())
1402 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1403 else
1404 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1407 return (match == -1) ? -1 : start + match;
// Log2 of the character-to-node ratio used by RopeMatch: the rope's length is
// shifted right by this amount to obtain the maximum number of leaf nodes
// RopeMatch will visit before it gives up and flattens the rope instead.
1410 static const size_t sRopeMatchThresholdRatioLog2 = 5;
1412 bool
1413 js::StringHasPattern(JSLinearString* text, const jschar* pat, uint32_t patLen)
1415 AutoCheckCannotGC nogc;
1416 return text->hasLatin1Chars()
1417 ? StringMatch(text->latin1Chars(nogc), text->length(), pat, patLen) != -1
1418 : StringMatch(text->twoByteChars(nogc), text->length(), pat, patLen) != -1;
// Finds the first occurrence of |pat| in |text| at or after index |start| by
// forwarding to the JSLinearString overload of StringMatch, which yields the
// match index relative to the beginning of |text|, or -1 when there is no
// match. Note that |start| is a size_t here but is received as a uint32_t by
// StringMatch, so it is implicitly narrowed at the call.
1422 js::StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start)
1424 return StringMatch(text, pat, start);
1427 // When an algorithm does not need a string represented as a single linear
1428 // array of characters, this range utility may be used to traverse the string a
1429 // sequence of linear arrays of characters. This avoids flattening ropes.
1430 class StringSegmentRange
1432 // If malloc() shows up in any profiles from this vector, we can add a new
1433 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1434 AutoStringVector stack;
1435 RootedLinearString cur;
1437 bool settle(JSString* str) {
1438 while (str->isRope()) {
1439 JSRope& rope = str->asRope();
1440 if (!stack.append(rope.rightChild()))
1441 return false;
1442 str = rope.leftChild();
1444 cur = &str->asLinear();
1445 return true;
1448 public:
1449 explicit StringSegmentRange(JSContext* cx)
1450 : stack(cx), cur(cx)
1453 MOZ_WARN_UNUSED_RESULT bool init(JSString* str) {
1454 JS_ASSERT(stack.empty());
1455 return settle(str);
1458 bool empty() const {
1459 return cur == nullptr;
1462 JSLinearString* front() const {
1463 JS_ASSERT(!cur->isRope());
1464 return cur;
1467 MOZ_WARN_UNUSED_RESULT bool popFront() {
1468 JS_ASSERT(!empty());
1469 if (stack.empty()) {
1470 cur = nullptr;
1471 return true;
1473 return settle(stack.popCopy());
// List of linear leaf strings collected from a rope by RopeMatch and scanned
// by RopeMatchImpl. It uses SystemAllocPolicy so that a failed append does not
// report OOM; RopeMatch falls back to matching on the flattened string in
// that case. (The 16 is presumably Vector's inline capacity -- confirm
// against the Vector template.)
1477 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
1479 template <typename TextChar, typename PatChar>
1480 static int
1481 RopeMatchImpl(const AutoCheckCannotGC& nogc, LinearStringVector& strings,
1482 const PatChar* pat, size_t patLen)
1484 /* Absolute offset from the beginning of the logical text string. */
1485 int pos = 0;
1487 for (JSLinearString** outerp = strings.begin(); outerp != strings.end(); ++outerp) {
1488 /* Try to find a match within 'outer'. */
1489 JSLinearString* outer = *outerp;
1490 const TextChar* chars = outer->chars<TextChar>(nogc);
1491 size_t len = outer->length();
1492 int matchResult = StringMatch(chars, len, pat, patLen);
1493 if (matchResult != -1) {
1494 /* Matched! */
1495 return pos + matchResult;
1498 /* Try to find a match starting in 'outer' and running into other nodes. */
1499 const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
1500 const TextChar* const textend = chars + len;
1501 const PatChar p0 = *pat;
1502 const PatChar* const p1 = pat + 1;
1503 const PatChar* const patend = pat + patLen;
1504 for (const TextChar* t = text; t != textend; ) {
1505 if (*t++ != p0)
1506 continue;
1508 JSLinearString** innerp = outerp;
1509 const TextChar* ttend = textend;
1510 const TextChar* tt = t;
1511 for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
1512 while (tt == ttend) {
1513 if (++innerp == strings.end())
1514 return -1;
1516 JSLinearString* inner = *innerp;
1517 tt = inner->chars<TextChar>(nogc);
1518 ttend = tt + inner->length();
1520 if (*pp != *tt)
1521 goto break_continue;
1524 /* Matched! */
1525 return pos + (t - chars) - 1; /* -1 because of *t++ above */
1527 break_continue:;
1530 pos += len;
1533 return -1;
1537 * RopeMatch takes the text to search and the pattern to search for in the text.
1538 * RopeMatch returns false on OOM and otherwise returns the match index through
1539 * the 'match' outparam (-1 for not found).
1541 static bool
1542 RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat, int* match)
1544 uint32_t patLen = pat->length();
1545 if (patLen == 0) {
1546 *match = 0;
1547 return true;
1549 if (text->length() < patLen) {
1550 *match = -1;
1551 return true;
1555 * List of leaf nodes in the rope. If we run out of memory when trying to
1556 * append to this list, we can still fall back to StringMatch, so use the
1557 * system allocator so we don't report OOM in that case.
1559 LinearStringVector strings;
1562 * We don't want to do rope matching if there is a poor node-to-char ratio,
1563 * since this means spending a lot of time in the match loop below. We also
1564 * need to build the list of leaf nodes. Do both here: iterate over the
1565 * nodes so long as there are not too many.
1567 * We also don't use rope matching if the rope contains both Latin1 and
1568 * TwoByte nodes, to simplify the match algorithm.
1571 size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
1572 StringSegmentRange r(cx);
1573 if (!r.init(text))
1574 return false;
1576 bool textIsLatin1 = text->hasLatin1Chars();
1577 while (!r.empty()) {
1578 if (threshold-- == 0 ||
1579 r.front()->hasLatin1Chars() != textIsLatin1 ||
1580 !strings.append(r.front()))
1582 JSLinearString* linear = text->ensureLinear(cx);
1583 if (!linear)
1584 return false;
1586 *match = StringMatch(linear, pat);
1587 return true;
1589 if (!r.popFront())
1590 return false;
1594 AutoCheckCannotGC nogc;
1595 if (text->hasLatin1Chars()) {
1596 if (pat->hasLatin1Chars())
1597 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc), patLen);
1598 else
1599 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc), patLen);
1600 } else {
1601 if (pat->hasLatin1Chars())
1602 *match = RopeMatchImpl<jschar>(nogc, strings, pat->latin1Chars(nogc), patLen);
1603 else
1604 *match = RopeMatchImpl<jschar>(nogc, strings, pat->twoByteChars(nogc), patLen);
1607 return true;
1610 /* ES6 20121026 draft 15.5.4.24. */
1611 static bool
1612 str_contains(JSContext* cx, unsigned argc, Value* vp)
1614 CallArgs args = CallArgsFromVp(argc, vp);
1616 // Steps 1, 2, and 3
1617 RootedString str(cx, ThisToStringForStringProto(cx, args));
1618 if (!str)
1619 return false;
1621 // Steps 4 and 5
1622 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1623 if (!searchStr)
1624 return false;
1626 // Steps 6 and 7
1627 uint32_t pos = 0;
1628 if (args.hasDefined(1)) {
1629 if (args[1].isInt32()) {
1630 int i = args[1].toInt32();
1631 pos = (i < 0) ? 0U : uint32_t(i);
1632 } else {
1633 double d;
1634 if (!ToInteger(cx, args[1], &d))
1635 return false;
1636 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1640 // Step 8
1641 uint32_t textLen = str->length();
1643 // Step 9
1644 uint32_t start = Min(Max(pos, 0U), textLen);
1646 // Steps 10 and 11
1647 JSLinearString* text = str->ensureLinear(cx);
1648 if (!text)
1649 return false;
1651 args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
1652 return true;
1655 /* ES6 20120927 draft 15.5.4.7. */
1656 static bool
1657 str_indexOf(JSContext* cx, unsigned argc, Value* vp)
1659 CallArgs args = CallArgsFromVp(argc, vp);
1661 // Steps 1, 2, and 3
1662 RootedString str(cx, ThisToStringForStringProto(cx, args));
1663 if (!str)
1664 return false;
1666 // Steps 4 and 5
1667 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1668 if (!searchStr)
1669 return false;
1671 // Steps 6 and 7
1672 uint32_t pos = 0;
1673 if (args.hasDefined(1)) {
1674 if (args[1].isInt32()) {
1675 int i = args[1].toInt32();
1676 pos = (i < 0) ? 0U : uint32_t(i);
1677 } else {
1678 double d;
1679 if (!ToInteger(cx, args[1], &d))
1680 return false;
1681 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1685 // Step 8
1686 uint32_t textLen = str->length();
1688 // Step 9
1689 uint32_t start = Min(Max(pos, 0U), textLen);
1691 // Steps 10 and 11
1692 JSLinearString* text = str->ensureLinear(cx);
1693 if (!text)
1694 return false;
1696 args.rval().setInt32(StringMatch(text, searchStr, start));
1697 return true;
1700 template <typename TextChar, typename PatChar>
1701 static int32_t
1702 LastIndexOfImpl(const TextChar* text, size_t textLen, const PatChar* pat, size_t patLen,
1703 size_t start)
1705 MOZ_ASSERT(patLen > 0);
1706 MOZ_ASSERT(patLen <= textLen);
1707 MOZ_ASSERT(start <= textLen - patLen);
1709 const PatChar p0 = *pat;
1710 const PatChar* patNext = pat + 1;
1711 const PatChar* patEnd = pat + patLen;
1713 for (const TextChar* t = text + start; t >= text; --t) {
1714 if (*t == p0) {
1715 const TextChar* t1 = t + 1;
1716 for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
1717 if (*t1 != *p1)
1718 goto break_continue;
1721 return static_cast<int32_t>(t - text);
1723 break_continue:;
1726 return -1;
1729 static bool
1730 str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp)
1732 CallArgs args = CallArgsFromVp(argc, vp);
1733 RootedString textstr(cx, ThisToStringForStringProto(cx, args));
1734 if (!textstr)
1735 return false;
1737 RootedLinearString pat(cx, ArgToRootedString(cx, args, 0));
1738 if (!pat)
1739 return false;
1741 size_t textLen = textstr->length();
1742 size_t patLen = pat->length();
1743 int start = textLen - patLen; // Start searching here
1744 if (start < 0) {
1745 args.rval().setInt32(-1);
1746 return true;
1749 if (args.hasDefined(1)) {
1750 if (args[1].isInt32()) {
1751 int i = args[1].toInt32();
1752 if (i <= 0)
1753 start = 0;
1754 else if (i < start)
1755 start = i;
1756 } else {
1757 double d;
1758 if (!ToNumber(cx, args[1], &d))
1759 return false;
1760 if (!IsNaN(d)) {
1761 d = ToInteger(d);
1762 if (d <= 0)
1763 start = 0;
1764 else if (d < start)
1765 start = int(d);
1770 if (patLen == 0) {
1771 args.rval().setInt32(start);
1772 return true;
1775 JSLinearString* text = textstr->ensureLinear(cx);
1776 if (!text)
1777 return false;
1779 int32_t res;
1780 AutoCheckCannotGC nogc;
1781 if (text->hasLatin1Chars()) {
1782 const Latin1Char* textChars = text->latin1Chars(nogc);
1783 if (pat->hasLatin1Chars())
1784 res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
1785 else
1786 res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
1787 } else {
1788 const jschar* textChars = text->twoByteChars(nogc);
1789 if (pat->hasLatin1Chars())
1790 res = LastIndexOfImpl(textChars, textLen, pat->latin1Chars(nogc), patLen, start);
1791 else
1792 res = LastIndexOfImpl(textChars, textLen, pat->twoByteChars(nogc), patLen, start);
1795 args.rval().setInt32(res);
1796 return true;
1799 static bool
1800 HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start)
1802 MOZ_ASSERT(start + pat->length() <= text->length());
1804 size_t patLen = pat->length();
1806 AutoCheckCannotGC nogc;
1807 if (text->hasLatin1Chars()) {
1808 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1809 if (pat->hasLatin1Chars())
1810 return PodEqual(textChars, pat->latin1Chars(nogc), patLen);
1812 return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
1815 const jschar* textChars = text->twoByteChars(nogc) + start;
1816 if (pat->hasTwoByteChars())
1817 return PodEqual(textChars, pat->twoByteChars(nogc), patLen);
1819 return EqualChars(pat->latin1Chars(nogc), textChars, patLen);
1822 /* ES6 20131108 draft 21.1.3.18. */
1823 static bool
1824 str_startsWith(JSContext* cx, unsigned argc, Value* vp)
1826 CallArgs args = CallArgsFromVp(argc, vp);
1828 // Steps 1, 2, and 3
1829 RootedString str(cx, ThisToStringForStringProto(cx, args));
1830 if (!str)
1831 return false;
1833 // Step 4
1834 if (args.get(0).isObject() && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
1835 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1836 "first", "", "Regular Expression");
1837 return false;
1840 // Steps 5 and 6
1841 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1842 if (!searchStr)
1843 return false;
1845 // Steps 7 and 8
1846 uint32_t pos = 0;
1847 if (args.hasDefined(1)) {
1848 if (args[1].isInt32()) {
1849 int i = args[1].toInt32();
1850 pos = (i < 0) ? 0U : uint32_t(i);
1851 } else {
1852 double d;
1853 if (!ToInteger(cx, args[1], &d))
1854 return false;
1855 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1859 // Step 9
1860 uint32_t textLen = str->length();
1862 // Step 10
1863 uint32_t start = Min(Max(pos, 0U), textLen);
1865 // Step 11
1866 uint32_t searchLen = searchStr->length();
1868 // Step 12
1869 if (searchLen + start < searchLen || searchLen + start > textLen) {
1870 args.rval().setBoolean(false);
1871 return true;
1874 // Steps 13 and 14
1875 JSLinearString* text = str->ensureLinear(cx);
1876 if (!text)
1877 return false;
1879 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
1880 return true;
1883 /* ES6 20131108 draft 21.1.3.7. */
1884 static bool
1885 str_endsWith(JSContext* cx, unsigned argc, Value* vp)
1887 CallArgs args = CallArgsFromVp(argc, vp);
1889 // Steps 1, 2, and 3
1890 RootedString str(cx, ThisToStringForStringProto(cx, args));
1891 if (!str)
1892 return false;
1894 // Step 4
1895 if (args.get(0).isObject() && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
1896 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_INVALID_ARG_TYPE,
1897 "first", "", "Regular Expression");
1898 return false;
1901 // Steps 5 and 6
1902 RootedLinearString searchStr(cx, ArgToRootedString(cx, args, 0));
1903 if (!searchStr)
1904 return false;
1906 // Step 7
1907 uint32_t textLen = str->length();
1909 // Steps 8 and 9
1910 uint32_t pos = textLen;
1911 if (args.hasDefined(1)) {
1912 if (args[1].isInt32()) {
1913 int i = args[1].toInt32();
1914 pos = (i < 0) ? 0U : uint32_t(i);
1915 } else {
1916 double d;
1917 if (!ToInteger(cx, args[1], &d))
1918 return false;
1919 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
1923 // Step 10
1924 uint32_t end = Min(Max(pos, 0U), textLen);
1926 // Step 11
1927 uint32_t searchLen = searchStr->length();
1929 // Step 13 (reordered)
1930 if (searchLen > end) {
1931 args.rval().setBoolean(false);
1932 return true;
1935 // Step 12
1936 uint32_t start = end - searchLen;
1938 // Steps 14 and 15
1939 JSLinearString* text = str->ensureLinear(cx);
1940 if (!text)
1941 return false;
1943 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
1944 return true;
1947 template <typename CharT>
1948 static void
1949 TrimString(const CharT* chars, bool trimLeft, bool trimRight, size_t length,
1950 size_t* pBegin, size_t* pEnd)
1952 size_t begin = 0, end = length;
1954 if (trimLeft) {
1955 while (begin < length && unicode::IsSpace(chars[begin]))
1956 ++begin;
1959 if (trimRight) {
1960 while (end > begin && unicode::IsSpace(chars[end - 1]))
1961 --end;
1964 *pBegin = begin;
1965 *pEnd = end;
1968 static bool
1969 TrimString(JSContext* cx, Value* vp, bool trimLeft, bool trimRight)
1971 CallReceiver call = CallReceiverFromVp(vp);
1972 RootedString str(cx, ThisToStringForStringProto(cx, call));
1973 if (!str)
1974 return false;
1976 JSLinearString* linear = str->ensureLinear(cx);
1977 if (!linear)
1978 return false;
1980 size_t length = linear->length();
1981 size_t begin, end;
1982 if (linear->hasLatin1Chars()) {
1983 AutoCheckCannotGC nogc;
1984 TrimString(linear->latin1Chars(nogc), trimLeft, trimRight, length, &begin, &end);
1985 } else {
1986 AutoCheckCannotGC nogc;
1987 TrimString(linear->twoByteChars(nogc), trimLeft, trimRight, length, &begin, &end);
1990 str = NewDependentString(cx, str, begin, end - begin);
1991 if (!str)
1992 return false;
1994 call.rval().setString(str);
1995 return true;
1998 static bool
1999 str_trim(JSContext* cx, unsigned argc, Value* vp)
2001 return TrimString(cx, vp, true, true);
2004 static bool
2005 str_trimLeft(JSContext* cx, unsigned argc, Value* vp)
2007 return TrimString(cx, vp, true, false);
2010 static bool
2011 str_trimRight(JSContext* cx, unsigned argc, Value* vp)
2013 return TrimString(cx, vp, false, true);
2017 * Perl-inspired string functions.
2020 namespace {
2022 /* Result of a successfully performed flat match. */
2023 class FlatMatch
2025 RootedAtom pat_;
2026 int32_t match_;
2028 friend class StringRegExpGuard;
2030 public:
2031 explicit FlatMatch(JSContext* cx) : pat_(cx) {}
2032 JSLinearString* pattern() const { return pat_; }
2033 size_t patternLength() const { return pat_->length(); }
2036 * Note: The match is -1 when the match is performed successfully,
2037 * but no match is found.
2039 int32_t match() const { return match_; }
2042 } /* anonymous namespace */
2044 static inline bool
2045 IsRegExpMetaChar(jschar c)
2047 switch (c) {
2048 /* Taken from the PatternCharacter production in 15.10.1. */
2049 case '^': case '$': case '\\': case '.': case '*': case '+':
2050 case '?': case '(': case ')': case '[': case ']': case '{':
2051 case '}': case '|':
2052 return true;
2053 default:
2054 return false;
/* Returns true iff any of chars[0..length) is a regular expression meta-character. */
template <typename CharT>
static inline bool
HasRegExpMetaChars(const CharT* chars, size_t length)
{
    const CharT* const end = chars + length;
    for (const CharT* p = chars; p != end; ++p) {
        if (IsRegExpMetaChar(*p))
            return true;
    }
    return false;
}
2069 bool
2070 js::StringHasRegExpMetaChars(JSLinearString* str, size_t beginOffset, size_t endOffset)
2072 JS_ASSERT(beginOffset + endOffset <= str->length());
2074 AutoCheckCannotGC nogc;
2075 if (str->hasLatin1Chars())
2076 return HasRegExpMetaChars(str->latin1Chars(nogc) + beginOffset, str->length() - beginOffset - endOffset);
2078 return HasRegExpMetaChars(str->twoByteChars(nogc) + beginOffset, str->length() - beginOffset - endOffset);
2081 namespace {
2084 * StringRegExpGuard factors logic out of String regexp operations.
2086 * |optarg| indicates in which argument position RegExp flags will be found, if
2087 * present. This is a Mozilla extension and not part of any ECMA spec.
2089 class MOZ_STACK_CLASS StringRegExpGuard
2091 RegExpGuard re_;
2092 FlatMatch fm;
2093 RootedObject obj_;
2096 * Upper bound on the number of characters we are willing to potentially
2097 * waste on searching for RegExp meta-characters.
2099 static const size_t MAX_FLAT_PAT_LEN = 256;
2101 template <typename CharT>
2102 static bool
2103 flattenPattern(StringBuffer& sb, const CharT* chars, size_t len)
2105 static const char ESCAPE_CHAR = '\\';
2106 for (const CharT* it = chars; it < chars + len; ++it) {
2107 if (IsRegExpMetaChar(*it)) {
2108 if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
2109 return false;
2110 } else {
2111 if (!sb.append(*it))
2112 return false;
2115 return true;
2118 static JSAtom*
2119 flattenPattern(JSContext* cx, JSAtom* pat)
2121 StringBuffer sb(cx);
2122 if (!sb.reserve(pat->length()))
2123 return nullptr;
2125 if (pat->hasLatin1Chars()) {
2126 AutoCheckCannotGC nogc;
2127 if (!flattenPattern(sb, pat->latin1Chars(nogc), pat->length()))
2128 return nullptr;
2129 } else {
2130 AutoCheckCannotGC nogc;
2131 if (!flattenPattern(sb, pat->twoByteChars(nogc), pat->length()))
2132 return nullptr;
2135 return sb.finishAtom();
2138 public:
2139 explicit StringRegExpGuard(JSContext* cx)
2140 : re_(cx), fm(cx), obj_(cx)
2143 /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
2144 bool init(JSContext* cx, CallArgs args, bool convertVoid = false)
2146 if (args.length() != 0 && IsObjectWithClass(args[0], ESClass_RegExp, cx))
2147 return init(cx, &args[0].toObject());
2149 if (convertVoid && !args.hasDefined(0)) {
2150 fm.pat_ = cx->runtime()->emptyString;
2151 return true;
2154 JSString* arg = ArgToRootedString(cx, args, 0);
2155 if (!arg)
2156 return false;
2158 fm.pat_ = AtomizeString(cx, arg);
2159 if (!fm.pat_)
2160 return false;
2162 return true;
2165 bool init(JSContext* cx, JSObject* regexp) {
2166 obj_ = regexp;
2168 JS_ASSERT(ObjectClassIs(obj_, ESClass_RegExp, cx));
2170 if (!RegExpToShared(cx, obj_, &re_))
2171 return false;
2172 return true;
2175 bool init(JSContext* cx, HandleString pattern) {
2176 fm.pat_ = AtomizeString(cx, pattern);
2177 if (!fm.pat_)
2178 return false;
2179 return true;
2183 * Attempt to match |patstr| to |textstr|. A flags argument, metachars in
2184 * the pattern string, or a lengthy pattern string can thwart this process.
2186 * |checkMetaChars| looks for regexp metachars in the pattern string.
2188 * Return whether flat matching could be used.
2190 * N.B. tryFlatMatch returns nullptr on OOM, so the caller must check
2191 * cx->isExceptionPending().
2193 const FlatMatch*
2194 tryFlatMatch(JSContext* cx, JSString* text, unsigned optarg, unsigned argc,
2195 bool checkMetaChars = true)
2197 if (re_.initialized())
2198 return nullptr;
2200 if (optarg < argc)
2201 return nullptr;
2203 size_t patLen = fm.pat_->length();
2204 if (checkMetaChars && (patLen > MAX_FLAT_PAT_LEN || StringHasRegExpMetaChars(fm.pat_)))
2205 return nullptr;
2208 * |text| could be a rope, so we want to avoid flattening it for as
2209 * long as possible.
2211 if (text->isRope()) {
2212 if (!RopeMatch(cx, &text->asRope(), fm.pat_, &fm.match_))
2213 return nullptr;
2214 } else {
2215 fm.match_ = StringMatch(&text->asLinear(), fm.pat_, 0);
2218 return &fm;
2221 /* If the pattern is not already a regular expression, make it so. */
2222 bool normalizeRegExp(JSContext* cx, bool flat, unsigned optarg, CallArgs args)
2224 if (re_.initialized())
2225 return true;
2227 /* Build RegExp from pattern string. */
2228 RootedString opt(cx);
2229 if (optarg < args.length()) {
2230 opt = ToString<CanGC>(cx, args[optarg]);
2231 if (!opt)
2232 return false;
2233 } else {
2234 opt = nullptr;
2237 Rooted<JSAtom*> pat(cx);
2238 if (flat) {
2239 pat = flattenPattern(cx, fm.pat_);
2240 if (!pat)
2241 return false;
2242 } else {
2243 pat = fm.pat_;
2245 JS_ASSERT(pat);
2247 return cx->compartment()->regExps.get(cx, pat, opt, &re_);
2250 bool zeroLastIndex(JSContext* cx) {
2251 if (!regExpIsObject())
2252 return true;
2254 // Use a fast path for same-global RegExp objects with writable
2255 // lastIndex.
2256 if (obj_->is<RegExpObject>() && obj_->nativeLookup(cx, cx->names().lastIndex)->writable()) {
2257 obj_->as<RegExpObject>().zeroLastIndex();
2258 return true;
2261 // Handle everything else generically (including throwing if .lastIndex is non-writable).
2262 RootedValue zero(cx, Int32Value(0));
2263 return JSObject::setProperty(cx, obj_, obj_, cx->names().lastIndex, &zero, true);
2266 RegExpShared& regExp() { return *re_; }
2268 bool regExpIsObject() { return obj_ != nullptr; }
2269 HandleObject regExpObject() {
2270 JS_ASSERT(regExpIsObject());
2271 return obj_;
2274 private:
2275 StringRegExpGuard(const StringRegExpGuard&) MOZ_DELETE;
2276 void operator=(const StringRegExpGuard&) MOZ_DELETE;
2279 } /* anonymous namespace */
2281 static bool
2282 DoMatchLocal(JSContext* cx, CallArgs args, RegExpStatics* res, HandleLinearString input,
2283 RegExpShared& re)
2285 size_t i = 0;
2286 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2287 RegExpRunStatus status = re.execute(cx, input, &i, matches);
2288 if (status == RegExpRunStatus_Error)
2289 return false;
2291 if (status == RegExpRunStatus_Success_NotFound) {
2292 args.rval().setNull();
2293 return true;
2296 if (!res->updateFromMatchPairs(cx, input, matches))
2297 return false;
2299 RootedValue rval(cx);
2300 if (!CreateRegExpMatchResult(cx, input, matches, &rval))
2301 return false;
2303 args.rval().set(rval);
2304 return true;
2307 /* ES5 15.5.4.10 step 8. */
2308 static bool
2309 DoMatchGlobal(JSContext* cx, CallArgs args, RegExpStatics* res, HandleLinearString input,
2310 StringRegExpGuard& g)
2312 // Step 8a.
2314 // This single zeroing of "lastIndex" covers all "lastIndex" changes in the
2315 // rest of String.prototype.match, particularly in steps 8f(i) and
2316 // 8f(iii)(2)(a). Here's why.
2318 // The inputs to the calls to RegExp.prototype.exec are a RegExp object
2319 // whose .global is true and a string. The only side effect of a call in
2320 // these circumstances is that the RegExp's .lastIndex will be modified to
2321 // the next starting index after the discovered match (or to 0 if there's
2322 // no remaining match). Because .lastIndex is a non-configurable data
2323 // property and no script-controllable code executes after step 8a, passing
2324 // step 8a implies *every* .lastIndex set succeeds. String.prototype.match
2325 // calls RegExp.prototype.exec repeatedly, and the last call doesn't match,
2326 // so the final value of .lastIndex is 0: exactly the state after step 8a
2327 // succeeds. No spec step lets script observe intermediate .lastIndex
2328 // values.
2330 // The arrays returned by RegExp.prototype.exec always have a string at
2331 // index 0, for which [[Get]]s have no side effects.
2333 // Filling in a new array using [[DefineOwnProperty]] is unobservable.
2335 // This is a tricky point, because after this set, our implementation *can*
2336 // fail. The key is that script can't distinguish these failure modes from
2337 // one where, in spec terms, we fail immediately after step 8a. That *in
2338 // reality* we might have done extra matching work, or created a partial
2339 // results array to return, or hit an interrupt, is irrelevant. The
2340 // script can't tell we did any of those things but didn't update
2341 // .lastIndex. Thus we can optimize steps 8b onward however we want,
2342 // including eliminating intermediate .lastIndex sets, as long as we don't
2343 // add ways for script to observe the intermediate states.
2345 // In short: it's okay to cheat (by setting .lastIndex to 0, once) because
2346 // we can't get caught.
2347 if (!g.zeroLastIndex(cx))
2348 return false;
2350 // Step 8b.
2351 AutoValueVector elements(cx);
2353 size_t lastSuccessfulStart = 0;
2355 // The loop variables from steps 8c-e aren't needed, as we use different
2356 // techniques from the spec to implement step 8f's loop.
2358 // Step 8f.
2359 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2360 size_t charsLen = input->length();
2361 RegExpShared& re = g.regExp();
2362 for (size_t searchIndex = 0; searchIndex <= charsLen; ) {
2363 if (!CheckForInterrupt(cx))
2364 return false;
2366 // Steps 8f(i-ii), minus "lastIndex" updates (see above).
2367 size_t nextSearchIndex = searchIndex;
2368 RegExpRunStatus status = re.execute(cx, input, &nextSearchIndex, matches);
2369 if (status == RegExpRunStatus_Error)
2370 return false;
2372 // Step 8f(ii).
2373 if (status == RegExpRunStatus_Success_NotFound)
2374 break;
2376 lastSuccessfulStart = searchIndex;
2377 MatchPair& match = matches[0];
2379 // Steps 8f(iii)(1-3).
2380 searchIndex = match.isEmpty() ? nextSearchIndex + 1 : nextSearchIndex;
2382 // Step 8f(iii)(4-5).
2383 JSLinearString* str = NewDependentString(cx, input, match.start, match.length());
2384 if (!str)
2385 return false;
2386 if (!elements.append(StringValue(str)))
2387 return false;
2390 // Step 8g.
2391 if (elements.empty()) {
2392 args.rval().setNull();
2393 return true;
2396 // The last *successful* match updates the RegExpStatics. (Interestingly,
2397 // this implies that String.prototype.match's semantics aren't those
2398 // implied by the RegExp.prototype.exec calls in the ES5 algorithm.)
2399 res->updateLazily(cx, input, &re, lastSuccessfulStart);
2401 // Steps 8b, 8f(iii)(5-6), 8h.
2402 JSObject* array = NewDenseCopiedArray(cx, elements.length(), elements.begin());
2403 if (!array)
2404 return false;
2406 args.rval().setObject(*array);
2407 return true;
2410 static bool
2411 BuildFlatMatchArray(JSContext* cx, HandleString textstr, const FlatMatch& fm, CallArgs* args)
2413 if (fm.match() < 0) {
2414 args->rval().setNull();
2415 return true;
2418 /* For this non-global match, produce a RegExp.exec-style array. */
2419 RootedObject obj(cx, NewDenseEmptyArray(cx));
2420 if (!obj)
2421 return false;
2423 RootedValue patternVal(cx, StringValue(fm.pattern()));
2424 RootedValue matchVal(cx, Int32Value(fm.match()));
2425 RootedValue textVal(cx, StringValue(textstr));
2427 if (!JSObject::defineElement(cx, obj, 0, patternVal) ||
2428 !JSObject::defineProperty(cx, obj, cx->names().index, matchVal) ||
2429 !JSObject::defineProperty(cx, obj, cx->names().input, textVal))
2431 return false;
2434 args->rval().setObject(*obj);
2435 return true;
2438 /* ES5 15.5.4.10. */
2439 bool
2440 js::str_match(JSContext* cx, unsigned argc, Value* vp)
2442 CallArgs args = CallArgsFromVp(argc, vp);
2444 /* Steps 1-2. */
2445 RootedString str(cx, ThisToStringForStringProto(cx, args));
2446 if (!str)
2447 return false;
2449 /* Steps 3-4, plus the trailing-argument "flags" extension. */
2450 StringRegExpGuard g(cx);
2451 if (!g.init(cx, args, true))
2452 return false;
2454 /* Fast path when the search pattern can be searched for as a string. */
2455 if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length()))
2456 return BuildFlatMatchArray(cx, str, *fm, &args);
2458 /* Return if there was an error in tryFlatMatch. */
2459 if (cx->isExceptionPending())
2460 return false;
2462 /* Create regular-expression internals as needed to perform the match. */
2463 if (!g.normalizeRegExp(cx, false, 1, args))
2464 return false;
2466 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
2467 if (!res)
2468 return false;
2470 RootedLinearString linearStr(cx, str->ensureLinear(cx));
2471 if (!linearStr)
2472 return false;
2474 /* Steps 5-6, 7. */
2475 if (!g.regExp().global())
2476 return DoMatchLocal(cx, args, res, linearStr, g.regExp());
2478 /* Steps 6, 8. */
2479 return DoMatchGlobal(cx, args, res, linearStr, g);
2482 bool
2483 js::str_search(JSContext* cx, unsigned argc, Value* vp)
2485 CallArgs args = CallArgsFromVp(argc, vp);
2486 RootedString str(cx, ThisToStringForStringProto(cx, args));
2487 if (!str)
2488 return false;
2490 StringRegExpGuard g(cx);
2491 if (!g.init(cx, args, true))
2492 return false;
2493 if (const FlatMatch* fm = g.tryFlatMatch(cx, str, 1, args.length())) {
2494 args.rval().setInt32(fm->match());
2495 return true;
2498 if (cx->isExceptionPending()) /* from tryFlatMatch */
2499 return false;
2501 if (!g.normalizeRegExp(cx, false, 1, args))
2502 return false;
2504 RootedLinearString linearStr(cx, str->ensureLinear(cx));
2505 if (!linearStr)
2506 return false;
2508 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
2509 if (!res)
2510 return false;
2512 /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
2513 size_t i = 0;
2514 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2515 RegExpRunStatus status = g.regExp().execute(cx, linearStr, &i, matches);
2516 if (status == RegExpRunStatus_Error)
2517 return false;
2519 if (status == RegExpRunStatus_Success)
2520 res->updateLazily(cx, linearStr, &g.regExp(), 0);
2522 args.rval().setInt32(status == RegExpRunStatus_Success_NotFound ? -1 : matches[0].start);
2523 return true;
2526 // Utility for building a rope (lazy concatenation) of strings.
2527 class RopeBuilder {
2528 JSContext* cx;
2529 RootedString res;
2531 RopeBuilder(const RopeBuilder& other) MOZ_DELETE;
2532 void operator=(const RopeBuilder& other) MOZ_DELETE;
2534 public:
2535 explicit RopeBuilder(JSContext* cx)
2536 : cx(cx), res(cx, cx->runtime()->emptyString)
2539 inline bool append(HandleString str) {
2540 res = ConcatStrings<CanGC>(cx, res, str);
2541 return !!res;
2544 inline JSString* result() {
2545 return res;
2549 namespace {
2551 template <typename CharT>
2552 static uint32_t
2553 FindDollarIndex(const CharT* chars, size_t length)
2555 if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2556 uint32_t dollarIndex = p - chars;
2557 MOZ_ASSERT(dollarIndex < length);
2558 return dollarIndex;
2560 return UINT32_MAX;
2563 struct ReplaceData
2565 explicit ReplaceData(JSContext* cx)
2566 : str(cx), g(cx), lambda(cx), elembase(cx), repstr(cx),
2567 fig(cx, NullValue()), sb(cx)
2570 inline void setReplacementString(JSLinearString* string) {
2571 JS_ASSERT(string);
2572 lambda = nullptr;
2573 elembase = nullptr;
2574 repstr = string;
2576 AutoCheckCannotGC nogc;
2577 dollarIndex = string->hasLatin1Chars()
2578 ? FindDollarIndex(string->latin1Chars(nogc), string->length())
2579 : FindDollarIndex(string->twoByteChars(nogc), string->length());
2582 inline void setReplacementFunction(JSObject* func) {
2583 JS_ASSERT(func);
2584 lambda = func;
2585 elembase = nullptr;
2586 repstr = nullptr;
2587 dollarIndex = UINT32_MAX;
2590 RootedString str; /* 'this' parameter object as a string */
2591 StringRegExpGuard g; /* regexp parameter object and private data */
2592 RootedObject lambda; /* replacement function object or null */
2593 RootedObject elembase; /* object for function(a){return b[a]} replace */
2594 RootedLinearString repstr; /* replacement string */
2595 uint32_t dollarIndex; /* index of first $ in repstr, or UINT32_MAX */
2596 int leftIndex; /* left context index in str->chars */
2597 bool calledBack; /* record whether callback has been called */
2598 FastInvokeGuard fig; /* used for lambda calls, also holds arguments */
2599 StringBuffer sb; /* buffer built during DoMatch */
2602 } /* anonymous namespace */
2604 static bool
2605 ReplaceRegExp(JSContext* cx, RegExpStatics* res, ReplaceData& rdata);
2607 static bool
2608 DoMatchForReplaceLocal(JSContext* cx, RegExpStatics* res, HandleLinearString linearStr,
2609 RegExpShared& re, ReplaceData& rdata)
2611 size_t i = 0;
2612 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2613 RegExpRunStatus status = re.execute(cx, linearStr, &i, matches);
2614 if (status == RegExpRunStatus_Error)
2615 return false;
2617 if (status == RegExpRunStatus_Success_NotFound)
2618 return true;
2620 if (!res->updateFromMatchPairs(cx, linearStr, matches))
2621 return false;
2623 return ReplaceRegExp(cx, res, rdata);
2626 static bool
2627 DoMatchForReplaceGlobal(JSContext* cx, RegExpStatics* res, HandleLinearString linearStr,
2628 RegExpShared& re, ReplaceData& rdata)
2630 size_t charsLen = linearStr->length();
2631 ScopedMatchPairs matches(&cx->tempLifoAlloc());
2632 for (size_t count = 0, i = 0; i <= charsLen; ++count) {
2633 if (!CheckForInterrupt(cx))
2634 return false;
2636 RegExpRunStatus status = re.execute(cx, linearStr, &i, matches);
2637 if (status == RegExpRunStatus_Error)
2638 return false;
2640 if (status == RegExpRunStatus_Success_NotFound)
2641 break;
2643 if (!res->updateFromMatchPairs(cx, linearStr, matches))
2644 return false;
2646 if (!ReplaceRegExp(cx, res, rdata))
2647 return false;
2648 if (!res->matched())
2649 ++i;
2652 return true;
2655 template <typename CharT>
2656 static bool
2657 InterpretDollar(RegExpStatics* res, const CharT* bp, const CharT* dp, const CharT* ep,
2658 ReplaceData& rdata, JSSubString* out, size_t* skip)
2660 JS_ASSERT(*dp == '$');
2662 /* If there is only a dollar, bail now */
2663 if (dp + 1 >= ep)
2664 return false;
2666 /* Interpret all Perl match-induced dollar variables. */
2667 jschar dc = dp[1];
2668 if (JS7_ISDEC(dc)) {
2669 /* ECMA-262 Edition 3: 1-9 or 01-99 */
2670 unsigned num = JS7_UNDEC(dc);
2671 if (num > res->getMatches().parenCount())
2672 return false;
2674 const CharT* cp = dp + 2;
2675 if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
2676 unsigned tmp = 10 * num + JS7_UNDEC(dc);
2677 if (tmp <= res->getMatches().parenCount()) {
2678 cp++;
2679 num = tmp;
2682 if (num == 0)
2683 return false;
2685 *skip = cp - dp;
2687 JS_ASSERT(num <= res->getMatches().parenCount());
2690 * Note: we index to get the paren with the (1-indexed) pair
2691 * number, as opposed to a (0-indexed) paren number.
2693 res->getParen(num, out);
2694 return true;
2697 *skip = 2;
2698 switch (dc) {
2699 case '$':
2700 out->init(rdata.repstr, dp - bp, 1);
2701 return true;
2702 case '&':
2703 res->getLastMatch(out);
2704 return true;
2705 case '+':
2706 res->getLastParen(out);
2707 return true;
2708 case '`':
2709 res->getLeftContext(out);
2710 return true;
2711 case '\'':
2712 res->getRightContext(out);
2713 return true;
2715 return false;
2718 template <typename CharT>
2719 static bool
2720 FindReplaceLengthString(JSContext* cx, RegExpStatics* res, ReplaceData& rdata, size_t* sizep)
2722 JSLinearString* repstr = rdata.repstr;
2723 CheckedInt<uint32_t> replen = repstr->length();
2725 if (rdata.dollarIndex != UINT32_MAX) {
2726 AutoCheckCannotGC nogc;
2727 MOZ_ASSERT(rdata.dollarIndex < repstr->length());
2728 const CharT* bp = repstr->chars<CharT>(nogc);
2729 const CharT* dp = bp + rdata.dollarIndex;
2730 const CharT* ep = bp + repstr->length();
2731 do {
2732 JSSubString sub;
2733 size_t skip;
2734 if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
2735 if (sub.length > skip)
2736 replen += sub.length - skip;
2737 else
2738 replen -= skip - sub.length;
2739 dp += skip;
2740 } else {
2741 dp++;
2744 dp = js_strchr_limit(dp, '$', ep);
2745 } while (dp);
2748 if (!replen.isValid()) {
2749 js_ReportAllocationOverflow(cx);
2750 return false;
2753 *sizep = replen.value();
2754 return true;
2757 static bool
2758 FindReplaceLength(JSContext* cx, RegExpStatics* res, ReplaceData& rdata, size_t* sizep)
2760 if (rdata.elembase) {
2762 * The base object is used when replace was passed a lambda which looks like
2763 * 'function(a) { return b[a]; }' for the base object b. b will not change
2764 * in the course of the replace unless we end up making a scripted call due
2765 * to accessing a scripted getter or a value with a scripted toString.
2767 JS_ASSERT(rdata.lambda);
2768 JS_ASSERT(!rdata.elembase->getOps()->lookupProperty);
2769 JS_ASSERT(!rdata.elembase->getOps()->getProperty);
2771 RootedValue match(cx);
2772 if (!res->createLastMatch(cx, &match))
2773 return false;
2774 JSAtom* atom = ToAtom<CanGC>(cx, match);
2775 if (!atom)
2776 return false;
2778 RootedValue v(cx);
2779 if (HasDataProperty(cx, rdata.elembase, AtomToId(atom), v.address()) && v.isString()) {
2780 rdata.repstr = v.toString()->ensureLinear(cx);
2781 if (!rdata.repstr)
2782 return false;
2783 *sizep = rdata.repstr->length();
2784 return true;
2788 * Couldn't handle this property, fall through and despecialize to the
2789 * general lambda case.
2791 rdata.elembase = nullptr;
2794 if (rdata.lambda) {
2795 RootedObject lambda(cx, rdata.lambda);
2796 PreserveRegExpStatics staticsGuard(cx, res);
2797 if (!staticsGuard.init(cx))
2798 return false;
2801 * In the lambda case, not only do we find the replacement string's
2802 * length, we compute repstr and return it via rdata for use within
2803 * DoReplace. The lambda is called with arguments ($&, $1, $2, ...,
2804 * index, input), i.e., all the properties of a regexp match array.
2805 * For $&, etc., we must create string jsvals from cx->regExpStatics.
2806 * We grab up stack space to keep the newborn strings GC-rooted.
2808 unsigned p = res->getMatches().parenCount();
2809 unsigned argc = 1 + p + 2;
2811 InvokeArgs& args = rdata.fig.args();
2812 if (!args.init(argc))
2813 return false;
2815 args.setCallee(ObjectValue(*lambda));
2816 args.setThis(UndefinedValue());
2818 /* Push $&, $1, $2, ... */
2819 unsigned argi = 0;
2820 if (!res->createLastMatch(cx, args[argi++]))
2821 return false;
2823 for (size_t i = 0; i < res->getMatches().parenCount(); ++i) {
2824 if (!res->createParen(cx, i + 1, args[argi++]))
2825 return false;
2828 /* Push match index and input string. */
2829 args[argi++].setInt32(res->getMatches()[0].start);
2830 args[argi].setString(rdata.str);
2832 if (!rdata.fig.invoke(cx))
2833 return false;
2835 /* root repstr: rdata is on the stack, so scanned by conservative gc. */
2836 JSString* repstr = ToString<CanGC>(cx, args.rval());
2837 if (!repstr)
2838 return false;
2839 rdata.repstr = repstr->ensureLinear(cx);
2840 if (!rdata.repstr)
2841 return false;
2842 *sizep = rdata.repstr->length();
2843 return true;
2846 return rdata.repstr->hasLatin1Chars()
2847 ? FindReplaceLengthString<Latin1Char>(cx, res, rdata, sizep)
2848 : FindReplaceLengthString<jschar>(cx, res, rdata, sizep);
2852 * Precondition: |rdata.sb| already has necessary growth space reserved (as
2853 * derived from FindReplaceLength), and has been inflated to TwoByte if
2854 * necessary.
2856 template <typename CharT>
2857 static void
2858 DoReplace(RegExpStatics* res, ReplaceData& rdata)
2860 AutoCheckCannotGC nogc;
2861 JSLinearString* repstr = rdata.repstr;
2862 const CharT* bp = repstr->chars<CharT>(nogc);
2863 const CharT* cp = bp;
2865 if (rdata.dollarIndex != UINT32_MAX) {
2866 MOZ_ASSERT(rdata.dollarIndex < repstr->length());
2867 const CharT* dp = bp + rdata.dollarIndex;
2868 const CharT* ep = bp + repstr->length();
2869 do {
2870 /* Move one of the constant portions of the replacement value. */
2871 size_t len = dp - cp;
2872 rdata.sb.infallibleAppend(cp, len);
2873 cp = dp;
2875 JSSubString sub;
2876 size_t skip;
2877 if (InterpretDollar(res, bp, dp, ep, rdata, &sub, &skip)) {
2878 rdata.sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
2879 cp += skip;
2880 dp += skip;
2881 } else {
2882 dp++;
2885 dp = js_strchr_limit(dp, '$', ep);
2886 } while (dp);
2888 rdata.sb.infallibleAppend(cp, repstr->length() - (cp - bp));
2891 static bool
2892 ReplaceRegExp(JSContext* cx, RegExpStatics* res, ReplaceData& rdata)
2895 const MatchPair& match = res->getMatches()[0];
2896 JS_ASSERT(!match.isUndefined());
2897 JS_ASSERT(match.limit >= match.start && match.limit >= 0);
2899 rdata.calledBack = true;
2900 size_t leftoff = rdata.leftIndex;
2901 size_t leftlen = match.start - leftoff;
2902 rdata.leftIndex = match.limit;
2904 size_t replen = 0; /* silence 'unused' warning */
2905 if (!FindReplaceLength(cx, res, rdata, &replen))
2906 return false;
2908 CheckedInt<uint32_t> newlen(rdata.sb.length());
2909 newlen += leftlen;
2910 newlen += replen;
2911 if (!newlen.isValid()) {
2912 js_ReportAllocationOverflow(cx);
2913 return false;
2917 * Inflate the buffer now if needed, to avoid (fallible) Latin1 to TwoByte
2918 * inflation later on.
2920 JSLinearString& str = rdata.str->asLinear(); /* flattened for regexp */
2921 if (str.hasTwoByteChars() || rdata.repstr->hasTwoByteChars()) {
2922 if (!rdata.sb.ensureTwoByteChars())
2923 return false;
2926 if (!rdata.sb.reserve(newlen.value()))
2927 return false;
2929 /* Append skipped-over portion of the search value. */
2930 rdata.sb.infallibleAppendSubstring(&str, leftoff, leftlen);
2932 if (rdata.repstr->hasLatin1Chars())
2933 DoReplace<Latin1Char>(res, rdata);
2934 else
2935 DoReplace<jschar>(res, rdata);
2936 return true;
2939 static bool
2940 BuildFlatReplacement(JSContext* cx, HandleString textstr, HandleString repstr,
2941 const FlatMatch& fm, MutableHandleValue rval)
2943 RopeBuilder builder(cx);
2944 size_t match = fm.match();
2945 size_t matchEnd = match + fm.patternLength();
2947 if (textstr->isRope()) {
2949 * If we are replacing over a rope, avoid flattening it by iterating
2950 * through it, building a new rope.
2952 StringSegmentRange r(cx);
2953 if (!r.init(textstr))
2954 return false;
2955 size_t pos = 0;
2956 while (!r.empty()) {
2957 RootedString str(cx, r.front());
2958 size_t len = str->length();
2959 size_t strEnd = pos + len;
2960 if (pos < matchEnd && strEnd > match) {
2962 * We need to special-case any part of the rope that overlaps
2963 * with the replacement string.
2965 if (match >= pos) {
2967 * If this part of the rope overlaps with the left side of
2968 * the pattern, then it must be the only one to overlap with
2969 * the first character in the pattern, so we include the
2970 * replacement string here.
2972 RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2973 if (!leftSide ||
2974 !builder.append(leftSide) ||
2975 !builder.append(repstr)) {
2976 return false;
2981 * If str runs off the end of the matched string, append the
2982 * last part of str.
2984 if (strEnd > matchEnd) {
2985 RootedString rightSide(cx, NewDependentString(cx, str, matchEnd - pos,
2986 strEnd - matchEnd));
2987 if (!rightSide || !builder.append(rightSide))
2988 return false;
2990 } else {
2991 if (!builder.append(str))
2992 return false;
2994 pos += str->length();
2995 if (!r.popFront())
2996 return false;
2998 } else {
2999 RootedString leftSide(cx, NewDependentString(cx, textstr, 0, match));
3000 if (!leftSide)
3001 return false;
3002 RootedString rightSide(cx);
3003 rightSide = NewDependentString(cx, textstr, match + fm.patternLength(),
3004 textstr->length() - match - fm.patternLength());
3005 if (!rightSide ||
3006 !builder.append(leftSide) ||
3007 !builder.append(repstr) ||
3008 !builder.append(rightSide)) {
3009 return false;
3013 rval.setString(builder.result());
3014 return true;
3017 template <typename CharT>
3018 static bool
3019 AppendDollarReplacement(StringBuffer& newReplaceChars, size_t firstDollarIndex,
3020 const FlatMatch& fm, JSLinearString* text,
3021 const CharT* repChars, size_t repLength)
3023 JS_ASSERT(firstDollarIndex < repLength);
3025 size_t matchStart = fm.match();
3026 size_t matchLimit = matchStart + fm.patternLength();
3028 /* Move the pre-dollar chunk in bulk. */
3029 newReplaceChars.infallibleAppend(repChars, firstDollarIndex);
3031 /* Move the rest char-by-char, interpreting dollars as we encounter them. */
3032 const CharT* repLimit = repChars + repLength;
3033 for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
3034 if (*it != '$' || it == repLimit - 1) {
3035 if (!newReplaceChars.append(*it))
3036 return false;
3037 continue;
3040 switch (*(it + 1)) {
3041 case '$': /* Eat one of the dollars. */
3042 if (!newReplaceChars.append(*it))
3043 return false;
3044 break;
3045 case '&':
3046 if (!newReplaceChars.appendSubstring(text, matchStart, matchLimit - matchStart))
3047 return false;
3048 break;
3049 case '`':
3050 if (!newReplaceChars.appendSubstring(text, 0, matchStart))
3051 return false;
3052 break;
3053 case '\'':
3054 if (!newReplaceChars.appendSubstring(text, matchLimit, text->length() - matchLimit))
3055 return false;
3056 break;
3057 default: /* The dollar we saw was not special (no matter what its mother told it). */
3058 if (!newReplaceChars.append(*it))
3059 return false;
3060 continue;
3062 ++it; /* We always eat an extra char in the above switch. */
3065 return true;
3069 * Perform a linear-scan dollar substitution on the replacement text,
3070 * constructing a result string that looks like:
3072 * newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:]
3074 static inline bool
3075 BuildDollarReplacement(JSContext* cx, JSString* textstrArg, JSLinearString* repstr,
3076 uint32_t firstDollarIndex, const FlatMatch& fm, MutableHandleValue rval)
3078 RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
3079 if (!textstr)
3080 return false;
3082 size_t matchStart = fm.match();
3083 size_t matchLimit = matchStart + fm.patternLength();
3086 * Most probably:
3088 * len(newstr) >= len(orig) - len(match) + len(replacement)
3090 * Note that dollar vars _could_ make the resulting text smaller than this.
3092 StringBuffer newReplaceChars(cx);
3093 if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars())
3094 return false;
3096 if (!newReplaceChars.reserve(textstr->length() - fm.patternLength() + repstr->length()))
3097 return false;
3099 bool res;
3100 if (repstr->hasLatin1Chars()) {
3101 AutoCheckCannotGC nogc;
3102 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, fm, textstr,
3103 repstr->latin1Chars(nogc), repstr->length());
3104 } else {
3105 AutoCheckCannotGC nogc;
3106 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, fm, textstr,
3107 repstr->twoByteChars(nogc), repstr->length());
3109 if (!res)
3110 return false;
3112 RootedString leftSide(cx, NewDependentString(cx, textstr, 0, matchStart));
3113 if (!leftSide)
3114 return false;
3116 RootedString newReplace(cx, newReplaceChars.finishString());
3117 if (!newReplace)
3118 return false;
3120 JS_ASSERT(textstr->length() >= matchLimit);
3121 RootedString rightSide(cx, NewDependentString(cx, textstr, matchLimit,
3122 textstr->length() - matchLimit));
3123 if (!rightSide)
3124 return false;
3126 RopeBuilder builder(cx);
3127 if (!builder.append(leftSide) || !builder.append(newReplace) || !builder.append(rightSide))
3128 return false;
3130 rval.setString(builder.result());
3131 return true;
/* A (start, length) pair identifying a run of characters within a string. */
struct StringRange
{
    size_t start;   /* index of the first character in the run */
    size_t length;  /* number of characters in the run */

    StringRange(size_t s, size_t l)
      : start(s),
        length(l)
    {}
};
3144 template <typename CharT>
3145 static void
3146 CopySubstringsToFatInline(JSFatInlineString* dest, const CharT* src, const StringRange* ranges,
3147 size_t rangesLen, size_t outputLen)
3149 CharT* buf = dest->init<CharT>(outputLen);
3150 size_t pos = 0;
3151 for (size_t i = 0; i < rangesLen; i++) {
3152 PodCopy(buf + pos, src + ranges[i].start, ranges[i].length);
3153 pos += ranges[i].length;
3156 MOZ_ASSERT(pos == outputLen);
3157 buf[outputLen] = 0;
3160 static inline JSFatInlineString*
3161 FlattenSubstrings(JSContext* cx, Handle<JSFlatString*> flatStr, const StringRange* ranges,
3162 size_t rangesLen, size_t outputLen)
3164 JSFatInlineString* str = NewGCFatInlineString<CanGC>(cx);
3165 if (!str)
3166 return nullptr;
3168 AutoCheckCannotGC nogc;
3169 if (flatStr->hasLatin1Chars())
3170 CopySubstringsToFatInline(str, flatStr->latin1Chars(nogc), ranges, rangesLen, outputLen);
3171 else
3172 CopySubstringsToFatInline(str, flatStr->twoByteChars(nogc), ranges, rangesLen, outputLen);
3173 return str;
3176 static JSString*
3177 AppendSubstrings(JSContext* cx, Handle<JSFlatString*> flatStr,
3178 const StringRange* ranges, size_t rangesLen)
3180 JS_ASSERT(rangesLen);
3182 /* For single substrings, construct a dependent string. */
3183 if (rangesLen == 1)
3184 return NewDependentString(cx, flatStr, ranges[0].start, ranges[0].length);
3186 bool isLatin1 = flatStr->hasLatin1Chars();
3187 uint32_t fatInlineMaxLength = JSFatInlineString::MAX_LENGTH_TWO_BYTE;
3188 if (isLatin1)
3189 fatInlineMaxLength = JSFatInlineString::MAX_LENGTH_LATIN1;
3191 /* Collect substrings into a rope */
3192 size_t i = 0;
3193 RopeBuilder rope(cx);
3194 RootedString part(cx, nullptr);
3195 while (i < rangesLen) {
3197 /* Find maximum range that fits in JSFatInlineString */
3198 size_t substrLen = 0;
3199 size_t end = i;
3200 for (; end < rangesLen; end++) {
3201 if (substrLen + ranges[end].length > fatInlineMaxLength)
3202 break;
3203 substrLen += ranges[end].length;
3206 if (i == end) {
3207 /* Not even one range fits JSFatInlineString, use DependentString */
3208 const StringRange& sr = ranges[i++];
3209 part = NewDependentString(cx, flatStr, sr.start, sr.length);
3210 } else {
3211 /* Copy the ranges (linearly) into a JSFatInlineString */
3212 part = FlattenSubstrings(cx, flatStr, ranges + i, end - i, substrLen);
3213 i = end;
3216 if (!part)
3217 return nullptr;
3219 /* Appending to the rope permanently roots the substring. */
3220 if (!rope.append(part))
3221 return nullptr;
3224 return rope.result();
3227 static bool
3228 StrReplaceRegexpRemove(JSContext* cx, HandleString str, RegExpShared& re, MutableHandleValue rval)
3230 Rooted<JSFlatString*> flatStr(cx, str->ensureFlat(cx));
3231 if (!flatStr)
3232 return false;
3234 Vector<StringRange, 16, SystemAllocPolicy> ranges;
3236 size_t charsLen = flatStr->length();
3238 ScopedMatchPairs matches(&cx->tempLifoAlloc());
3239 size_t startIndex = 0; /* Index used for iterating through the string. */
3240 size_t lastIndex = 0; /* Index after last successful match. */
3241 size_t lazyIndex = 0; /* Index before last successful match. */
3243 /* Accumulate StringRanges for unmatched substrings. */
3244 while (startIndex <= charsLen) {
3245 if (!CheckForInterrupt(cx))
3246 return false;
3248 RegExpRunStatus status = re.execute(cx, flatStr, &startIndex, matches);
3249 if (status == RegExpRunStatus_Error)
3250 return false;
3251 if (status == RegExpRunStatus_Success_NotFound)
3252 break;
3253 MatchPair& match = matches[0];
3255 /* Include the latest unmatched substring. */
3256 if (size_t(match.start) > lastIndex) {
3257 if (!ranges.append(StringRange(lastIndex, match.start - lastIndex)))
3258 return false;
3261 lazyIndex = lastIndex;
3262 lastIndex = startIndex;
3264 if (match.isEmpty())
3265 startIndex++;
3267 /* Non-global removal executes at most once. */
3268 if (!re.global())
3269 break;
3272 RegExpStatics* res;
3274 /* If unmatched, return the input string. */
3275 if (!lastIndex) {
3276 if (startIndex > 0) {
3277 res = cx->global()->getRegExpStatics(cx);
3278 if (!res)
3279 return false;
3280 res->updateLazily(cx, flatStr, &re, lazyIndex);
3282 rval.setString(str);
3283 return true;
3286 /* The last successful match updates the RegExpStatics. */
3287 res = cx->global()->getRegExpStatics(cx);
3288 if (!res)
3289 return false;
3291 res->updateLazily(cx, flatStr, &re, lazyIndex);
3293 /* Include any remaining part of the string. */
3294 if (lastIndex < charsLen) {
3295 if (!ranges.append(StringRange(lastIndex, charsLen - lastIndex)))
3296 return false;
3299 /* Handle the empty string before calling .begin(). */
3300 if (ranges.empty()) {
3301 rval.setString(cx->runtime()->emptyString);
3302 return true;
3305 JSString* result = AppendSubstrings(cx, flatStr, ranges.begin(), ranges.length());
3306 if (!result)
3307 return false;
3309 rval.setString(result);
3310 return true;
3313 static inline bool
3314 StrReplaceRegExp(JSContext* cx, ReplaceData& rdata, MutableHandleValue rval)
3316 rdata.leftIndex = 0;
3317 rdata.calledBack = false;
3319 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3320 if (!res)
3321 return false;
3323 RegExpShared& re = rdata.g.regExp();
3325 // The spec doesn't describe this function very clearly, so we go ahead and
3326 // assume that when the input to String.prototype.replace is a global
3327 // RegExp, calling the replacer function (assuming one was provided) takes
3328 // place only after the matching is done. See the comment at the beginning
3329 // of DoMatchGlobal explaining why we can zero the the RegExp object's
3330 // lastIndex property here.
3331 if (re.global() && !rdata.g.zeroLastIndex(cx))
3332 return false;
3334 /* Optimize removal. */
3335 if (rdata.repstr && rdata.repstr->length() == 0) {
3336 JS_ASSERT(!rdata.lambda && !rdata.elembase && rdata.dollarIndex == UINT32_MAX);
3337 return StrReplaceRegexpRemove(cx, rdata.str, re, rval);
3340 RootedLinearString linearStr(cx, rdata.str->ensureLinear(cx));
3341 if (!linearStr)
3342 return false;
3344 if (re.global()) {
3345 if (!DoMatchForReplaceGlobal(cx, res, linearStr, re, rdata))
3346 return false;
3347 } else {
3348 if (!DoMatchForReplaceLocal(cx, res, linearStr, re, rdata))
3349 return false;
3352 if (!rdata.calledBack) {
3353 /* Didn't match, so the string is unmodified. */
3354 rval.setString(rdata.str);
3355 return true;
3358 JSSubString sub;
3359 res->getRightContext(&sub);
3360 if (!rdata.sb.appendSubstring(sub.base, sub.offset, sub.length))
3361 return false;
3363 JSString* retstr = rdata.sb.finishString();
3364 if (!retstr)
3365 return false;
3367 rval.setString(retstr);
3368 return true;
3371 static inline bool
3372 str_replace_regexp(JSContext* cx, CallArgs args, ReplaceData& rdata)
3374 if (!rdata.g.normalizeRegExp(cx, true, 2, args))
3375 return false;
3377 return StrReplaceRegExp(cx, rdata, args.rval());
3380 bool
3381 js::str_replace_regexp_raw(JSContext* cx, HandleString string, HandleObject regexp,
3382 HandleString replacement, MutableHandleValue rval)
3384 /* Optimize removal, so we don't have to create ReplaceData */
3385 if (replacement->length() == 0) {
3386 StringRegExpGuard guard(cx);
3387 if (!guard.init(cx, regexp))
3388 return false;
3390 RegExpShared& re = guard.regExp();
3391 return StrReplaceRegexpRemove(cx, string, re, rval);
3394 ReplaceData rdata(cx);
3395 rdata.str = string;
3397 JSLinearString* repl = replacement->ensureLinear(cx);
3398 if (!repl)
3399 return false;
3401 rdata.setReplacementString(repl);
3403 if (!rdata.g.init(cx, regexp))
3404 return false;
3406 return StrReplaceRegExp(cx, rdata, rval);
3409 static inline bool
3410 StrReplaceString(JSContext* cx, ReplaceData& rdata, const FlatMatch& fm, MutableHandleValue rval)
3413 * Note: we could optimize the text.length == pattern.length case if we wanted,
3414 * even in the presence of dollar metachars.
3416 if (rdata.dollarIndex != UINT32_MAX)
3417 return BuildDollarReplacement(cx, rdata.str, rdata.repstr, rdata.dollarIndex, fm, rval);
3418 return BuildFlatReplacement(cx, rdata.str, rdata.repstr, fm, rval);
3421 static const uint32_t ReplaceOptArg = 2;
3423 bool
3424 js::str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern,
3425 HandleString replacement, MutableHandleValue rval)
3427 ReplaceData rdata(cx);
3429 rdata.str = string;
3430 JSLinearString* repl = replacement->ensureLinear(cx);
3431 if (!repl)
3432 return false;
3433 rdata.setReplacementString(repl);
3435 if (!rdata.g.init(cx, pattern))
3436 return false;
3437 const FlatMatch* fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, ReplaceOptArg, false);
3439 if (fm->match() < 0) {
3440 rval.setString(string);
3441 return true;
3444 return StrReplaceString(cx, rdata, *fm, rval);
3447 static inline bool
3448 str_replace_flat_lambda(JSContext* cx, CallArgs outerArgs, ReplaceData& rdata, const FlatMatch& fm)
3450 RootedString matchStr(cx, NewDependentString(cx, rdata.str, fm.match(), fm.patternLength()));
3451 if (!matchStr)
3452 return false;
3454 /* lambda(matchStr, matchStart, textstr) */
3455 static const uint32_t lambdaArgc = 3;
3456 if (!rdata.fig.args().init(lambdaArgc))
3457 return false;
3459 CallArgs& args = rdata.fig.args();
3460 args.setCallee(ObjectValue(*rdata.lambda));
3461 args.setThis(UndefinedValue());
3463 Value* sp = args.array();
3464 sp[0].setString(matchStr);
3465 sp[1].setInt32(fm.match());
3466 sp[2].setString(rdata.str);
3468 if (!rdata.fig.invoke(cx))
3469 return false;
3471 RootedString repstr(cx, ToString<CanGC>(cx, args.rval()));
3472 if (!repstr)
3473 return false;
3475 RootedString leftSide(cx, NewDependentString(cx, rdata.str, 0, fm.match()));
3476 if (!leftSide)
3477 return false;
3479 size_t matchLimit = fm.match() + fm.patternLength();
3480 RootedString rightSide(cx, NewDependentString(cx, rdata.str, matchLimit,
3481 rdata.str->length() - matchLimit));
3482 if (!rightSide)
3483 return false;
3485 RopeBuilder builder(cx);
3486 if (!(builder.append(leftSide) &&
3487 builder.append(repstr) &&
3488 builder.append(rightSide))) {
3489 return false;
3492 outerArgs.rval().setString(builder.result());
3493 return true;
3497 * Pattern match the script to check if it is is indexing into a particular
3498 * object, e.g. 'function(a) { return b[a]; }'. Avoid calling the script in
3499 * such cases, which are used by javascript packers (particularly the popular
3500 * Dean Edwards packer) to efficiently encode large scripts. We only handle the
3501 * code patterns generated by such packers here.
// Inspects the bytecode of |lambda| for the exact four-op sequence
//   JSOP_GETALIASEDVAR b; JSOP_GETARG 0; JSOP_GETELEM; JSOP_RETURN
// i.e. 'function(a) { return b[a]; }'. When the sequence matches and |b| is
// an object whose class is native and has neither a lookupProperty nor a
// getProperty hook, that object is stored in |pobj|.
//
// Returns false only when getOrCreateScript fails. Every "pattern does not
// match" case returns true and leaves |pobj| untouched, so callers must
// inspect |pobj| to learn whether the fast path applies.
3503 static bool
3504 LambdaIsGetElem(JSContext* cx, JSObject& lambda, MutableHandleObject pobj)
3506 if (!lambda.is<JSFunction>())
3507 return true;
3509 RootedFunction fun(cx, &lambda.as<JSFunction>());
3510 if (!fun->isInterpreted())
3511 return true;
3513 JSScript* script = fun->getOrCreateScript(cx);
3514 if (!script)
3515 return false;
// Walk the script from its first opcode; |pc| is advanced by each matched
// opcode's length below.
3517 jsbytecode* pc = script->code();
3520 * JSOP_GETALIASEDVAR tells us exactly where to find the base object 'b'.
3521 * Rule out the (unlikely) possibility of a heavyweight function since it
3522 * would make our scope walk off by 1.
3524 if (JSOp(*pc) != JSOP_GETALIASEDVAR || fun->isHeavyweight())
3525 return true;
3526 ScopeCoordinate sc(pc);
// Follow sc.hops() enclosing scopes outward from the function's environment
// to reach the scope object that holds 'b'.
3527 ScopeObject* scope = &fun->environment()->as<ScopeObject>();
3528 for (unsigned i = 0; i < sc.hops(); ++i)
3529 scope = &scope->enclosingScope().as<ScopeObject>();
3530 Value b = scope->aliasedVar(sc);
3531 pc += JSOP_GETALIASEDVAR_LENGTH;
3533 /* Look for 'a' to be the lambda's first argument. */
3534 if (JSOp(*pc) != JSOP_GETARG || GET_ARGNO(pc) != 0)
3535 return true;
3536 pc += JSOP_GETARG_LENGTH;
3538 /* 'b[a]' */
3539 if (JSOp(*pc) != JSOP_GETELEM)
3540 return true;
3541 pc += JSOP_GETELEM_LENGTH;
3543 /* 'return b[a]' */
3544 if (JSOp(*pc) != JSOP_RETURN)
3545 return true;
3547 /* 'b' must behave like a normal object. */
3548 if (!b.isObject())
3549 return true;
3551 JSObject& bobj = b.toObject();
3552 const Class* clasp = bobj.getClass();
3553 if (!clasp->isNative() || clasp->ops.lookupProperty || clasp->ops.getProperty)
3554 return true;
3556 pobj.set(&bobj);
3557 return true;
// Native registered as "replace" in string_methods.
//
// Converts |this| to a string, records either a callable replacement
// (args[1] when it is callable) or a string replacement in |rdata|, and then
// dispatches:
//   - to str_replace_regexp when tryFlatMatch returns no FlatMatch and no
//     exception is pending;
//   - to str_replace_flat_lambda when the replacement is a function;
//   - to StrReplaceString otherwise.
// A FlatMatch whose match() is negative means "pattern not found"; the input
// string is then returned unchanged. Returns false when any step fails.
3560 bool
3561 js::str_replace(JSContext* cx, unsigned argc, Value* vp)
3563 CallArgs args = CallArgsFromVp(argc, vp);
3565 ReplaceData rdata(cx);
3566 rdata.str = ThisToStringForStringProto(cx, args);
3567 if (!rdata.str)
3568 return false;
3570 if (!rdata.g.init(cx, args))
3571 return false;
3573 /* Extract replacement string/function. */
3574 if (args.length() >= ReplaceOptArg && IsCallable(args[1])) {
3575 rdata.setReplacementFunction(&args[1].toObject());
// Detect the packer-style 'function(a) { return b[a]; }' lambda; on a hit
// rdata.elembase receives the object 'b'.
3577 if (!LambdaIsGetElem(cx, *rdata.lambda, &rdata.elembase))
3578 return false;
3579 } else {
3580 JSLinearString* string = ArgToRootedString(cx, args, 1);
3581 if (!string)
3582 return false;
3584 rdata.setReplacementString(string);
3587 rdata.fig.initFunction(ObjectOrNullValue(rdata.lambda));
3590 * Unlike its |String.prototype| brethren, |replace| doesn't convert
3591 * its input to a regular expression. (Even if it contains metachars.)
3593 * However, if the user invokes our (non-standard) |flags| argument
3594 * extension then we revert to creating a regular expression. Note that
3595 * this is observable behavior through the side-effect mutation of the
3596 * |RegExp| statics.
3599 const FlatMatch* fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, args.length(), false);
// A null |fm| is either a real failure (exception pending) or a signal that
// the regexp path must be taken.
3601 if (!fm) {
3602 if (cx->isExceptionPending()) /* oom in RopeMatch in tryFlatMatch */
3603 return false;
3604 return str_replace_regexp(cx, args, rdata);
3607 if (fm->match() < 0) {
3608 args.rval().setString(rdata.str);
3609 return true;
3612 if (rdata.lambda)
3613 return str_replace_flat_lambda(cx, args, rdata, *fm);
3614 return StrReplaceString(cx, rdata, *fm, args.rval());
3617 namespace {
3619 class SplitMatchResult {
3620 size_t endIndex_;
3621 size_t length_;
3623 public:
3624 void setFailure() {
3625 JS_STATIC_ASSERT(SIZE_MAX > JSString::MAX_LENGTH);
3626 endIndex_ = SIZE_MAX;
3628 bool isFailure() const {
3629 return endIndex_ == SIZE_MAX;
3631 size_t endIndex() const {
3632 JS_ASSERT(!isFailure());
3633 return endIndex_;
3635 size_t length() const {
3636 JS_ASSERT(!isFailure());
3637 return length_;
3639 void setResult(size_t length, size_t endIndex) {
3640 length_ = length;
3641 endIndex_ = endIndex;
3645 } /* anonymous namespace */
// Shared driver for splitting |str| with an arbitrary separator matcher.
//
// |splitMatch(cx, str, index, &result)| is called repeatedly to locate the
// next separator at or after |index|. The substrings between separators are
// collected into a value vector and, when Matcher::returnsCaptures is true,
// so are the paren captures read back from the global's RegExpStatics
// (undefined captures are appended as undefined and registered on |type|).
// Collection stops early as soon as the vector holds |limit| entries.
//
// Returns the resulting dense array, or nullptr when the matcher, a string
// allocation, a vector append, or the RegExpStatics lookup fails.
3647 template<class Matcher>
3648 static ArrayObject*
3649 SplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit, const Matcher& splitMatch,
3650 Handle<TypeObject*> type)
3652 size_t strLength = str->length();
3653 SplitMatchResult result;
3655 /* Step 11. */
3656 if (strLength == 0) {
3657 if (!splitMatch(cx, str, 0, &result))
3658 return nullptr;
3661 * NB: Unlike in the non-empty string case, it's perfectly fine
3662 * (indeed the spec requires it) if we match at the end of the
3663 * string. Thus these cases should hold:
3665 * var a = "".split("");
3666 * assertEq(a.length, 0);
3667 * var b = "".split(/.?/);
3668 * assertEq(b.length, 0);
3670 if (!result.isFailure())
3671 return NewDenseEmptyArray(cx);
3673 RootedValue v(cx, StringValue(str));
3674 return NewDenseCopiedArray(cx, 1, v.address());
3677 /* Step 12. */
// |lastEndIndex| is where the pending substring starts; |index| is where the
// next match attempt begins.
3678 size_t lastEndIndex = 0;
3679 size_t index = 0;
3681 /* Step 13. */
3682 AutoValueVector splits(cx);
3684 while (index < strLength) {
3685 /* Step 13(a). */
3686 if (!splitMatch(cx, str, index, &result))
3687 return nullptr;
3690 * Step 13(b).
3692 * Our match algorithm differs from the spec in that it returns the
3693 * next index at which a match happens. If no match happens we're
3694 * done.
3696 * But what if the match is at the end of the string (and the string is
3697 * not empty)? Per 13(c)(ii) this shouldn't be a match, so we have to
3698 * specially exclude it. Thus this case should hold:
3700 * var a = "abc".split(/\b/);
3701 * assertEq(a.length, 1);
3702 * assertEq(a[0], "abc");
3704 if (result.isFailure())
3705 break;
3707 /* Step 13(c)(i). */
3708 size_t sepLength = result.length();
3709 size_t endIndex = result.endIndex();
3710 if (sepLength == 0 && endIndex == strLength)
3711 break;
3713 /* Step 13(c)(ii). */
// A match that ends exactly where the previous one ended makes no progress;
// retry one character further along.
3714 if (endIndex == lastEndIndex) {
3715 index++;
3716 continue;
3719 /* Step 13(c)(iii). */
3720 JS_ASSERT(lastEndIndex < endIndex);
3721 JS_ASSERT(sepLength <= strLength);
3722 JS_ASSERT(lastEndIndex + sepLength <= endIndex);
3724 /* Steps 13(c)(iii)(1-3). */
// The piece runs from the end of the previous separator up to the start of
// this one (endIndex - sepLength).
3725 size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
3726 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3727 if (!sub || !splits.append(StringValue(sub)))
3728 return nullptr;
3730 /* Step 13(c)(iii)(4). */
3731 if (splits.length() == limit)
3732 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3734 /* Step 13(c)(iii)(5). */
3735 lastEndIndex = endIndex;
3737 /* Step 13(c)(iii)(6-7). */
3738 if (Matcher::returnsCaptures) {
3739 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3740 if (!res)
3741 return nullptr;
3743 const MatchPairs& matches = res->getMatches();
3744 for (size_t i = 0; i < matches.parenCount(); i++) {
3745 /* Steps 13(c)(iii)(7)(a-c). */
3746 if (!matches[i + 1].isUndefined()) {
3747 JSSubString parsub;
3748 res->getParen(i + 1, &parsub);
3749 sub = NewDependentString(cx, parsub.base, parsub.offset, parsub.length);
3750 if (!sub || !splits.append(StringValue(sub)))
3751 return nullptr;
3752 } else {
3753 /* Only string entries have been accounted for so far. */
3754 AddTypePropertyId(cx, type, JSID_VOID, UndefinedValue());
3755 if (!splits.append(UndefinedValue()))
3756 return nullptr;
3759 /* Step 13(c)(iii)(7)(d). */
3760 if (splits.length() == limit)
3761 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3765 /* Step 13(c)(iii)(8). */
3766 index = lastEndIndex;
3769 /* Steps 14-15. */
// Whatever follows the last separator becomes the final element.
3770 JSString* sub = NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
3771 if (!sub || !splits.append(StringValue(sub)))
3772 return nullptr;
3774 /* Step 16. */
3775 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3778 // Fast-path for splitting a string into a character array via split("").
3779 static ArrayObject*
3780 CharSplitHelper(JSContext* cx, HandleLinearString str, uint32_t limit)
3782 size_t strLength = str->length();
3783 if (strLength == 0)
3784 return NewDenseEmptyArray(cx);
3786 js::StaticStrings& staticStrings = cx->staticStrings();
3787 uint32_t resultlen = (limit < strLength ? limit : strLength);
3789 AutoValueVector splits(cx);
3790 if (!splits.reserve(resultlen))
3791 return nullptr;
3793 for (size_t i = 0; i < resultlen; ++i) {
3794 JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
3795 if (!sub)
3796 return nullptr;
3797 splits.infallibleAppend(StringValue(sub));
3800 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3803 namespace {
3806 * The SplitMatch operation from ES5 15.5.4.14 is implemented using different
3807 * paths for regular expression and string separators.
3809 * The algorithm differs from the spec in that we return the next index at
3810 * which a match happens.
3812 class SplitRegExpMatcher
3814 RegExpShared& re;
3815 RegExpStatics* res;
3817 public:
3818 SplitRegExpMatcher(RegExpShared& re, RegExpStatics* res) : re(re), res(res) {}
3820 static const bool returnsCaptures = true;
3822 bool operator()(JSContext* cx, HandleLinearString str, size_t index,
3823 SplitMatchResult* result) const
3825 ScopedMatchPairs matches(&cx->tempLifoAlloc());
3826 RegExpRunStatus status = re.execute(cx, str, &index, matches);
3827 if (status == RegExpRunStatus_Error)
3828 return false;
3830 if (status == RegExpRunStatus_Success_NotFound) {
3831 result->setFailure();
3832 return true;
3835 if (!res->updateFromMatchPairs(cx, str, matches))
3836 return false;
3838 JSSubString sep;
3839 res->getLastMatch(&sep);
3841 result->setResult(sep.length, index);
3842 return true;
3846 class SplitStringMatcher
3848 RootedLinearString sep;
3850 public:
3851 SplitStringMatcher(JSContext* cx, HandleLinearString sep)
3852 : sep(cx, sep)
3855 static const bool returnsCaptures = false;
3857 bool operator()(JSContext* cx, JSLinearString* str, size_t index, SplitMatchResult* res) const
3859 JS_ASSERT(index == 0 || index < str->length());
3860 int match = StringMatch(str, sep, index);
3861 if (match == -1)
3862 res->setFailure();
3863 else
3864 res->setResult(sep->length(), match + sep->length());
3865 return true;
3869 } /* anonymous namespace */
// Native registered as "split" in string_methods.
//
// Converts |this| to a string, reads the optional limit from args[1]
// (ToUint32 of its numeric value; UINT32_MAX when absent or undefined) and
// the optional separator from args[0] (a RegExp object is shared via
// RegExpToShared, anything else is converted to a linear string). The result
// array is built by CharSplitHelper for an empty string separator and by
// SplitHelper otherwise, and is always given |type| before being returned.
// Returns false when any conversion or allocation fails.
3871 /* ES5 15.5.4.14 */
3872 bool
3873 js::str_split(JSContext* cx, unsigned argc, Value* vp)
3875 CallArgs args = CallArgsFromVp(argc, vp);
3877 /* Steps 1-2. */
3878 RootedString str(cx, ThisToStringForStringProto(cx, args));
3879 if (!str)
3880 return false;
// The result array's type starts out knowing only about string elements.
3882 RootedTypeObject type(cx, GetTypeCallerInitObject(cx, JSProto_Array));
3883 if (!type)
3884 return false;
3885 AddTypePropertyId(cx, type, JSID_VOID, Type::StringType());
3887 /* Step 5: Use the second argument as the split limit, if given. */
3888 uint32_t limit;
3889 if (args.hasDefined(1)) {
3890 double d;
3891 if (!ToNumber(cx, args[1], &d))
3892 return false;
3893 limit = ToUint32(d);
3894 } else {
3895 limit = UINT32_MAX;
3898 /* Step 8. */
// Exactly one of |re| (RegExp separator) or |sepstr| (string separator) is
// populated when a separator was supplied.
3899 RegExpGuard re(cx);
3900 RootedLinearString sepstr(cx);
3901 bool sepDefined = args.hasDefined(0);
3902 if (sepDefined) {
3903 if (IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
3904 RootedObject obj(cx, &args[0].toObject());
3905 if (!RegExpToShared(cx, obj, &re))
3906 return false;
3907 } else {
3908 sepstr = ArgToRootedString(cx, args, 0);
3909 if (!sepstr)
3910 return false;
3914 /* Step 9. */
3915 if (limit == 0) {
3916 JSObject* aobj = NewDenseEmptyArray(cx);
3917 if (!aobj)
3918 return false;
3919 aobj->setType(type);
3920 args.rval().setObject(*aobj);
3921 return true;
3924 /* Step 10. */
// No separator: the result is a one-element array holding the whole string.
3925 if (!sepDefined) {
3926 RootedValue v(cx, StringValue(str));
3927 JSObject* aobj = NewDenseCopiedArray(cx, 1, v.address());
3928 if (!aobj)
3929 return false;
3930 aobj->setType(type);
3931 args.rval().setObject(*aobj);
3932 return true;
3934 RootedLinearString linearStr(cx, str->ensureLinear(cx));
3935 if (!linearStr)
3936 return false;
3938 /* Steps 11-15. */
3939 RootedObject aobj(cx);
3940 if (!re.initialized()) {
3941 if (sepstr->length() == 0) {
3942 aobj = CharSplitHelper(cx, linearStr, limit);
3943 } else {
3944 SplitStringMatcher matcher(cx, sepstr);
3945 aobj = SplitHelper(cx, linearStr, limit, matcher, type);
3947 } else {
3948 RegExpStatics* res = cx->global()->getRegExpStatics(cx);
3949 if (!res)
3950 return false;
3951 SplitRegExpMatcher matcher(*re, res);
3952 aobj = SplitHelper(cx, linearStr, limit, matcher, type);
3954 if (!aobj)
3955 return false;
3957 /* Step 16. */
3958 aobj->setType(type);
3959 args.rval().setObject(*aobj);
3960 return true;
3963 JSObject*
3964 js::str_split_string(JSContext* cx, HandleTypeObject type, HandleString str, HandleString sep)
3966 RootedLinearString linearStr(cx, str->ensureLinear(cx));
3967 if (!linearStr)
3968 return nullptr;
3970 RootedLinearString linearSep(cx, sep->ensureLinear(cx));
3971 if (!linearSep)
3972 return nullptr;
3974 uint32_t limit = UINT32_MAX;
3976 RootedObject aobj(cx);
3977 if (linearSep->length() == 0) {
3978 aobj = CharSplitHelper(cx, linearStr, limit);
3979 } else {
3980 SplitStringMatcher matcher(cx, linearSep);
3981 aobj = SplitHelper(cx, linearStr, limit, matcher, type);
3984 if (!aobj)
3985 return nullptr;
3987 aobj->setType(type);
3988 return aobj;
3991 static bool
3992 str_substr(JSContext* cx, unsigned argc, Value* vp)
3994 CallArgs args = CallArgsFromVp(argc, vp);
3995 RootedString str(cx, ThisToStringForStringProto(cx, args));
3996 if (!str)
3997 return false;
3999 int32_t length, len, begin;
4000 if (args.length() > 0) {
4001 length = int32_t(str->length());
4002 if (!ValueToIntegerRange(cx, args[0], &begin))
4003 return false;
4005 if (begin >= length) {
4006 args.rval().setString(cx->runtime()->emptyString);
4007 return true;
4009 if (begin < 0) {
4010 begin += length; /* length + INT_MIN will always be less than 0 */
4011 if (begin < 0)
4012 begin = 0;
4015 if (args.hasDefined(1)) {
4016 if (!ValueToIntegerRange(cx, args[1], &len))
4017 return false;
4019 if (len <= 0) {
4020 args.rval().setString(cx->runtime()->emptyString);
4021 return true;
4024 if (uint32_t(length) < uint32_t(begin + len))
4025 len = length - begin;
4026 } else {
4027 len = length - begin;
4030 str = DoSubstr(cx, str, size_t(begin), size_t(len));
4031 if (!str)
4032 return false;
4035 args.rval().setString(str);
4036 return true;
4040 * Python-esque sequence operations.
// Native registered as "concat" in string_methods: appends the string
// conversion of each argument, in order, to the string conversion of |this|.
//
// Each step first tries the NoGC variant of ToString / ConcatStrings and only
// falls back to the CanGC variant when the NoGC attempt returns null. The
// raw |str| / |argStr| pointers are placed in Rooted wrappers for the
// duration of each CanGC call.
4042 static bool
4043 str_concat(JSContext* cx, unsigned argc, Value* vp)
4045 CallArgs args = CallArgsFromVp(argc, vp);
4046 JSString* str = ThisToStringForStringProto(cx, args);
4047 if (!str)
4048 return false;
4050 for (unsigned i = 0; i < args.length(); i++) {
4051 JSString* argStr = ToString<NoGC>(cx, args[i]);
4052 if (!argStr) {
// Root the accumulated string across the CanGC conversion, then reload the
// raw pointer from the root afterwards.
4053 RootedString strRoot(cx, str);
4054 argStr = ToString<CanGC>(cx, args[i]);
4055 if (!argStr)
4056 return false;
4057 str = strRoot;
4060 JSString* next = ConcatStrings<NoGC>(cx, str, argStr);
4061 if (next) {
4062 str = next;
4063 } else {
4064 RootedString strRoot(cx, str), argStrRoot(cx, argStr);
4065 str = ConcatStrings<CanGC>(cx, strRoot, argStrRoot);
4066 if (!str)
4067 return false;
4071 args.rval().setString(str);
4072 return true;
4075 static bool
4076 str_slice(JSContext* cx, unsigned argc, Value* vp)
4078 CallArgs args = CallArgsFromVp(argc, vp);
4080 if (args.length() == 1 && args.thisv().isString() && args[0].isInt32()) {
4081 JSString* str = args.thisv().toString();
4082 size_t begin = args[0].toInt32();
4083 size_t end = str->length();
4084 if (begin <= end) {
4085 size_t length = end - begin;
4086 if (length == 0) {
4087 str = cx->runtime()->emptyString;
4088 } else {
4089 str = (length == 1)
4090 ? cx->staticStrings().getUnitStringForElement(cx, str, begin)
4091 : NewDependentString(cx, str, begin, length);
4092 if (!str)
4093 return false;
4095 args.rval().setString(str);
4096 return true;
4100 RootedString str(cx, ThisToStringForStringProto(cx, args));
4101 if (!str)
4102 return false;
4104 if (args.length() != 0) {
4105 double begin, end, length;
4107 if (!ToInteger(cx, args[0], &begin))
4108 return false;
4109 length = str->length();
4110 if (begin < 0) {
4111 begin += length;
4112 if (begin < 0)
4113 begin = 0;
4114 } else if (begin > length) {
4115 begin = length;
4118 if (args.hasDefined(1)) {
4119 if (!ToInteger(cx, args[1], &end))
4120 return false;
4121 if (end < 0) {
4122 end += length;
4123 if (end < 0)
4124 end = 0;
4125 } else if (end > length) {
4126 end = length;
4128 if (end < begin)
4129 end = begin;
4130 } else {
4131 end = length;
4134 str = NewDependentString(cx, str, size_t(begin), size_t(end - begin));
4135 if (!str)
4136 return false;
4138 args.rval().setString(str);
4139 return true;
// Function table that js_InitStringClass passes to
// DefinePropertiesAndFunctions for the String prototype object.
//
// JS_FN rows bind a property name to a native function; JS_SELF_HOSTED_FN
// rows bind a property name to a self-hosted function identified by the
// string in the second column. The two trailing columns are presumably the
// declared argument count and the function flags (TODO confirm against the
// JS_FN / JS_SELF_HOSTED_FN macro definitions). The table is terminated by
// JS_FS_END.
//
// Rows guarded by JS_HAS_TOSOURCE / EXPOSE_INTL_API are only present in
// builds that define those macros; "localeCompare" switches between the
// self-hosted and the native implementation on EXPOSE_INTL_API.
4142 static const JSFunctionSpec string_methods[] = {
4143 #if JS_HAS_TOSOURCE
4144 JS_FN("quote", str_quote, 0,JSFUN_GENERIC_NATIVE),
4145 JS_FN(js_toSource_str, str_toSource, 0,0),
4146 #endif
4148 /* Java-like methods. */
4149 JS_FN(js_toString_str, js_str_toString, 0,0),
4150 JS_FN(js_valueOf_str, js_str_toString, 0,0),
4151 JS_FN("substring", str_substring, 2,JSFUN_GENERIC_NATIVE),
4152 JS_FN("toLowerCase", str_toLowerCase, 0,JSFUN_GENERIC_NATIVE),
4153 JS_FN("toUpperCase", str_toUpperCase, 0,JSFUN_GENERIC_NATIVE),
4154 JS_FN("charAt", js_str_charAt, 1,JSFUN_GENERIC_NATIVE),
4155 JS_FN("charCodeAt", js_str_charCodeAt, 1,JSFUN_GENERIC_NATIVE),
4156 JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1,0),
4157 JS_FN("contains", str_contains, 1,JSFUN_GENERIC_NATIVE),
4158 JS_FN("indexOf", str_indexOf, 1,JSFUN_GENERIC_NATIVE),
4159 JS_FN("lastIndexOf", str_lastIndexOf, 1,JSFUN_GENERIC_NATIVE),
4160 JS_FN("startsWith", str_startsWith, 1,JSFUN_GENERIC_NATIVE),
4161 JS_FN("endsWith", str_endsWith, 1,JSFUN_GENERIC_NATIVE),
4162 JS_FN("trim", str_trim, 0,JSFUN_GENERIC_NATIVE),
4163 JS_FN("trimLeft", str_trimLeft, 0,JSFUN_GENERIC_NATIVE),
4164 JS_FN("trimRight", str_trimRight, 0,JSFUN_GENERIC_NATIVE),
4165 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,JSFUN_GENERIC_NATIVE),
4166 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,JSFUN_GENERIC_NATIVE),
4167 #if EXPOSE_INTL_API
4168 JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1,0),
4169 #else
4170 JS_FN("localeCompare", str_localeCompare, 1,JSFUN_GENERIC_NATIVE),
4171 #endif
4172 JS_SELF_HOSTED_FN("repeat", "String_repeat", 1,0),
4173 #if EXPOSE_INTL_API
4174 JS_FN("normalize", str_normalize, 0,JSFUN_GENERIC_NATIVE),
4175 #endif
4177 /* Perl-ish methods (search is actually Python-esque). */
4178 JS_FN("match", str_match, 1,JSFUN_GENERIC_NATIVE),
4179 JS_FN("search", str_search, 1,JSFUN_GENERIC_NATIVE),
4180 JS_FN("replace", str_replace, 2,JSFUN_GENERIC_NATIVE),
4181 JS_FN("split", str_split, 2,JSFUN_GENERIC_NATIVE),
4182 JS_FN("substr", str_substr, 2,JSFUN_GENERIC_NATIVE),
4184 /* Python-esque sequence methods. */
4185 JS_FN("concat", str_concat, 1,JSFUN_GENERIC_NATIVE),
4186 JS_FN("slice", str_slice, 2,JSFUN_GENERIC_NATIVE),
4188 /* HTML string methods. */
4189 JS_SELF_HOSTED_FN("bold", "String_bold", 0,0),
4190 JS_SELF_HOSTED_FN("italics", "String_italics", 0,0),
4191 JS_SELF_HOSTED_FN("fixed", "String_fixed", 0,0),
4192 JS_SELF_HOSTED_FN("strike", "String_strike", 0,0),
4193 JS_SELF_HOSTED_FN("small", "String_small", 0,0),
4194 JS_SELF_HOSTED_FN("big", "String_big", 0,0),
4195 JS_SELF_HOSTED_FN("blink", "String_blink", 0,0),
4196 JS_SELF_HOSTED_FN("sup", "String_sup", 0,0),
4197 JS_SELF_HOSTED_FN("sub", "String_sub", 0,0),
4198 JS_SELF_HOSTED_FN("anchor", "String_anchor", 1,0),
4199 JS_SELF_HOSTED_FN("link", "String_link", 1,0),
4200 JS_SELF_HOSTED_FN("fontcolor","String_fontcolor", 1,0),
4201 JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1,0),
4203 JS_SELF_HOSTED_FN("@@iterator", "String_iterator", 0,0),
4204 JS_FS_END
4207 bool
4208 js_String(JSContext* cx, unsigned argc, Value* vp)
4210 CallArgs args = CallArgsFromVp(argc, vp);
4212 RootedString str(cx);
4213 if (args.length() > 0) {
4214 str = ToString<CanGC>(cx, args[0]);
4215 if (!str)
4216 return false;
4217 } else {
4218 str = cx->runtime()->emptyString;
4221 if (args.isConstructing()) {
4222 StringObject* strobj = StringObject::create(cx, str);
4223 if (!strobj)
4224 return false;
4225 args.rval().setObject(*strobj);
4226 return true;
4229 args.rval().setString(str);
4230 return true;
4233 bool
4234 js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp)
4236 CallArgs args = CallArgsFromVp(argc, vp);
4238 JS_ASSERT(args.length() <= ARGS_LENGTH_MAX);
4239 if (args.length() == 1)
4240 return str_fromCharCode_one_arg(cx, args[0], args.rval());
4242 jschar* chars = cx->pod_malloc<jschar>(args.length() + 1);
4243 if (!chars)
4244 return false;
4245 for (unsigned i = 0; i < args.length(); i++) {
4246 uint16_t code;
4247 if (!ToUint16(cx, args[i], &code)) {
4248 js_free(chars);
4249 return false;
4251 chars[i] = jschar(code);
4253 chars[args.length()] = 0;
4254 JSString* str = NewString<CanGC>(cx, chars, args.length());
4255 if (!str) {
4256 js_free(chars);
4257 return false;
4260 args.rval().setString(str);
4261 return true;
4264 bool
4265 js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval)
4267 uint16_t ucode;
4269 if (!ToUint16(cx, code, &ucode))
4270 return false;
4272 if (StaticStrings::hasUnit(ucode)) {
4273 rval.setString(cx->staticStrings().getUnit(ucode));
4274 return true;
4277 jschar c = jschar(ucode);
4278 JSString* str = NewStringCopyN<CanGC>(cx, &c, 1);
4279 if (!str)
4280 return false;
4282 rval.setString(str);
4283 return true;
// Function table that js_InitStringClass passes to
// DefinePropertiesAndFunctions for the String constructor function itself
// (static methods). The static "localeCompare" entry is only present when
// EXPOSE_INTL_API is set. Terminated by JS_FS_END.
4286 static const JSFunctionSpec string_static_methods[] = {
4287 JS_FN("fromCharCode", js::str_fromCharCode, 1, 0),
4288 JS_SELF_HOSTED_FN("fromCodePoint", "String_static_fromCodePoint", 0,0),
4289 JS_SELF_HOSTED_FN("raw", "String_static_raw", 2, 0),
4291 // This must be at the end because of bug 853075: functions listed after
4292 // self-hosted methods aren't available in self-hosted code.
4293 #if EXPOSE_INTL_API
4294 JS_SELF_HOSTED_FN("localeCompare", "String_static_localeCompare", 2,0),
4295 #endif
4296 JS_FS_END
4299 /* static */ Shape*
4300 StringObject::assignInitialShape(ExclusiveContext* cx, Handle<StringObject*> obj)
4302 JS_ASSERT(obj->nativeEmpty());
4304 return obj->addDataProperty(cx, cx->names().length, LENGTH_SLOT,
4305 JSPROP_PERMANENT | JSPROP_READONLY);
4308 JSObject*
4309 js_InitStringClass(JSContext* cx, HandleObject obj)
4311 JS_ASSERT(obj->isNative());
4313 Rooted<GlobalObject*> global(cx, &obj->as<GlobalObject>());
4315 Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
4316 RootedObject proto(cx, global->createBlankPrototype(cx, &StringObject::class_));
4317 if (!proto || !proto->as<StringObject>().init(cx, empty))
4318 return nullptr;
4320 /* Now create the String function. */
4321 RootedFunction ctor(cx);
4322 ctor = global->createConstructor(cx, js_String, cx->names().String, 1);
4323 if (!ctor)
4324 return nullptr;
4326 if (!GlobalObject::initBuiltinConstructor(cx, global, JSProto_String, ctor, proto))
4327 return nullptr;
4329 if (!LinkConstructorAndPrototype(cx, ctor, proto))
4330 return nullptr;
4332 if (!DefinePropertiesAndFunctions(cx, proto, nullptr, string_methods) ||
4333 !DefinePropertiesAndFunctions(cx, ctor, nullptr, string_static_methods))
4335 return nullptr;
4339 * Define escape/unescape, the URI encode/decode functions, and maybe
4340 * uneval on the global object.
4342 if (!JS_DefineFunctions(cx, global, string_functions))
4343 return nullptr;
4345 return proto;
4348 const char*
4349 js_ValueToPrintable(JSContext* cx, const Value& vArg, JSAutoByteString* bytes, bool asSource)
4351 RootedValue v(cx, vArg);
4352 JSString* str;
4353 if (asSource)
4354 str = ValueToSource(cx, v);
4355 else
4356 str = ToString<CanGC>(cx, v);
4357 if (!str)
4358 return nullptr;
4359 str = js_QuoteString(cx, str, 0);
4360 if (!str)
4361 return nullptr;
4362 return bytes->encodeLatin1(cx, str);
4365 template <AllowGC allowGC>
4366 JSString*
4367 js::ToStringSlow(ExclusiveContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg)
4369 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
4370 JS_ASSERT(!arg.isString());
4372 Value v = arg;
4373 if (!v.isPrimitive()) {
4374 if (!cx->shouldBeJSContext() || !allowGC)
4375 return nullptr;
4376 RootedValue v2(cx, v);
4377 if (!ToPrimitive(cx->asJSContext(), JSTYPE_STRING, &v2))
4378 return nullptr;
4379 v = v2;
4382 JSString* str;
4383 if (v.isString()) {
4384 str = v.toString();
4385 } else if (v.isInt32()) {
4386 str = Int32ToString<allowGC>(cx, v.toInt32());
4387 } else if (v.isDouble()) {
4388 str = NumberToString<allowGC>(cx, v.toDouble());
4389 } else if (v.isBoolean()) {
4390 str = js_BooleanToString(cx, v.toBoolean());
4391 } else if (v.isNull()) {
4392 str = cx->names().null;
4393 } else if (v.isSymbol()) {
4394 if (cx->shouldBeJSContext() && allowGC) {
4395 JS_ReportErrorNumber(cx->asJSContext(), js_GetErrorMessage, nullptr,
4396 JSMSG_SYMBOL_TO_STRING);
4398 return nullptr;
4399 } else {
4400 MOZ_ASSERT(v.isUndefined());
4401 str = cx->names().undefined;
4403 return str;
4406 template JSString*
4407 js::ToStringSlow<CanGC>(ExclusiveContext* cx, HandleValue arg);
4409 template JSString*
4410 js::ToStringSlow<NoGC>(ExclusiveContext* cx, Value arg);
4412 JS_PUBLIC_API(JSString*)
4413 js::ToStringSlow(JSContext* cx, HandleValue v)
4415 return ToStringSlow<CanGC>(cx, v);
4418 static JSString*
4419 SymbolToSource(JSContext* cx, Symbol* symbol)
4421 RootedString desc(cx, symbol->description());
4422 SymbolCode code = symbol->code();
4423 if (code != SymbolCode::InSymbolRegistry && code != SymbolCode::UniqueSymbol) {
4424 // Well-known symbol.
4425 MOZ_ASSERT(uint32_t(code) < JS::WellKnownSymbolLimit);
4426 return desc;
4429 StringBuffer buf(cx);
4430 if (code == SymbolCode::InSymbolRegistry ? !buf.append("Symbol.for(") : !buf.append("Symbol("))
4431 return nullptr;
4432 if (desc) {
4433 desc = StringToSource(cx, desc);
4434 if (!desc || !buf.append(desc))
4435 return nullptr;
4437 if (!buf.append(')'))
4438 return nullptr;
4439 return buf.finishString();
4442 JSString*
4443 js::ValueToSource(JSContext* cx, HandleValue v)
4445 JS_CHECK_RECURSION(cx, return nullptr);
4446 assertSameCompartment(cx, v);
4448 if (v.isUndefined())
4449 return cx->names().void0;
4450 if (v.isString())
4451 return StringToSource(cx, v.toString());
4452 if (v.isSymbol())
4453 return SymbolToSource(cx, v.toSymbol());
4454 if (v.isPrimitive()) {
4455 /* Special case to preserve negative zero, _contra_ toString. */
4456 if (v.isDouble() && IsNegativeZero(v.toDouble())) {
4457 /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
4458 static const jschar js_negzero_ucNstr[] = {'-', '0'};
4460 return NewStringCopyN<CanGC>(cx, js_negzero_ucNstr, 2);
4462 return ToString<CanGC>(cx, v);
4465 RootedValue fval(cx);
4466 RootedObject obj(cx, &v.toObject());
4467 if (!JSObject::getProperty(cx, obj, obj, cx->names().toSource, &fval))
4468 return nullptr;
4469 if (IsCallable(fval)) {
4470 RootedValue rval(cx);
4471 if (!Invoke(cx, ObjectValue(*obj), fval, 0, nullptr, &rval))
4472 return nullptr;
4473 return ToString<CanGC>(cx, rval);
4476 return ObjectToSource(cx, obj);
4479 JSString*
4480 js::StringToSource(JSContext* cx, JSString* str)
4482 return js_QuoteString(cx, str, '"');
4485 bool
4486 js::EqualChars(JSLinearString* str1, JSLinearString* str2)
4488 MOZ_ASSERT(str1->length() == str2->length());
4490 size_t len = str1->length();
4492 AutoCheckCannotGC nogc;
4493 if (str1->hasTwoByteChars()) {
4494 if (str2->hasTwoByteChars())
4495 return PodEqual(str1->twoByteChars(nogc), str2->twoByteChars(nogc), len);
4497 return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len);
4500 if (str2->hasLatin1Chars())
4501 return PodEqual(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len);
4503 return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len);
4506 bool
4507 js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result)
4509 if (str1 == str2) {
4510 *result = true;
4511 return true;
4514 size_t length1 = str1->length();
4515 if (length1 != str2->length()) {
4516 *result = false;
4517 return true;
4520 JSLinearString* linear1 = str1->ensureLinear(cx);
4521 if (!linear1)
4522 return false;
4523 JSLinearString* linear2 = str2->ensureLinear(cx);
4524 if (!linear2)
4525 return false;
4527 *result = EqualChars(linear1, linear2);
4528 return true;
4531 bool
4532 js::EqualStrings(JSLinearString* str1, JSLinearString* str2)
4534 if (str1 == str2)
4535 return true;
4537 size_t length1 = str1->length();
4538 if (length1 != str2->length())
4539 return false;
4541 return EqualChars(str1, str2);
4544 static int32_t
4545 CompareStringsImpl(JSLinearString* str1, JSLinearString* str2)
4547 size_t len1 = str1->length();
4548 size_t len2 = str2->length();
4550 AutoCheckCannotGC nogc;
4551 if (str1->hasLatin1Chars()) {
4552 const Latin1Char* chars1 = str1->latin1Chars(nogc);
4553 return str2->hasLatin1Chars()
4554 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
4555 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
4558 const jschar* chars1 = str1->twoByteChars(nogc);
4559 return str2->hasLatin1Chars()
4560 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
4561 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
4564 int32_t
4565 js::CompareChars(const jschar* s1, size_t len1, JSLinearString* s2)
4567 AutoCheckCannotGC nogc;
4568 return s2->hasLatin1Chars()
4569 ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length())
4570 : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length());
4573 bool
4574 js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result)
4576 JS_ASSERT(str1);
4577 JS_ASSERT(str2);
4579 if (str1 == str2) {
4580 *result = 0;
4581 return true;
4584 JSLinearString* linear1 = str1->ensureLinear(cx);
4585 if (!linear1)
4586 return false;
4588 JSLinearString* linear2 = str2->ensureLinear(cx);
4589 if (!linear2)
4590 return false;
4592 *result = CompareStringsImpl(linear1, linear2);
4593 return true;
4596 int32_t
4597 js::CompareAtoms(JSAtom* atom1, JSAtom* atom2)
4599 return CompareStringsImpl(atom1, atom2);
4602 bool
4603 js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes)
4605 size_t length = strlen(asciiBytes);
4606 #ifdef DEBUG
4607 for (size_t i = 0; i != length; ++i)
4608 JS_ASSERT(unsigned(asciiBytes[i]) <= 127);
4609 #endif
4610 if (length != str->length())
4611 return false;
4613 const Latin1Char* latin1 = reinterpret_cast<const Latin1Char*>(asciiBytes);
4615 AutoCheckCannotGC nogc;
4616 return str->hasLatin1Chars()
4617 ? PodEqual(latin1, str->latin1Chars(nogc), length)
4618 : EqualChars(latin1, str->twoByteChars(nogc), length);
4621 size_t
4622 js_strlen(const jschar* s)
4624 const jschar* t;
4626 for (t = s; *t != 0; t++)
4627 continue;
4628 return (size_t)(t - s);
4631 int32_t
4632 js_strcmp(const jschar* lhs, const jschar* rhs)
4634 while (true) {
4635 if (*lhs != *rhs)
4636 return int32_t(*lhs) - int32_t(*rhs);
4637 if (*lhs == 0)
4638 return 0;
4639 ++lhs, ++rhs;
4643 UniquePtr<char[], JS::FreePolicy>
4644 js::DuplicateString(js::ThreadSafeContext* cx, const char* s)
4646 size_t n = strlen(s) + 1;
4647 auto ret = cx->make_pod_array<char>(n);
4648 if (!ret)
4649 return ret;
4650 PodCopy(ret.get(), s, n);
4651 return ret;
4654 UniquePtr<jschar[], JS::FreePolicy>
4655 js::DuplicateString(js::ThreadSafeContext* cx, const jschar* s)
4657 size_t n = js_strlen(s) + 1;
4658 auto ret = cx->make_pod_array<jschar>(n);
4659 if (!ret)
4660 return ret;
4661 PodCopy(ret.get(), s, n);
4662 return ret;
4665 template <typename CharT>
4666 const CharT*
4667 js_strchr_limit(const CharT* s, jschar c, const CharT* limit)
4669 while (s < limit) {
4670 if (*s == c)
4671 return s;
4672 s++;
4674 return nullptr;
4677 template const Latin1Char*
4678 js_strchr_limit(const Latin1Char* s, jschar c, const Latin1Char* limit);
4680 template const jschar*
4681 js_strchr_limit(const jschar* s, jschar c, const jschar* limit);
4683 jschar*
4684 js::InflateString(ThreadSafeContext* cx, const char* bytes, size_t* lengthp)
4686 size_t nchars;
4687 jschar* chars;
4688 size_t nbytes = *lengthp;
4690 nchars = nbytes;
4691 chars = cx->pod_malloc<jschar>(nchars + 1);
4692 if (!chars)
4693 goto bad;
4694 for (size_t i = 0; i < nchars; i++)
4695 chars[i] = (unsigned char) bytes[i];
4696 *lengthp = nchars;
4697 chars[nchars] = 0;
4698 return chars;
4700 bad:
4701 // For compatibility with callers of JS_DecodeBytes we must zero lengthp
4702 // on errors.
4703 *lengthp = 0;
4704 return nullptr;
4707 template <typename CharT>
4708 bool
4709 js::DeflateStringToBuffer(JSContext* maybecx, const CharT* src, size_t srclen,
4710 char* dst, size_t* dstlenp)
4712 size_t dstlen = *dstlenp;
4713 if (srclen > dstlen) {
4714 for (size_t i = 0; i < dstlen; i++)
4715 dst[i] = char(src[i]);
4716 if (maybecx) {
4717 AutoSuppressGC suppress(maybecx);
4718 JS_ReportErrorNumber(maybecx, js_GetErrorMessage, nullptr,
4719 JSMSG_BUFFER_TOO_SMALL);
4721 return false;
4723 for (size_t i = 0; i < srclen; i++)
4724 dst[i] = char(src[i]);
4725 *dstlenp = srclen;
4726 return true;
4729 template bool
4730 js::DeflateStringToBuffer(JSContext* maybecx, const Latin1Char* src, size_t srclen,
4731 char* dst, size_t* dstlenp);
4733 template bool
4734 js::DeflateStringToBuffer(JSContext* maybecx, const jschar* src, size_t srclen,
4735 char* dst, size_t* dstlenp);
/* Shorthand for |false| so that |true| entries stand out in the tables that use it. */
#define ____ false
4740 * Identifier start chars:
4741 * - 36: $
4742 * - 65..90: A..Z
4743 * - 95: _
4744 * - 97..122: a..z
4746 const bool js_isidstart[] = {
4747 /* 0 1 2 3 4 5 6 7 8 9 */
4748 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4749 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4750 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4751 /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4752 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4753 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4754 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4755 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4756 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4757 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4758 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4759 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4760 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4764 * Identifier chars:
4765 * - 36: $
4766 * - 48..57: 0..9
4767 * - 65..90: A..Z
4768 * - 95: _
4769 * - 97..122: a..z
4771 const bool js_isident[] = {
4772 /* 0 1 2 3 4 5 6 7 8 9 */
4773 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4774 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4775 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4776 /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
4777 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4778 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
4779 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4780 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4781 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4782 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4783 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4784 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4785 /* 12 */ true, true, true, ____, ____, ____, ____, ____
4788 /* Whitespace chars: '\t', '\n', '\v', '\f', '\r', ' '. */
4789 const bool js_isspace[] = {
4790 /* 0 1 2 3 4 5 6 7 8 9 */
4791 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, true,
4792 /* 1 */ true, true, true, true, ____, ____, ____, ____, ____, ____,
4793 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4794 /* 3 */ ____, ____, true, ____, ____, ____, ____, ____, ____, ____,
4795 /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4796 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4797 /* 6 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4798 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4799 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4800 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4801 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4802 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4803 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4807 * Uri reserved chars + #:
4808 * - 35: #
4809 * - 36: $
4810 * - 38: &
4811 * - 43: +
4812 * - 44: ,
4813 * - 47: /
4814 * - 58: :
4815 * - 59: ;
4816 * - 61: =
4817 * - 63: ?
4818 * - 64: @
4820 static const bool js_isUriReservedPlusPound[] = {
4821 /* 0 1 2 3 4 5 6 7 8 9 */
4822 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4823 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4824 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4825 /* 3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
4826 /* 4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
4827 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
4828 /* 6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
4829 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4830 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4831 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4832 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4833 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4834 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
4838 * Uri unescaped chars:
4839 * - 33: !
4840 * - 39: '
4841 * - 40: (
4842 * - 41: )
4843 * - 42: *
4844 * - 45: -
4845 * - 46: .
4846 * - 48..57: 0-9
4847 * - 65..90: A-Z
4848 * - 95: _
4849 * - 97..122: a-z
4850 * - 126: ~
4852 static const bool js_isUriUnescaped[] = {
4853 /* 0 1 2 3 4 5 6 7 8 9 */
4854 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4855 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4856 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
4857 /* 3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
4858 /* 4 */ true, true, true, ____, ____, true, true, ____, true, true,
4859 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
4860 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4861 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4862 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4863 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4864 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4865 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4866 /* 12 */ true, true, true, ____, ____, ____, true, ____
/* The table shorthand is not needed past this point. */
#undef ____

/* NOTE(review): no use of URI_CHUNK is visible nearby -- confirm it is still needed. */
#define URI_CHUNK 64U
4873 static inline bool
4874 TransferBufferToString(StringBuffer& sb, MutableHandleValue rval)
4876 JSString* str = sb.finishString();
4877 if (!str)
4878 return false;
4879 rval.setString(str);
4880 return true;
/*
 * ECMA 3, 15.1.3 URI Handling Function Properties
 *
 * The following are implementations of the algorithms
 * given in the ECMA specification for the hidden functions
 * 'Encode' and 'Decode'.
 */
enum EncodeResult {
    Encode_Failure,     /* appending to the output buffer failed */
    Encode_BadUri,      /* the input contained an unpaired surrogate */
    Encode_Success
};
4892 template <typename CharT>
4893 static EncodeResult
4894 Encode(StringBuffer& sb, const CharT* chars, size_t length,
4895 const bool* unescapedSet, const bool* unescapedSet2)
4897 static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
4899 jschar hexBuf[4];
4900 hexBuf[0] = '%';
4901 hexBuf[3] = 0;
4903 for (size_t k = 0; k < length; k++) {
4904 jschar c = chars[k];
4905 if (c < 128 && (unescapedSet[c] || (unescapedSet2 && unescapedSet2[c]))) {
4906 if (!sb.append(c))
4907 return Encode_Failure;
4908 } else {
4909 if (c >= 0xDC00 && c <= 0xDFFF)
4910 return Encode_BadUri;
4912 uint32_t v;
4913 if (c < 0xD800 || c > 0xDBFF) {
4914 v = c;
4915 } else {
4916 k++;
4917 if (k == length)
4918 return Encode_BadUri;
4920 jschar c2 = chars[k];
4921 if (c2 < 0xDC00 || c2 > 0xDFFF)
4922 return Encode_BadUri;
4924 v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
4926 uint8_t utf8buf[4];
4927 size_t L = js_OneUcs4ToUtf8Char(utf8buf, v);
4928 for (size_t j = 0; j < L; j++) {
4929 hexBuf[1] = HexDigits[utf8buf[j] >> 4];
4930 hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
4931 if (!sb.append(hexBuf, 3))
4932 return Encode_Failure;
4937 return Encode_Success;
4940 static bool
4941 Encode(JSContext* cx, HandleLinearString str, const bool* unescapedSet,
4942 const bool* unescapedSet2, MutableHandleValue rval)
4944 size_t length = str->length();
4945 if (length == 0) {
4946 rval.setString(cx->runtime()->emptyString);
4947 return true;
4950 StringBuffer sb(cx);
4951 if (!sb.reserve(length))
4952 return false;
4954 EncodeResult res;
4955 if (str->hasLatin1Chars()) {
4956 AutoCheckCannotGC nogc;
4957 res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet, unescapedSet2);
4958 } else {
4959 AutoCheckCannotGC nogc;
4960 res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet, unescapedSet2);
4963 if (res == Encode_Failure)
4964 return false;
4966 if (res == Encode_BadUri) {
4967 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_BAD_URI, nullptr);
4968 return false;
4971 MOZ_ASSERT(res == Encode_Success);
4972 return TransferBufferToString(sb, rval);
/* Outcome of the character-level Decode routine. */
enum DecodeResult {
    Decode_Failure,     /* appending to the output buffer failed */
    Decode_BadUri,      /* the input contained a malformed escape sequence */
    Decode_Success
};
4977 template <typename CharT>
4978 static DecodeResult
4979 Decode(StringBuffer& sb, const CharT* chars, size_t length, const bool* reservedSet)
4981 for (size_t k = 0; k < length; k++) {
4982 jschar c = chars[k];
4983 if (c == '%') {
4984 size_t start = k;
4985 if ((k + 2) >= length)
4986 return Decode_BadUri;
4988 if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4989 return Decode_BadUri;
4991 uint32_t B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4992 k += 2;
4993 if (!(B & 0x80)) {
4994 c = jschar(B);
4995 } else {
4996 int n = 1;
4997 while (B & (0x80 >> n))
4998 n++;
5000 if (n == 1 || n > 4)
5001 return Decode_BadUri;
5003 uint8_t octets[4];
5004 octets[0] = (uint8_t)B;
5005 if (k + 3 * (n - 1) >= length)
5006 return Decode_BadUri;
5008 for (int j = 1; j < n; j++) {
5009 k++;
5010 if (chars[k] != '%')
5011 return Decode_BadUri;
5013 if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
5014 return Decode_BadUri;
5016 B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
5017 if ((B & 0xC0) != 0x80)
5018 return Decode_BadUri;
5020 k += 2;
5021 octets[j] = char(B);
5023 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
5024 if (v >= 0x10000) {
5025 v -= 0x10000;
5026 if (v > 0xFFFFF)
5027 return Decode_BadUri;
5029 c = jschar((v & 0x3FF) + 0xDC00);
5030 jschar H = jschar((v >> 10) + 0xD800);
5031 if (!sb.append(H))
5032 return Decode_Failure;
5033 } else {
5034 c = jschar(v);
5037 if (c < 128 && reservedSet && reservedSet[c]) {
5038 if (!sb.append(chars + start, k - start + 1))
5039 return Decode_Failure;
5040 } else {
5041 if (!sb.append(c))
5042 return Decode_Failure;
5044 } else {
5045 if (!sb.append(c))
5046 return Decode_Failure;
5050 return Decode_Success;
5053 static bool
5054 Decode(JSContext* cx, HandleLinearString str, const bool* reservedSet, MutableHandleValue rval)
5056 size_t length = str->length();
5057 if (length == 0) {
5058 rval.setString(cx->runtime()->emptyString);
5059 return true;
5062 StringBuffer sb(cx);
5064 DecodeResult res;
5065 if (str->hasLatin1Chars()) {
5066 AutoCheckCannotGC nogc;
5067 res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
5068 } else {
5069 AutoCheckCannotGC nogc;
5070 res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
5073 if (res == Decode_Failure)
5074 return false;
5076 if (res == Decode_BadUri) {
5077 JS_ReportErrorNumber(cx, js_GetErrorMessage, nullptr, JSMSG_BAD_URI);
5078 return false;
5081 MOZ_ASSERT(res == Decode_Success);
5082 return TransferBufferToString(sb, rval);
5085 static bool
5086 str_decodeURI(JSContext* cx, unsigned argc, Value* vp)
5088 CallArgs args = CallArgsFromVp(argc, vp);
5089 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5090 if (!str)
5091 return false;
5093 return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
5096 static bool
5097 str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp)
5099 CallArgs args = CallArgsFromVp(argc, vp);
5100 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5101 if (!str)
5102 return false;
5104 return Decode(cx, str, nullptr, args.rval());
5107 static bool
5108 str_encodeURI(JSContext* cx, unsigned argc, Value* vp)
5110 CallArgs args = CallArgsFromVp(argc, vp);
5111 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5112 if (!str)
5113 return false;
5115 return Encode(cx, str, js_isUriUnescaped, js_isUriReservedPlusPound, args.rval());
5118 static bool
5119 str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp)
5121 CallArgs args = CallArgsFromVp(argc, vp);
5122 RootedLinearString str(cx, ArgToRootedString(cx, args, 0));
5123 if (!str)
5124 return false;
5126 return Encode(cx, str, js_isUriUnescaped, nullptr, args.rval());
5130 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
5131 * least 4 bytes long. Return the number of UTF-8 bytes of data written.
5134 js_OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char)
5136 int utf8Length = 1;
5138 JS_ASSERT(ucs4Char <= 0x10FFFF);
5139 if (ucs4Char < 0x80) {
5140 *utf8Buffer = (uint8_t)ucs4Char;
5141 } else {
5142 int i;
5143 uint32_t a = ucs4Char >> 11;
5144 utf8Length = 2;
5145 while (a) {
5146 a >>= 5;
5147 utf8Length++;
5149 i = utf8Length;
5150 while (--i) {
5151 utf8Buffer[i] = (uint8_t)((ucs4Char & 0x3F) | 0x80);
5152 ucs4Char >>= 6;
5154 *utf8Buffer = (uint8_t)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
5156 return utf8Length;
5159 size_t
5160 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, FILE* fp, JSLinearString* str,
5161 uint32_t quote)
5163 size_t len = str->length();
5164 AutoCheckCannotGC nogc;
5165 return str->hasLatin1Chars()
5166 ? PutEscapedStringImpl(buffer, bufferSize, fp, str->latin1Chars(nogc), len, quote)
5167 : PutEscapedStringImpl(buffer, bufferSize, fp, str->twoByteChars(nogc), len, quote);
5170 template <typename CharT>
5171 size_t
5172 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, FILE* fp, const CharT* chars,
5173 size_t length, uint32_t quote)
5175 enum {
5176 STOP, FIRST_QUOTE, LAST_QUOTE, CHARS, ESCAPE_START, ESCAPE_MORE
5177 } state;
5179 JS_ASSERT(quote == 0 || quote == '\'' || quote == '"');
5180 JS_ASSERT_IF(!buffer, bufferSize == 0);
5181 JS_ASSERT_IF(fp, !buffer);
5183 if (bufferSize == 0)
5184 buffer = nullptr;
5185 else
5186 bufferSize--;
5188 const CharT* charsEnd = chars + length;
5189 size_t n = 0;
5190 state = FIRST_QUOTE;
5191 unsigned shift = 0;
5192 unsigned hex = 0;
5193 unsigned u = 0;
5194 char c = 0; /* to quell GCC warnings */
5196 for (;;) {
5197 switch (state) {
5198 case STOP:
5199 goto stop;
5200 case FIRST_QUOTE:
5201 state = CHARS;
5202 goto do_quote;
5203 case LAST_QUOTE:
5204 state = STOP;
5205 do_quote:
5206 if (quote == 0)
5207 continue;
5208 c = (char)quote;
5209 break;
5210 case CHARS:
5211 if (chars == charsEnd) {
5212 state = LAST_QUOTE;
5213 continue;
5215 u = *chars++;
5216 if (u < ' ') {
5217 if (u != 0) {
5218 const char* escape = strchr(js_EscapeMap, (int)u);
5219 if (escape) {
5220 u = escape[1];
5221 goto do_escape;
5224 goto do_hex_escape;
5226 if (u < 127) {
5227 if (u == quote || u == '\\')
5228 goto do_escape;
5229 c = (char)u;
5230 } else if (u < 0x100) {
5231 goto do_hex_escape;
5232 } else {
5233 shift = 16;
5234 hex = u;
5235 u = 'u';
5236 goto do_escape;
5238 break;
5239 do_hex_escape:
5240 shift = 8;
5241 hex = u;
5242 u = 'x';
5243 do_escape:
5244 c = '\\';
5245 state = ESCAPE_START;
5246 break;
5247 case ESCAPE_START:
5248 JS_ASSERT(' ' <= u && u < 127);
5249 c = (char)u;
5250 state = ESCAPE_MORE;
5251 break;
5252 case ESCAPE_MORE:
5253 if (shift == 0) {
5254 state = CHARS;
5255 continue;
5257 shift -= 4;
5258 u = 0xF & (hex >> shift);
5259 c = (char)(u + (u < 10 ? '0' : 'A' - 10));
5260 break;
5262 if (buffer) {
5263 JS_ASSERT(n <= bufferSize);
5264 if (n != bufferSize) {
5265 buffer[n] = c;
5266 } else {
5267 buffer[n] = '\0';
5268 buffer = nullptr;
5270 } else if (fp) {
5271 if (fputc(c, fp) < 0)
5272 return size_t(-1);
5274 n++;
5276 stop:
5277 if (buffer)
5278 buffer[n] = '\0';
5279 return n;
5282 template size_t
5283 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, FILE* fp, const Latin1Char* chars,
5284 size_t length, uint32_t quote);
5286 template size_t
5287 js::PutEscapedStringImpl(char* buffer, size_t bufferSize, FILE* fp, const jschar* chars,
5288 size_t length, uint32_t quote);
5290 template size_t
5291 js::PutEscapedString(char* buffer, size_t bufferSize, const Latin1Char* chars, size_t length,
5292 uint32_t quote);
5294 template size_t
5295 js::PutEscapedString(char* buffer, size_t bufferSize, const jschar* chars, size_t length,
5296 uint32_t quote);