Merge mozilla-central and tracemonkey. (a=blockers)
[mozilla-central.git] / js / src / jsstr.h
blob275f9892b00b75f3707707d4ec111dba17c9f0f9
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
3 * ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is Mozilla Communicator client code, released
17 * March 31, 1998.
19 * The Initial Developer of the Original Code is
20 * Netscape Communications Corporation.
21 * Portions created by the Initial Developer are Copyright (C) 1998
22 * the Initial Developer. All Rights Reserved.
24 * Contributor(s):
26 * Alternatively, the contents of this file may be used under the terms of
27 * either of the GNU General Public License Version 2 or later (the "GPL"),
28 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #ifndef jsstr_h___
41 #define jsstr_h___
43 * JS string type implementation.
45 * A JS string is a counted array of unicode characters. To support handoff
46 * of API client memory, the chars are allocated separately from the length,
47 * necessitating a pointer after the count, to form a separately allocated
48 * string descriptor. String descriptors are GC'ed, while their chars are
49 * allocated from the malloc heap.
51 #include <ctype.h>
52 #include "jsapi.h"
53 #include "jsprvtd.h"
54 #include "jshashtable.h"
55 #include "jslock.h"
56 #include "jsobj.h"
57 #include "jsvalue.h"
58 #include "jscell.h"
/*
 * Limits for the statically allocated string tables declared in JSString.
 *
 * UNIT_STRING_LIMIT, NUM_SMALL_CHARS (squared) and NUM_HUNDRED_STRINGS are
 * the element counts that JSString::isUnitString, isLength2String and
 * isHundredString use as bounds for unitStringTable, length2StringTable and
 * hundredStringTable respectively. SMALL_CHAR_LIMIT bounds the index used
 * with JSString::toSmallChar.
 */
enum {
    UNIT_STRING_LIMIT        = 256U,
    SMALL_CHAR_LIMIT         = 128U, /* Bigger chars cannot be in a length-2 string. */
    NUM_SMALL_CHARS          = 64U,
    INT_STRING_LIMIT         = 256U,
    NUM_HUNDRED_STRINGS      = 156U
};
68 extern jschar *
69 js_GetDependentStringChars(JSString *str);
71 extern JSString * JS_FASTCALL
72 js_ConcatStrings(JSContext *cx, JSString *left, JSString *right);
74 JS_STATIC_ASSERT(JS_BITS_PER_WORD >= 32);
/* Bookkeeping record describing the size of a rope's character buffer. */
struct JSRopeBufferInfo {
    /* Number of jschars we can hold, not including null terminator. */
    size_t capacity;
};
81 /* Forward declaration for friending. */
82 namespace js { namespace mjit {
83 class Compiler;
86 struct JSLinearString;
89 * The GC-thing "string" type.
91 * In FLAT strings, the mChars field points to a flat character array owned by
92 * its GC-thing descriptor. The array is terminated at index length by a zero
93 * character and the size of the array in bytes is
94 * (length + 1) * sizeof(jschar). The terminator is purely a backstop, in case
95 * the chars pointer flows out to native code that requires \u0000 termination.
97 * A flat string with the ATOMIZED flag means that the string is hashed as
98 * an atom. This flag is used to avoid re-hashing the already-atomized string.
100 * A flat string with the EXTENSIBLE flag means that the string may change into
101 * a dependent string as part of an optimization with js_ConcatStrings:
102 * extending |str1 = "abc"| with the character |str2 = str1 + "d"| will place
103 * "d" in the extra capacity from |str1|, make that the buffer for |str2|, and
104 * turn |str1| into a dependent string of |str2|.
106 * Flat strings without the EXTENSIBLE flag can be safely accessed by multiple
107 * threads.
109 * When the string is DEPENDENT, the string depends on characters of another
110 * string strongly referenced by the base field. The base member may point to
111 * another dependent string if chars() has not been called yet.
113 * When a string is a ROPE, it represents the lazy concatenation of other
114 * strings. In general, the nodes reachable from any rope form a dag.
116 * To allow static type-based checking that a given JSString* always points
117 * to a flat or non-rope string, the JSFlatString and JSLinearString types may
118 * be used. Instead of casting, callers should use ensureX() and assertIsX().
120 struct JSString
122 friend class js::TraceRecorder;
123 friend class js::mjit::Compiler;
125 friend JSAtom *js_AtomizeString(JSContext *cx, JSString *str, uintN flags);
128 * Not private because we want to be able to use static initializers for
129 * them. Don't use these directly! FIXME bug 614459.
131 size_t lengthAndFlags; /* in all strings */
132 union {
133 const jschar *chars; /* in non-rope strings */
134 JSString *left; /* in rope strings */
135 } u;
136 union {
137 jschar inlineStorage[4]; /* in short strings */
138 struct {
139 union {
140 JSString *right; /* in rope strings */
141 JSString *base; /* in dependent strings */
142 size_t capacity; /* in extensible flat strings */
144 union {
145 JSString *parent; /* temporarily used during flatten */
146 size_t reserved; /* may use for bug 615290 */
148 } s;
149 size_t externalStringType; /* in external strings */
153 * The lengthAndFlags field in string headers has data arranged in the
154 * following way:
156 * [ length (bits 4-31) ][ flags (bits 2-3) ][ type (bits 0-1) ]
158 * The length is packed in lengthAndFlags, even in string types that don't
159 * need 3 other fields, to make the length check simpler.
161 * When the string type is FLAT, the flags can contain ATOMIZED or
162 * EXTENSIBLE.
164 static const size_t TYPE_FLAGS_MASK = JS_BITMASK(4);
165 static const size_t LENGTH_SHIFT = 4;
167 static const size_t TYPE_MASK = JS_BITMASK(2);
168 static const size_t FLAT = 0x0;
169 static const size_t DEPENDENT = 0x1;
170 static const size_t ROPE = 0x2;
172 /* Allow checking 1 bit for dependent/rope strings. */
173 static const size_t DEPENDENT_BIT = JS_BIT(0);
174 static const size_t ROPE_BIT = JS_BIT(1);
176 static const size_t ATOMIZED = JS_BIT(2);
177 static const size_t EXTENSIBLE = JS_BIT(3);
180 size_t buildLengthAndFlags(size_t length, size_t flags) {
181 return (length << LENGTH_SHIFT) | flags;
184 inline js::gc::Cell *asCell() {
185 return reinterpret_cast<js::gc::Cell *>(this);
188 inline js::gc::FreeCell *asFreeCell() {
189 return reinterpret_cast<js::gc::FreeCell *>(this);
193 * Generous but sane length bound; the "-1" is there for comptibility with
194 * OOM tests.
196 static const size_t MAX_LENGTH = (1 << 28) - 1;
198 JS_ALWAYS_INLINE bool isDependent() const {
199 return lengthAndFlags & DEPENDENT_BIT;
202 JS_ALWAYS_INLINE bool isFlat() const {
203 return (lengthAndFlags & TYPE_MASK) == FLAT;
206 JS_ALWAYS_INLINE bool isExtensible() const {
207 JS_ASSERT_IF(lengthAndFlags & EXTENSIBLE, isFlat());
208 return lengthAndFlags & EXTENSIBLE;
211 JS_ALWAYS_INLINE bool isAtomized() const {
212 JS_ASSERT_IF(lengthAndFlags & ATOMIZED, isFlat());
213 return lengthAndFlags & ATOMIZED;
216 JS_ALWAYS_INLINE bool isRope() const {
217 return lengthAndFlags & ROPE_BIT;
220 JS_ALWAYS_INLINE size_t length() const {
221 return lengthAndFlags >> LENGTH_SHIFT;
224 JS_ALWAYS_INLINE bool empty() const {
225 return lengthAndFlags <= TYPE_FLAGS_MASK;
228 /* This can fail by returning null and reporting an error on cx. */
229 JS_ALWAYS_INLINE const jschar *getChars(JSContext *cx) {
230 if (isRope())
231 return flatten(cx);
232 return nonRopeChars();
235 /* This can fail by returning null and reporting an error on cx. */
236 JS_ALWAYS_INLINE const jschar *getCharsZ(JSContext *cx) {
237 if (!isFlat())
238 return undepend(cx);
239 return flatChars();
242 JS_ALWAYS_INLINE void initFlatNotTerminated(jschar *chars, size_t length) {
243 JS_ASSERT(length <= MAX_LENGTH);
244 JS_ASSERT(!isStatic(this));
245 lengthAndFlags = buildLengthAndFlags(length, FLAT);
246 u.chars = chars;
249 /* Specific flat string initializer and accessor methods. */
250 JS_ALWAYS_INLINE void initFlat(jschar *chars, size_t length) {
251 initFlatNotTerminated(chars, length);
252 JS_ASSERT(chars[length] == jschar(0));
255 JS_ALWAYS_INLINE void initShortString(const jschar *chars, size_t length) {
256 JS_ASSERT(length <= MAX_LENGTH);
257 JS_ASSERT(chars >= inlineStorage && chars < (jschar *)(this + 2));
258 JS_ASSERT(!isStatic(this));
259 lengthAndFlags = buildLengthAndFlags(length, FLAT);
260 u.chars = chars;
263 JS_ALWAYS_INLINE void initFlatExtensible(jschar *chars, size_t length, size_t cap) {
264 JS_ASSERT(length <= MAX_LENGTH);
265 JS_ASSERT(chars[length] == jschar(0));
266 JS_ASSERT(!isStatic(this));
267 lengthAndFlags = buildLengthAndFlags(length, FLAT | EXTENSIBLE);
268 u.chars = chars;
269 s.capacity = cap;
272 JS_ALWAYS_INLINE JSFlatString *assertIsFlat() {
273 JS_ASSERT(isFlat());
274 return reinterpret_cast<JSFlatString *>(this);
277 JS_ALWAYS_INLINE const jschar *flatChars() const {
278 JS_ASSERT(isFlat());
279 return u.chars;
282 JS_ALWAYS_INLINE size_t flatLength() const {
283 JS_ASSERT(isFlat());
284 return length();
287 inline void flatSetAtomized() {
288 JS_ASSERT(isFlat());
289 JS_ASSERT(!isStatic(this));
290 lengthAndFlags |= ATOMIZED;
293 inline void flatClearExtensible() {
295 * N.B. This may be called on static strings, which may be in read-only
296 * memory, so we cannot unconditionally apply the mask.
298 JS_ASSERT(isFlat());
299 if (lengthAndFlags & EXTENSIBLE)
300 lengthAndFlags &= ~EXTENSIBLE;
304 * The chars pointer should point somewhere inside the buffer owned by base.
305 * The caller still needs to pass base for GC purposes.
307 inline void initDependent(JSString *base, const jschar *chars, size_t length) {
308 JS_ASSERT(!isStatic(this));
309 JS_ASSERT(base->isFlat());
310 JS_ASSERT(chars >= base->flatChars() && chars < base->flatChars() + base->length());
311 JS_ASSERT(length <= base->length() - (chars - base->flatChars()));
312 lengthAndFlags = buildLengthAndFlags(length, DEPENDENT);
313 u.chars = chars;
314 s.base = base;
317 inline JSLinearString *dependentBase() const {
318 JS_ASSERT(isDependent());
319 return s.base->assertIsLinear();
322 JS_ALWAYS_INLINE const jschar *dependentChars() {
323 JS_ASSERT(isDependent());
324 return u.chars;
327 inline size_t dependentLength() const {
328 JS_ASSERT(isDependent());
329 return length();
332 const jschar *undepend(JSContext *cx);
334 const jschar *nonRopeChars() const {
335 JS_ASSERT(!isRope());
336 return u.chars;
339 /* Rope-related initializers and accessors. */
340 inline void initRopeNode(JSString *left, JSString *right, size_t length) {
341 JS_ASSERT(left->length() + right->length() == length);
342 lengthAndFlags = buildLengthAndFlags(length, ROPE);
343 u.left = left;
344 s.right = right;
347 inline JSString *ropeLeft() const {
348 JS_ASSERT(isRope());
349 return u.left;
352 inline JSString *ropeRight() const {
353 JS_ASSERT(isRope());
354 return s.right;
357 inline void finishTraversalConversion(JSString *base, const jschar *baseBegin, const jschar *end) {
358 JS_ASSERT(baseBegin <= u.chars && u.chars <= end);
359 lengthAndFlags = buildLengthAndFlags(end - u.chars, DEPENDENT);
360 s.base = base;
363 const jschar *flatten(JSContext *maybecx);
365 JSLinearString *ensureLinear(JSContext *cx) {
366 if (isRope() && !flatten(cx))
367 return NULL;
368 return reinterpret_cast<JSLinearString *>(this);
371 bool isLinear() const {
372 return !isRope();
375 JSLinearString *assertIsLinear() {
376 JS_ASSERT(isLinear());
377 return reinterpret_cast<JSLinearString *>(this);
380 typedef uint8 SmallChar;
382 static inline bool fitsInSmallChar(jschar c) {
383 return c < SMALL_CHAR_LIMIT && toSmallChar[c] != INVALID_SMALL_CHAR;
386 static inline bool isUnitString(void *ptr) {
387 jsuword delta = reinterpret_cast<jsuword>(ptr) -
388 reinterpret_cast<jsuword>(unitStringTable);
389 if (delta >= UNIT_STRING_LIMIT * sizeof(JSString))
390 return false;
392 /* If ptr points inside the static array, it must be well-aligned. */
393 JS_ASSERT(delta % sizeof(JSString) == 0);
394 return true;
397 static inline bool isLength2String(void *ptr) {
398 jsuword delta = reinterpret_cast<jsuword>(ptr) -
399 reinterpret_cast<jsuword>(length2StringTable);
400 if (delta >= NUM_SMALL_CHARS * NUM_SMALL_CHARS * sizeof(JSString))
401 return false;
403 /* If ptr points inside the static array, it must be well-aligned. */
404 JS_ASSERT(delta % sizeof(JSString) == 0);
405 return true;
408 static inline bool isHundredString(void *ptr) {
409 jsuword delta = reinterpret_cast<jsuword>(ptr) -
410 reinterpret_cast<jsuword>(hundredStringTable);
411 if (delta >= NUM_HUNDRED_STRINGS * sizeof(JSString))
412 return false;
414 /* If ptr points inside the static array, it must be well-aligned. */
415 JS_ASSERT(delta % sizeof(JSString) == 0);
416 return true;
419 static inline bool isStatic(void *ptr) {
420 return isUnitString(ptr) || isLength2String(ptr) || isHundredString(ptr);
423 #ifdef __SUNPRO_CC
424 #pragma align 8 (__1cIJSStringPunitStringTable_, __1cIJSStringSlength2StringTable_, __1cIJSStringShundredStringTable_)
425 #endif
427 static const SmallChar INVALID_SMALL_CHAR = -1;
429 static const jschar fromSmallChar[];
430 static const SmallChar toSmallChar[];
431 static const JSString unitStringTable[];
432 static const JSString length2StringTable[];
433 static const JSString hundredStringTable[];
435 * Since int strings can be unit strings, length-2 strings, or hundred
436 * strings, we keep a table to map from integer to the correct string.
438 static const JSString *const intStringTable[];
440 static JSFlatString *unitString(jschar c);
441 static JSLinearString *getUnitString(JSContext *cx, JSString *str, size_t index);
442 static JSFlatString *length2String(jschar c1, jschar c2);
443 static JSFlatString *length2String(uint32 i);
444 static JSFlatString *intString(jsint i);
446 static JSFlatString *lookupStaticString(const jschar *chars, size_t length);
448 JS_ALWAYS_INLINE void finalize(JSContext *cx);
450 static size_t offsetOfLengthAndFlags() {
451 return offsetof(JSString, lengthAndFlags);
454 static size_t offsetOfChars() {
455 return offsetof(JSString, u.chars);
458 static void staticAsserts() {
459 JS_STATIC_ASSERT(((JSString::MAX_LENGTH << JSString::LENGTH_SHIFT) >>
460 JSString::LENGTH_SHIFT) == JSString::MAX_LENGTH);
465 * A "linear" string may or may not be null-terminated, but it provides
466 * infallible access to a linear array of characters. Namely, this means the
467 * string is not a rope.
469 struct JSLinearString : JSString
471 const jschar *chars() const { return JSString::nonRopeChars(); }
474 JS_STATIC_ASSERT(sizeof(JSLinearString) == sizeof(JSString));
477 * A linear string where, additionally, chars()[length()] == '\0'. Namely, this
478 * means the string is not a dependent string or rope.
480 struct JSFlatString : JSLinearString
482 const jschar *charsZ() const { return chars(); }
485 JS_STATIC_ASSERT(sizeof(JSFlatString) == sizeof(JSString));
488 * A flat string which has been "atomized", i.e., that is a unique string among
489 * other atomized strings and therefore allows equality via pointer comparison.
491 struct JSAtom : JSFlatString
495 struct JSExternalString : JSString
497 static const uintN TYPE_LIMIT = 8;
498 static JSStringFinalizeOp str_finalizers[TYPE_LIMIT];
500 static intN changeFinalizer(JSStringFinalizeOp oldop,
501 JSStringFinalizeOp newop) {
502 for (uintN i = 0; i != JS_ARRAY_LENGTH(str_finalizers); i++) {
503 if (str_finalizers[i] == oldop) {
504 str_finalizers[i] = newop;
505 return intN(i);
508 return -1;
511 void finalize(JSContext *cx);
512 void finalize();
515 JS_STATIC_ASSERT(sizeof(JSString) == sizeof(JSExternalString));
518 * Short strings should be created in cases where it's worthwhile to avoid
519 * mallocing the string buffer for a small string. We keep 2 string headers'
520 * worth of space in short strings so that more strings can be stored this way.
522 class JSShortString : public js::gc::Cell
524 JSString mHeader;
525 JSString mDummy;
527 public:
529 * Set the length of the string, and return a buffer for the caller to write
530 * to. This buffer must be written immediately, and should not be modified
531 * afterward.
533 inline jschar *init(size_t length) {
534 JS_ASSERT(length <= MAX_SHORT_STRING_LENGTH);
535 mHeader.initShortString(mHeader.inlineStorage, length);
536 return mHeader.inlineStorage;
539 inline jschar *getInlineStorageBeforeInit() {
540 return mHeader.inlineStorage;
543 inline void initAtOffsetInBuffer(jschar *p, size_t length) {
544 JS_ASSERT(p >= mHeader.inlineStorage && p < mHeader.inlineStorage + MAX_SHORT_STRING_LENGTH);
545 mHeader.initShortString(p, length);
548 inline void resetLength(size_t length) {
549 mHeader.initShortString(mHeader.flatChars(), length);
552 inline JSString *header() {
553 return &mHeader;
556 static const size_t FREE_STRING_WORDS = 2;
558 static const size_t MAX_SHORT_STRING_LENGTH =
559 ((sizeof(JSString) + FREE_STRING_WORDS * sizeof(size_t)) / sizeof(jschar)) - 1;
561 static inline bool fitsIntoShortString(size_t length) {
562 return length <= MAX_SHORT_STRING_LENGTH;
565 JS_ALWAYS_INLINE void finalize(JSContext *cx);
567 static void staticAsserts() {
568 JS_STATIC_ASSERT(offsetof(JSString, inlineStorage) ==
569 sizeof(JSString) - JSShortString::FREE_STRING_WORDS * sizeof(void *));
570 JS_STATIC_ASSERT(offsetof(JSShortString, mDummy) == sizeof(JSString));
571 JS_STATIC_ASSERT(offsetof(JSString, inlineStorage) +
572 sizeof(jschar) * (JSShortString::MAX_SHORT_STRING_LENGTH + 1) ==
573 sizeof(JSShortString));
577 namespace js {
579 class StringBuffer;
582 * When an algorithm does not need a string represented as a single linear
583 * array of characters, this range utility may be used to traverse the string as a
584 * sequence of linear arrays of characters. This avoids flattening ropes.
586 * Implemented in jsstrinlines.h.
588 class StringSegmentRange;
589 class MutatingRopeSegmentRange;
592 * Utility for building a rope (lazy concatenation) of strings.
594 class RopeBuilder;
596 } /* namespace js */
598 extern const jschar *
599 js_GetStringChars(JSContext *cx, JSString *str);
601 extern const jschar *
602 js_UndependString(JSContext *cx, JSString *str);
604 extern JSBool
605 js_MakeStringImmutable(JSContext *cx, JSString *str);
607 extern JSString * JS_FASTCALL
608 js_toLowerCase(JSContext *cx, JSString *str);
610 extern JSString * JS_FASTCALL
611 js_toUpperCase(JSContext *cx, JSString *str);
613 struct JSSubString {
614 size_t length;
615 const jschar *chars;
618 extern jschar js_empty_ucstr[];
619 extern JSSubString js_EmptySubString;
621 /* Unicode character attribute lookup tables. */
622 extern const uint8 js_X[];
623 extern const uint8 js_Y[];
624 extern const uint32 js_A[];
/*
 * Enumerated Unicode general category types.
 *
 * The values are the category codes extracted by JS_CTYPE (the low five bits
 * of JS_CCODE), and the classification macros use them as bit positions in
 * category masks, so they must not be renumbered. Note that 17 is not
 * assigned to any enumerator.
 */
typedef enum JSCharType {
    JSCT_UNASSIGNED             = 0,
    JSCT_UPPERCASE_LETTER       = 1,
    JSCT_LOWERCASE_LETTER       = 2,
    JSCT_TITLECASE_LETTER       = 3,
    JSCT_MODIFIER_LETTER        = 4,
    JSCT_OTHER_LETTER           = 5,
    JSCT_NON_SPACING_MARK       = 6,
    JSCT_ENCLOSING_MARK         = 7,
    JSCT_COMBINING_SPACING_MARK = 8,
    JSCT_DECIMAL_DIGIT_NUMBER   = 9,
    JSCT_LETTER_NUMBER          = 10,
    JSCT_OTHER_NUMBER           = 11,
    JSCT_SPACE_SEPARATOR        = 12,
    JSCT_LINE_SEPARATOR         = 13,
    JSCT_PARAGRAPH_SEPARATOR    = 14,
    JSCT_CONTROL                = 15,
    JSCT_FORMAT                 = 16,
    JSCT_PRIVATE_USE            = 18,
    JSCT_SURROGATE              = 19,
    JSCT_DASH_PUNCTUATION       = 20,
    JSCT_START_PUNCTUATION      = 21,
    JSCT_END_PUNCTUATION        = 22,
    JSCT_CONNECTOR_PUNCTUATION  = 23,
    JSCT_OTHER_PUNCTUATION      = 24,
    JSCT_MATH_SYMBOL            = 25,
    JSCT_CURRENCY_SYMBOL        = 26,
    JSCT_MODIFIER_SYMBOL        = 27,
    JSCT_OTHER_SYMBOL           = 28
} JSCharType;
/*
 * Character classifying and mapping macros, based on java.lang.Character.
 *
 * JS_CCODE looks up the attribute word for a character through the js_X,
 * js_Y and js_A tables; JS_CTYPE extracts its low five bits, which are
 * compared against (or used as a shift count with) JSCharType values.
 *
 * The JS_IS* category tests below build a bit mask with one bit per accepted
 * JSCharType and shift it right by the character's category, so the result is
 * 1 exactly when the category is in the mask. All of these macros evaluate
 * their argument more than once.
 */
#define JS_CCODE(c)     (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]])
#define JS_CTYPE(c)     (JS_CCODE(c) & 0x1F)

#define JS_ISALPHA(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER))                          \
                          >> JS_CTYPE(c)) & 1)

#define JS_ISALNUM(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER) |                         \
                           (1 << JSCT_DECIMAL_DIGIT_NUMBER))                  \
                          >> JS_CTYPE(c)) & 1)

/* A unicode letter, suitable for use in an identifier. */
#define JS_ISLETTER(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                    \
                            (1 << JSCT_LOWERCASE_LETTER) |                    \
                            (1 << JSCT_TITLECASE_LETTER) |                    \
                            (1 << JSCT_MODIFIER_LETTER) |                     \
                            (1 << JSCT_OTHER_LETTER) |                        \
                            (1 << JSCT_LETTER_NUMBER))                        \
                           >> JS_CTYPE(c)) & 1)

/*
 * 'IdentifierPart' from ECMA grammar, is Unicode letter or combining mark or
 * digit or connector punctuation.
 */
#define JS_ISIDPART(c)  ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER) |                         \
                           (1 << JSCT_LETTER_NUMBER) |                        \
                           (1 << JSCT_NON_SPACING_MARK) |                     \
                           (1 << JSCT_COMBINING_SPACING_MARK) |               \
                           (1 << JSCT_DECIMAL_DIGIT_NUMBER) |                 \
                           (1 << JSCT_CONNECTOR_PUNCTUATION))                 \
                          >> JS_CTYPE(c)) & 1)

/* Unicode control-format characters, ignored in input */
#define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)

/*
 * This table is used in JS_ISWORD. The definition has external linkage to
 * allow the raw table data to be used in the regular expression compiler.
 */
extern const bool js_alnum[];

/*
 * This macro performs testing for the regular expression word class \w, which
 * is defined by ECMA-262 15.10.2.6 to be [0-9A-Z_a-z].  If we want a
 * Unicode-friendlier definition of "word", we should rename this macro to
 * something regexp-y.
 */
#define JS_ISWORD(c)    ((c) < 128 && js_alnum[(c)])

#define JS_ISIDSTART(c) (JS_ISLETTER(c) || (c) == '_' || (c) == '$')
#define JS_ISIDENT(c)   (JS_ISIDPART(c) || (c) == '_' || (c) == '$')

#define JS_ISXMLSPACE(c)        ((c) == ' ' || (c) == '\t' || (c) == '\r' ||  \
                                 (c) == '\n')
#define JS_ISXMLNSSTART(c)      ((JS_CCODE(c) & 0x00000100) || (c) == '_')
#define JS_ISXMLNS(c)           ((JS_CCODE(c) & 0x00000080) || (c) == '.' ||  \
                                 (c) == '-' || (c) == '_')
#define JS_ISXMLNAMESTART(c)    (JS_ISXMLNSSTART(c) || (c) == ':')
#define JS_ISXMLNAME(c)         (JS_ISXMLNS(c) || (c) == ':')

#define JS_ISDIGIT(c)   (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)
732 const jschar BYTE_ORDER_MARK = 0xFEFF;
733 const jschar NO_BREAK_SPACE = 0x00A0;
735 static inline bool
736 JS_ISSPACE(jschar c)
738 unsigned w = c;
740 if (w < 256)
741 return (w <= ' ' && (w == ' ' || (9 <= w && w <= 0xD))) || w == NO_BREAK_SPACE;
743 return w == BYTE_ORDER_MARK || (JS_CCODE(w) & 0x00070000) == 0x00040000;
/* Printable test restricted to values below 128, using <ctype.h> isprint. */
#define JS_ISPRINT(c)   ((c) < 128 && isprint(c))

#define JS_ISUPPER(c)   (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
#define JS_ISLOWER(c)   (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)

/*
 * Case mapping. When the relevant bit of the JS_CCODE attribute word is set
 * (0x00100000 for upper-casing, 0x00200000 for lower-casing), the character
 * is adjusted by the signed offset stored in the word's bits 22 and above;
 * otherwise it is returned unchanged. The argument is evaluated repeatedly.
 */
#define JS_TOUPPER(c)   ((jschar) ((JS_CCODE(c) & 0x00100000)                 \
                                   ? (c) - ((int32)JS_CCODE(c) >> 22)         \
                                   : (c)))
#define JS_TOLOWER(c)   ((jschar) ((JS_CCODE(c) & 0x00200000)                 \
                                   ? (c) + ((int32)JS_CCODE(c) >> 22)         \
                                   : (c)))
/*
 * Shorthands for ASCII (7-bit) decimal and hex conversion.
 * Manually inline isdigit for performance; MSVC doesn't do this for us.
 *
 * JS7_ISDEC relies on unsigned wraparound: values below '0' become large
 * after the subtraction, so one comparison covers both bounds.
 * JS7_UNHEX assumes its argument already satisfies JS7_ISHEX.
 */
#define JS7_ISDEC(c)    ((((unsigned)(c)) - '0') <= 9)
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISHEX(c)    ((c) < 128 && isxdigit(c))
#define JS7_UNHEX(c)    (uintN)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a')
#define JS7_ISLET(c)    ((c) < 128 && isalpha(c))
768 /* Initialize the String class, returning its prototype object. */
769 extern js::Class js_StringClass;
771 inline bool
772 JSObject::isString() const
774 return getClass() == &js_StringClass;
777 extern JSObject *
778 js_InitStringClass(JSContext *cx, JSObject *obj);
780 extern const char js_escape_str[];
781 extern const char js_unescape_str[];
782 extern const char js_uneval_str[];
783 extern const char js_decodeURI_str[];
784 extern const char js_encodeURI_str[];
785 extern const char js_decodeURIComponent_str[];
786 extern const char js_encodeURIComponent_str[];
788 /* GC-allocate a string descriptor for the given malloc-allocated chars. */
789 extern JSFlatString *
790 js_NewString(JSContext *cx, jschar *chars, size_t length);
792 extern JSLinearString *
793 js_NewDependentString(JSContext *cx, JSString *base, size_t start,
794 size_t length);
796 /* Copy a counted string and GC-allocate a descriptor for it. */
797 extern JSFlatString *
798 js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n);
800 extern JSFlatString *
801 js_NewStringCopyN(JSContext *cx, const char *s, size_t n);
803 /* Copy a C string and GC-allocate a descriptor for it. */
804 extern JSFlatString *
805 js_NewStringCopyZ(JSContext *cx, const jschar *s);
807 extern JSFlatString *
808 js_NewStringCopyZ(JSContext *cx, const char *s);
811 * Convert a value to a printable C string.
813 extern const char *
814 js_ValueToPrintable(JSContext *cx, const js::Value &,
815 JSAutoByteString *bytes, bool asSource = false);
818 * Convert a value to a string, returning null after reporting an error,
819 * otherwise returning a new string reference.
821 extern JSString *
822 js_ValueToString(JSContext *cx, const js::Value &v);
824 namespace js {
827 * Most code that calls js_ValueToString knows the value is (probably) not a
828 * string, so it does not make sense to put this inline fast path into
829 * js_ValueToString.
831 static JS_ALWAYS_INLINE JSString *
832 ValueToString_TestForStringInline(JSContext *cx, const Value &v)
834 if (v.isString())
835 return v.toString();
836 return js_ValueToString(cx, v);
840 * This function implements E-262-3 section 9.8, toString. Convert the given
841 * value to a string of jschars appended to the given buffer. On error, the
842 * passed buffer may have partial results appended.
844 extern bool
845 ValueToStringBuffer(JSContext *cx, const Value &v, StringBuffer &sb);
847 } /* namespace js */
850 * Convert a value to its source expression, returning null after reporting
851 * an error, otherwise returning a new string reference.
853 extern JS_FRIEND_API(JSString *)
854 js_ValueToSource(JSContext *cx, const js::Value &v);
857 * Compute a hash function from str. The caller can call this function even if
858 * str is not a GC-allocated thing.
860 inline uint32
861 js_HashString(JSLinearString *str)
863 const jschar *s = str->chars();
864 size_t n = str->length();
865 uint32 h;
866 for (h = 0; n; s++, n--)
867 h = JS_ROTATE_LEFT32(h, 4) ^ *s;
868 return h;
871 namespace js {
874 * Test if strings are equal. The caller can call the function even if str1
875 * or str2 are not GC-allocated things.
877 extern bool
878 EqualStrings(JSContext *cx, JSString *str1, JSString *str2, JSBool *result);
880 /* EqualStrings is infallible on linear strings. */
881 extern bool
882 EqualStrings(JSLinearString *str1, JSLinearString *str2);
885 * Set |*result| to a value less than, equal to, or greater than zero depending
886 * on whether str1 is less than, equal to, or greater than str2.
888 extern bool
889 CompareStrings(JSContext *cx, JSString *str1, JSString *str2, int32 *result);
892 * Return true if the string matches the given sequence of ASCII bytes.
894 extern bool
895 StringEqualsAscii(JSLinearString *str, const char *asciiBytes);
897 } /* namespace js */
900 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
901 * The patlen argument must be positive and no greater than sBMHPatLenMax.
903 * Return the index of pat in text, or -1 if not found.
905 static const jsuint sBMHCharSetSize = 256; /* ISO-Latin-1 */
906 static const jsuint sBMHPatLenMax = 255; /* skip table element is uint8 */
907 static const jsint sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
909 extern jsint
910 js_BoyerMooreHorspool(const jschar *text, jsuint textlen,
911 const jschar *pat, jsuint patlen);
913 extern size_t
914 js_strlen(const jschar *s);
916 extern jschar *
917 js_strchr(const jschar *s, jschar c);
919 extern jschar *
920 js_strchr_limit(const jschar *s, jschar c, const jschar *limit);
922 #define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar))
924 inline void
925 js_short_strncpy(jschar *dest, const jschar *src, size_t num)
928 * It isn't strictly necessary here for |num| to be small, but this function
929 * is currently only called on buffers for short strings.
931 JS_ASSERT(JSShortString::fitsIntoShortString(num));
932 for (size_t i = 0; i < num; i++)
933 dest[i] = src[i];
937 * Return s advanced past any Unicode white space characters.
939 static inline const jschar *
940 js_SkipWhiteSpace(const jschar *s, const jschar *end)
942 JS_ASSERT(s <= end);
943 while (s != end && JS_ISSPACE(*s))
944 s++;
945 return s;
949 * Inflate bytes to JS chars and vice versa. Report out of memory via cx and
950 * return null on error, otherwise return the jschar or byte vector that was
951 * JS_malloc'ed. length is updated to the length of the new string in jschars.
953 extern jschar *
954 js_InflateString(JSContext *cx, const char *bytes, size_t *length);
956 extern char *
957 js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
960 * Inflate bytes to JS chars into a buffer. 'chars' must be large enough for
961 * 'length' jschars. The buffer is NOT null-terminated. The destination length
962 * must be initialized with the buffer size and will contain on return the
963 * number of copied chars. Conversion behavior depends on js_CStringsAreUTF8.
965 extern JSBool
966 js_InflateStringToBuffer(JSContext *cx, const char *bytes, size_t length,
967 jschar *chars, size_t *charsLength);
970 * Same as js_InflateStringToBuffer, but always treats 'bytes' as UTF-8.
972 extern JSBool
973 js_InflateUTF8StringToBuffer(JSContext *cx, const char *bytes, size_t length,
974 jschar *chars, size_t *charsLength);
977 * Get number of bytes in the deflated sequence of characters. Behavior depends
978 * on js_CStringsAreUTF8.
980 extern size_t
981 js_GetDeflatedStringLength(JSContext *cx, const jschar *chars,
982 size_t charsLength);
985 * Same as js_GetDeflatedStringLength, but always treats the result as UTF-8.
987 extern size_t
988 js_GetDeflatedUTF8StringLength(JSContext *cx, const jschar *chars,
989 size_t charsLength);
992 * Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
993 * 'length' chars. The buffer is NOT null-terminated. The destination length
994 * must be initialized with the buffer size and will contain on return the
995 * number of copied bytes. Conversion behavior depends on js_CStringsAreUTF8.
997 extern JSBool
998 js_DeflateStringToBuffer(JSContext *cx, const jschar *chars,
999 size_t charsLength, char *bytes, size_t *length);
1002 * Same as js_DeflateStringToBuffer, but always treats 'bytes' as UTF-8.
1004 extern JSBool
1005 js_DeflateStringToUTF8Buffer(JSContext *cx, const jschar *chars,
1006 size_t charsLength, char *bytes, size_t *length);
1008 /* Export a few natives and a helper to other files in SpiderMonkey. */
1009 extern JSBool
1010 js_str_escape(JSContext *cx, uintN argc, js::Value *argv, js::Value *rval);
1013 * The String.prototype.replace fast-native entry point is exported for joined
1014 * function optimization in js{interp,tracer}.cpp.
1016 namespace js {
1017 extern JSBool
1018 str_replace(JSContext *cx, uintN argc, js::Value *vp);
1021 extern JSBool
1022 js_str_toString(JSContext *cx, uintN argc, js::Value *vp);
1024 extern JSBool
1025 js_str_charAt(JSContext *cx, uintN argc, js::Value *vp);
1027 extern JSBool
1028 js_str_charCodeAt(JSContext *cx, uintN argc, js::Value *vp);
1031 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
1032 * least 6 bytes long. Return the number of UTF-8 bytes of data written.
1034 extern int
1035 js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char);
1037 namespace js {
1039 extern size_t
1040 PutEscapedStringImpl(char *buffer, size_t size, FILE *fp, JSLinearString *str, uint32 quote);
1043 * Write str into buffer escaping any non-printable or non-ASCII character
1044 * using \escapes for JS string literals.
1045 * Guarantees that a NUL is at the end of the buffer unless size is 0. Returns
1046 * the length of the written output, NOT including the NUL. Thus, a return
1047 * value of size or more means that the output was truncated. If buffer
1048 * is null, just returns the length of the output. If quote is not 0, it must
1049 * be a single or double quote character that will quote the output.
1051 inline size_t
1052 PutEscapedString(char *buffer, size_t size, JSLinearString *str, uint32 quote)
1054 size_t n = PutEscapedStringImpl(buffer, size, NULL, str, quote);
1056 /* PutEscapedStringImpl can only fail with a file. */
1057 JS_ASSERT(n != size_t(-1));
1058 return n;
1062 * Write str into file escaping any non-printable or non-ASCII character.
1063 * If quote is not 0, it must be a single or double quote character that
1064 * will quote the output.
1066 inline bool
1067 FileEscapedString(FILE *fp, JSLinearString *str, uint32 quote)
1069 return PutEscapedStringImpl(NULL, 0, fp, str, quote) != size_t(-1);
1072 } /* namespace js */
1074 extern JSBool
1075 js_String(JSContext *cx, uintN argc, js::Value *vp);
1077 #endif /* jsstr_h___ */