3 ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
15 /* -- String helpers ------------------------------------------------------ */
17 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
18 int32_t LJ_FASTCALL
lj_str_cmp(GCstr
*a
, GCstr
*b
)
20 MSize i
, n
= a
->len
> b
->len
? b
->len
: a
->len
;
21 for (i
= 0; i
< n
; i
+= 4) {
22 /* Note: innocuous access up to end of string + 3. */
23 uint32_t va
= *(const uint32_t *)(strdata(a
)+i
);
24 uint32_t vb
= *(const uint32_t *)(strdata(b
)+i
);
27 va
= lj_bswap(va
); vb
= lj_bswap(vb
);
30 if ((int32_t)i
>= -3) {
31 va
>>= 32+(i
<<3); vb
>>= 32+(i
<<3);
34 return va
< vb
? -1 : 1;
37 return (int32_t)(a
->len
- b
->len
);
40 /* Fast string data comparison. Caveat: unaligned access to 1st string! */
41 static LJ_AINLINE
int str_fastcmp(const char *a
, const char *b
, MSize len
)
45 lua_assert((((uintptr_t)a
+len
-1) & (LJ_PAGESIZE
-1)) <= LJ_PAGESIZE
-4);
46 do { /* Note: innocuous access up to end of string + 3. */
47 uint32_t v
= lj_getu32(a
+i
) ^ *(const uint32_t *)(b
+i
);
51 return (int32_t)i
>= -3 ? (v
<< (32+(i
<<3))) : 1;
53 return (int32_t)i
>= -3 ? (v
>> (32+(i
<<3))) : 1;
61 /* Find fixed string p inside string s. */
62 const char *lj_str_find(const char *s
, const char *p
, MSize slen
, MSize plen
)
68 int c
= *(const uint8_t *)p
++;
71 const char *q
= (const char *)memchr(s
, c
, slen
);
73 if (memcmp(q
+1, p
, plen
) == 0) return q
;
74 q
++; slen
-= (MSize
)(q
-s
); s
= q
;
81 /* Check whether a string has a pattern matching character. */
82 int lj_str_haspattern(GCstr
*s
)
84 const char *p
= strdata(s
), *q
= p
+ s
->len
;
86 int c
= *(const uint8_t *)p
++;
87 if (lj_char_ispunct(c
) && strchr("^$*+?.([%-", c
))
88 return 1; /* Found a pattern matching char. */
90 return 0; /* No pattern matching chars found. */
93 /* -- String interning ---------------------------------------------------- */
95 /* Resize the string hash table (grow and shrink). */
96 void lj_str_resize(lua_State
*L
, MSize newmask
)
98 global_State
*g
= G(L
);
101 if (g
->gc
.state
== GCSsweepstring
|| newmask
>= LJ_MAX_STRTAB
-1)
102 return; /* No resizing during GC traversal or if already too big. */
103 newhash
= lj_mem_newvec(L
, newmask
+1, GCRef
);
104 memset(newhash
, 0, (newmask
+1)*sizeof(GCRef
));
105 for (i
= g
->strmask
; i
!= ~(MSize
)0; i
--) { /* Rehash old table. */
106 GCobj
*p
= gcref(g
->strhash
[i
]);
107 while (p
) { /* Follow each hash chain and reinsert all strings. */
108 MSize h
= gco2str(p
)->hash
& newmask
;
109 GCobj
*next
= gcnext(p
);
110 /* NOBARRIER: The string table is a GC root. */
111 setgcrefr(p
->gch
.nextgc
, newhash
[h
]);
112 setgcref(newhash
[h
], p
);
116 lj_mem_freevec(g
, g
->strhash
, g
->strmask
+1, GCRef
);
117 g
->strmask
= newmask
;
118 g
->strhash
= newhash
;
121 /* Intern a string and return string object. */
122 GCstr
*lj_str_new(lua_State
*L
, const char *str
, size_t lenx
)
127 MSize len
= (MSize
)lenx
;
129 if (lenx
>= LJ_MAX_STR
)
130 lj_err_msg(L
, LJ_ERR_STROV
);
132 /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
133 if (len
>= 4) { /* Caveat: unaligned access! */
135 h
^= lj_getu32(str
+len
-4);
136 b
= lj_getu32(str
+(len
>>1)-2);
137 h
^= b
; h
-= lj_rol(b
, 14);
138 b
+= lj_getu32(str
+(len
>>2)-1);
139 } else if (len
> 0) {
140 a
= *(const uint8_t *)str
;
141 h
^= *(const uint8_t *)(str
+len
-1);
142 b
= *(const uint8_t *)(str
+(len
>>1));
143 h
^= b
; h
-= lj_rol(b
, 14);
147 a
^= h
; a
-= lj_rol(h
, 11);
148 b
^= a
; b
-= lj_rol(a
, 25);
149 h
^= b
; h
-= lj_rol(b
, 16);
150 /* Check if the string has already been interned. */
151 o
= gcref(g
->strhash
[h
& g
->strmask
]);
152 if (LJ_LIKELY((((uintptr_t)str
+len
-1) & (LJ_PAGESIZE
-1)) <= LJ_PAGESIZE
-4)) {
154 GCstr
*sx
= gco2str(o
);
155 if (sx
->len
== len
&& str_fastcmp(str
, strdata(sx
), len
) == 0) {
156 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
157 if (isdead(g
, o
)) flipwhite(o
);
158 return sx
; /* Return existing string. */
162 } else { /* Slow path: end of string is too close to a page boundary. */
164 GCstr
*sx
= gco2str(o
);
165 if (sx
->len
== len
&& memcmp(str
, strdata(sx
), len
) == 0) {
166 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
167 if (isdead(g
, o
)) flipwhite(o
);
168 return sx
; /* Return existing string. */
173 /* Nope, create a new string. */
174 s
= lj_mem_newt(L
, sizeof(GCstr
)+len
+1, GCstr
);
180 memcpy(strdatawr(s
), str
, len
);
181 strdatawr(s
)[len
] = '\0'; /* Zero-terminate string. */
182 /* Add it to string hash table. */
184 s
->nextgc
= g
->strhash
[h
];
185 /* NOBARRIER: The string table is a GC root. */
186 setgcref(g
->strhash
[h
], obj2gco(s
));
187 if (g
->strnum
++ > g
->strmask
) /* Allow a 100% load factor. */
188 lj_str_resize(L
, (g
->strmask
<<1)+1); /* Grow string table. */
189 return s
; /* Return newly interned string. */
192 void LJ_FASTCALL
lj_str_free(global_State
*g
, GCstr
*s
)
195 lj_mem_free(g
, s
, sizestring(s
));