3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
16 /* -- String helpers ------------------------------------------------------ */
18 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
19 int32_t LJ_FASTCALL
lj_str_cmp(GCstr
*a
, GCstr
*b
)
21 MSize i
, n
= a
->len
> b
->len
? b
->len
: a
->len
;
22 for (i
= 0; i
< n
; i
+= 4) {
23 /* Note: innocuous access up to end of string + 3. */
24 uint32_t va
= *(const uint32_t *)(strdata(a
)+i
);
25 uint32_t vb
= *(const uint32_t *)(strdata(b
)+i
);
28 va
= lj_bswap(va
); vb
= lj_bswap(vb
);
31 if ((int32_t)i
>= -3) {
32 va
>>= 32+(i
<<3); vb
>>= 32+(i
<<3);
35 return va
< vb
? -1 : 1;
38 return (int32_t)(a
->len
- b
->len
);
41 /* Find fixed string p inside string s. */
42 const char *lj_str_find(const char *s
, const char *p
, MSize slen
, MSize plen
)
48 int c
= *(const uint8_t *)p
++;
51 const char *q
= (const char *)memchr(s
, c
, slen
);
53 if (memcmp(q
+1, p
, plen
) == 0) return q
;
54 q
++; slen
-= (MSize
)(q
-s
); s
= q
;
61 /* Check whether a string has a pattern matching character. */
62 int lj_str_haspattern(GCstr
*s
)
64 const char *p
= strdata(s
), *q
= p
+ s
->len
;
66 int c
= *(const uint8_t *)p
++;
67 if (lj_char_ispunct(c
) && strchr("^$*+?.([%-", c
))
68 return 1; /* Found a pattern matching char. */
70 return 0; /* No pattern matching chars found. */
73 /* -- String hashing ------------------------------------------------------ */
75 /* Keyed sparse ARX string hash. Constant time. */
76 static StrHash
hash_sparse(uint64_t seed
, const char *str
, MSize len
)
78 /* Constants taken from lookup3 hash by Bob Jenkins. */
79 StrHash a
, b
, h
= len
^ (StrHash
)seed
;
80 if (len
>= 4) { /* Caveat: unaligned access! */
82 h
^= lj_getu32(str
+len
-4);
83 b
= lj_getu32(str
+(len
>>1)-2);
84 h
^= b
; h
-= lj_rol(b
, 14);
85 b
+= lj_getu32(str
+(len
>>2)-1);
87 a
= *(const uint8_t *)str
;
88 h
^= *(const uint8_t *)(str
+len
-1);
89 b
= *(const uint8_t *)(str
+(len
>>1));
90 h
^= b
; h
-= lj_rol(b
, 14);
92 a
^= h
; a
-= lj_rol(h
, 11);
93 b
^= a
; b
-= lj_rol(a
, 25);
94 h
^= b
; h
-= lj_rol(b
, 16);
98 #if LUAJIT_SECURITY_STRHASH
99 /* Keyed dense ARX string hash. Linear time. */
100 static LJ_NOINLINE StrHash
hash_dense(uint64_t seed
, StrHash h
,
101 const char *str
, MSize len
)
103 StrHash b
= lj_bswap(lj_rol(h
^ (StrHash
)(seed
>> 32), 4));
105 StrHash a
= (StrHash
)seed
;
106 const char *pe
= str
+len
-12, *p
= pe
, *q
= str
;
112 h
^= b
; h
-= lj_rol(b
, 14);
113 a
^= h
; a
-= lj_rol(h
, 11);
114 b
^= a
; b
-= lj_rol(a
, 25);
116 h
^= b
; h
-= lj_rol(b
, 16);
117 a
^= h
; a
-= lj_rol(h
, 4);
118 b
^= a
; b
-= lj_rol(a
, 14);
124 /* -- String interning ---------------------------------------------------- */
126 #define LJ_STR_MAXCOLL 32
128 /* Resize the string interning hash table (grow and shrink). */
129 void lj_str_resize(lua_State
*L
, MSize newmask
)
131 global_State
*g
= G(L
);
132 GCRef
*newtab
, *oldtab
= g
->str
.tab
;
135 /* No resizing during GC traversal or if already too big. */
136 if (g
->gc
.state
== GCSsweepstring
|| newmask
>= LJ_MAX_STRTAB
-1)
139 newtab
= lj_mem_newvec(L
, newmask
+1, GCRef
);
140 memset(newtab
, 0, (newmask
+1)*sizeof(GCRef
));
142 #if LUAJIT_SECURITY_STRHASH
143 /* Check which chains need secondary hashes. */
146 /* Compute primary chain lengths. */
147 for (i
= g
->str
.mask
; i
!= ~(MSize
)0; i
--) {
148 GCobj
*o
= (GCobj
*)(gcrefu(oldtab
[i
]) & ~(uintptr_t)1);
150 GCstr
*s
= gco2str(o
);
151 MSize hash
= s
->hashalg
? hash_sparse(g
->str
.seed
, strdata(s
), s
->len
) :
154 setgcrefp(newtab
[hash
], gcrefu(newtab
[hash
]) + 1);
158 /* Mark secondary chains. */
159 for (i
= newmask
; i
!= ~(MSize
)0; i
--) {
160 int secondary
= gcrefu(newtab
[i
]) > LJ_STR_MAXCOLL
;
161 newsecond
|= secondary
;
162 setgcrefp(newtab
[i
], secondary
);
164 g
->str
.second
= newsecond
;
168 /* Reinsert all strings from the old table into the new table. */
169 for (i
= g
->str
.mask
; i
!= ~(MSize
)0; i
--) {
170 GCobj
*o
= (GCobj
*)(gcrefu(oldtab
[i
]) & ~(uintptr_t)1);
172 GCobj
*next
= gcnext(o
);
173 GCstr
*s
= gco2str(o
);
174 MSize hash
= s
->hash
;
175 #if LUAJIT_SECURITY_STRHASH
177 if (LJ_LIKELY(!s
->hashalg
)) { /* String hashed with primary hash. */
179 u
= gcrefu(newtab
[hash
]);
180 if (LJ_UNLIKELY(u
& 1)) { /* Switch string to secondary hash. */
181 s
->hash
= hash
= hash_dense(g
->str
.seed
, s
->hash
, strdata(s
), s
->len
);
184 u
= gcrefu(newtab
[hash
]);
186 } else { /* String hashed with secondary hash. */
187 MSize shash
= hash_sparse(g
->str
.seed
, strdata(s
), s
->len
);
188 u
= gcrefu(newtab
[shash
& newmask
]);
191 u
= gcrefu(newtab
[hash
]);
192 } else { /* Revert string back to primary hash. */
195 hash
= (shash
& newmask
);
198 /* NOBARRIER: The string table is a GC root. */
199 setgcrefp(o
->gch
.nextgc
, (u
& ~(uintptr_t)1));
200 setgcrefp(newtab
[hash
], ((uintptr_t)o
| (u
& 1)));
203 /* NOBARRIER: The string table is a GC root. */
204 setgcrefr(o
->gch
.nextgc
, newtab
[hash
]);
205 setgcref(newtab
[hash
], o
);
211 /* Free old table and replace with new table. */
214 g
->str
.mask
= newmask
;
217 #if LUAJIT_SECURITY_STRHASH
218 /* Rehash and rechain all strings in a chain. */
219 static LJ_NOINLINE GCstr
*lj_str_rehash_chain(lua_State
*L
, StrHash hashc
,
220 const char *str
, MSize len
)
222 global_State
*g
= G(L
);
223 int ow
= g
->gc
.state
== GCSsweepstring
? otherwhite(g
) : 0; /* Sweeping? */
224 GCRef
*strtab
= g
->str
.tab
;
225 MSize strmask
= g
->str
.mask
;
226 GCobj
*o
= gcref(strtab
[hashc
& strmask
]);
227 setgcrefp(strtab
[hashc
& strmask
], (void *)((uintptr_t)1));
231 GCobj
*next
= gcnext(o
);
232 GCstr
*s
= gco2str(o
);
234 if (ow
) { /* Must sweep while rechaining. */
235 if (((o
->gch
.marked
^ LJ_GC_WHITES
) & ow
)) { /* String alive? */
236 lj_assertG(!isdead(g
, o
) || (o
->gch
.marked
& LJ_GC_FIXED
),
237 "sweep of undead string");
239 } else { /* Free dead string. */
240 lj_assertG(isdead(g
, o
) || ow
== LJ_GC_SFIXED
,
241 "sweep of unlive string");
248 if (!s
->hashalg
) { /* Rehash with secondary hash. */
249 hash
= hash_dense(g
->str
.seed
, hash
, strdata(s
), s
->len
);
255 u
= gcrefu(strtab
[hash
]);
256 setgcrefp(o
->gch
.nextgc
, (u
& ~(uintptr_t)1));
257 setgcrefp(strtab
[hash
], ((uintptr_t)o
| (u
& 1)));
260 /* Try to insert the pending string again. */
261 return lj_str_new(L
, str
, len
);
265 /* Reseed String ID from PRNG after random interval < 2^bits. */
266 #if LUAJIT_SECURITY_STRID == 1
267 #define STRID_RESEED_INTERVAL 8
268 #elif LUAJIT_SECURITY_STRID == 2
269 #define STRID_RESEED_INTERVAL 4
270 #elif LUAJIT_SECURITY_STRID >= 3
271 #define STRID_RESEED_INTERVAL 0
274 /* Allocate a new string and add to string interning table. */
275 static GCstr
*lj_str_alloc(lua_State
*L
, const char *str
, MSize len
,
276 StrHash hash
, int hashalg
)
278 GCstr
*s
= lj_mem_newt(L
, lj_str_size(len
), GCstr
);
279 global_State
*g
= G(L
);
285 #ifndef STRID_RESEED_INTERVAL
286 s
->sid
= g
->str
.id
++;
287 #elif STRID_RESEED_INTERVAL
288 if (!g
->str
.idreseed
--) {
289 uint64_t r
= lj_prng_u64(&g
->prng
);
290 g
->str
.id
= (StrID
)r
;
291 g
->str
.idreseed
= (uint8_t)(r
>> (64 - STRID_RESEED_INTERVAL
));
293 s
->sid
= g
->str
.id
++;
295 s
->sid
= (StrID
)lj_prng_u64(&g
->prng
);
298 s
->hashalg
= (uint8_t)hashalg
;
299 /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */
300 *(uint32_t *)(strdatawr(s
)+(len
& ~(MSize
)3)) = 0;
301 memcpy(strdatawr(s
), str
, len
);
302 /* Add to string hash table. */
304 u
= gcrefu(g
->str
.tab
[hash
]);
305 setgcrefp(s
->nextgc
, (u
& ~(uintptr_t)1));
306 /* NOBARRIER: The string table is a GC root. */
307 setgcrefp(g
->str
.tab
[hash
], ((uintptr_t)s
| (u
& 1)));
308 if (g
->str
.num
++ > g
->str
.mask
) /* Allow a 100% load factor. */
309 lj_str_resize(L
, (g
->str
.mask
<<1)+1); /* Grow string table. */
310 return s
; /* Return newly interned string. */
313 /* Intern a string and return string object. */
314 GCstr
*lj_str_new(lua_State
*L
, const char *str
, size_t lenx
)
316 global_State
*g
= G(L
);
317 if (lenx
-1 < LJ_MAX_STR
-1) {
318 MSize len
= (MSize
)lenx
;
319 StrHash hash
= hash_sparse(g
->str
.seed
, str
, len
);
322 /* Check if the string has already been interned. */
323 GCobj
*o
= gcref(g
->str
.tab
[hash
& g
->str
.mask
]);
324 #if LUAJIT_SECURITY_STRHASH
325 if (LJ_UNLIKELY((uintptr_t)o
& 1)) { /* Secondary hash for this chain? */
327 hash
= hash_dense(g
->str
.seed
, hash
, str
, len
);
328 o
= (GCobj
*)(gcrefu(g
->str
.tab
[hash
& g
->str
.mask
]) & ~(uintptr_t)1);
332 GCstr
*sx
= gco2str(o
);
333 if (sx
->hash
== hash
&& sx
->len
== len
) {
334 if (memcmp(str
, strdata(sx
), len
) == 0) {
335 if (isdead(g
, o
)) flipwhite(o
); /* Resurrect if dead. */
336 return sx
; /* Return existing string. */
343 #if LUAJIT_SECURITY_STRHASH
344 /* Rehash chain if there are too many collisions. */
345 if (LJ_UNLIKELY(coll
> LJ_STR_MAXCOLL
) && !hashalg
) {
346 return lj_str_rehash_chain(L
, hash
, str
, len
);
349 /* Otherwise allocate a new string. */
350 return lj_str_alloc(L
, str
, len
, hash
, hashalg
);
353 lj_err_msg(L
, LJ_ERR_STROV
);
358 void LJ_FASTCALL
lj_str_free(global_State
*g
, GCstr
*s
)
361 lj_mem_free(g
, s
, lj_str_size(s
->len
));
364 void LJ_FASTCALL
lj_str_init(lua_State
*L
)
366 global_State
*g
= G(L
);
367 g
->str
.seed
= lj_prng_u64(&g
->prng
);
368 lj_str_resize(L
, LJ_MIN_STRTAB
-1);