3 ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
15 /* -- String helpers ------------------------------------------------------ */
17 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
18 int32_t LJ_FASTCALL
lj_str_cmp(GCstr
*a
, GCstr
*b
)
20 MSize i
, n
= a
->len
> b
->len
? b
->len
: a
->len
;
21 for (i
= 0; i
< n
; i
+= 4) {
22 /* Note: innocuous access up to end of string + 3. */
23 uint32_t va
= *(const uint32_t *)(strdata(a
)+i
);
24 uint32_t vb
= *(const uint32_t *)(strdata(b
)+i
);
27 va
= lj_bswap(va
); vb
= lj_bswap(vb
);
30 if ((int32_t)i
>= -3) {
31 va
>>= 32+(i
<<3); vb
>>= 32+(i
<<3);
34 return va
< vb
? -1 : 1;
37 return (int32_t)(a
->len
- b
->len
);
40 /* Fast string data comparison. Caveat: unaligned access to 1st string! */
41 static LJ_AINLINE
int str_fastcmp(const char *a
, const char *b
, MSize len
)
45 lua_assert((((uintptr_t)a
+len
-1) & (LJ_PAGESIZE
-1)) <= LJ_PAGESIZE
-4);
46 do { /* Note: innocuous access up to end of string + 3. */
47 uint32_t v
= lj_getu32(a
+i
) ^ *(const uint32_t *)(b
+i
);
51 return (int32_t)i
>= -3 ? (v
<< (32+(i
<<3))) : 1;
53 return (int32_t)i
>= -3 ? (v
>> (32+(i
<<3))) : 1;
61 /* Find fixed string p inside string s. */
62 const char *lj_str_find(const char *s
, const char *p
, MSize slen
, MSize plen
)
68 int c
= *(const uint8_t *)p
++;
71 const char *q
= (const char *)memchr(s
, c
, slen
);
73 if (memcmp(q
+1, p
, plen
) == 0) return q
;
74 q
++; slen
-= (MSize
)(q
-s
); s
= q
;
81 /* Check whether a string has a pattern matching character. */
82 int lj_str_haspattern(GCstr
*s
)
84 const char *p
= strdata(s
), *q
= p
+ s
->len
;
86 int c
= *(const uint8_t *)p
++;
87 if (lj_char_ispunct(c
) && strchr("^$*+?.([%-", c
))
88 return 1; /* Found a pattern matching char. */
90 return 0; /* No pattern matching chars found. */
93 /* -- String interning ---------------------------------------------------- */
95 /* Resize the string hash table (grow and shrink). */
96 void lj_str_resize(lua_State
*L
, MSize newmask
)
98 global_State
*g
= G(L
);
101 if (g
->gc
.state
== GCSsweepstring
|| newmask
>= LJ_MAX_STRTAB
-1)
102 return; /* No resizing during GC traversal or if already too big. */
103 newhash
= lj_mem_newvec(L
, newmask
+1, GCRef
);
104 memset(newhash
, 0, (newmask
+1)*sizeof(GCRef
));
105 for (i
= g
->strmask
; i
!= ~(MSize
)0; i
--) { /* Rehash old table. */
106 GCobj
*p
= gcref(g
->strhash
[i
]);
107 while (p
) { /* Follow each hash chain and reinsert all strings. */
108 MSize h
= gco2str(p
)->hash
& newmask
;
109 GCobj
*next
= gcnext(p
);
110 /* NOBARRIER: The string table is a GC root. */
111 setgcrefr(p
->gch
.nextgc
, newhash
[h
]);
112 setgcref(newhash
[h
], p
);
116 lj_mem_freevec(g
, g
->strhash
, g
->strmask
+1, GCRef
);
117 g
->strmask
= newmask
;
118 g
->strhash
= newhash
;
122 ** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to
125 #if !defined(LUAI_HASHLIMIT)
126 #define LUAI_HASHLIMIT 5
129 #define cast(t, exp) ((t)(exp))
130 int luajittex_choose_hash_function
= 0 ;
131 /* Intern a string and return string object. */
132 GCstr
*lj_str_new(lua_State
*L
, const char *str
, size_t lenx
)
137 MSize len
= (MSize
)lenx
;
141 if (lenx
>= LJ_MAX_STR
)
142 lj_err_msg(L
, LJ_ERR_STROV
);
147 if (luajittex_choose_hash_function
==0) {
148 /* Lua 5.1.5 hash function */
149 /* for 5.2 max methods we also need to patch the vm eq */
150 step
= (len
>>LUAI_HASHLIMIT
)+1; /* if string is too long, don't hash all its chars */
151 for (l1
=len
; l1
>=step
; l1
-=step
) /* compute hash */
152 h
= h
^ ((h
<<5)+(h
>>2)+cast(unsigned char, str
[l1
-1]));
154 /* LuaJIT 2.0.2 hash function */
155 /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
156 if (len
>= 4) { /* Caveat: unaligned access! */
158 h
^= lj_getu32(str
+len
-4);
159 b
= lj_getu32(str
+(len
>>1)-2);
160 h
^= b
; h
-= lj_rol(b
, 14);
161 b
+= lj_getu32(str
+(len
>>2)-1);
162 } else if (len
> 0) {
163 a
= *(const uint8_t *)str
;
164 h
^= *(const uint8_t *)(str
+len
-1);
165 b
= *(const uint8_t *)(str
+(len
>>1));
166 h
^= b
; h
-= lj_rol(b
, 14);
168 /* Already done, kept for reference */
171 a
^= h
; a
-= lj_rol(h
, 11);
172 b
^= a
; b
-= lj_rol(a
, 25);
173 h
^= b
; h
-= lj_rol(b
, 16);
177 /* Check if the string has already been interned. */
178 o
= gcref(g
->strhash
[h
& g
->strmask
]);
179 if (LJ_LIKELY((((uintptr_t)str
+len
-1) & (LJ_PAGESIZE
-1)) <= LJ_PAGESIZE
-4)) {
181 GCstr
*sx
= gco2str(o
);
182 if (sx
->len
== len
&& str_fastcmp(str
, strdata(sx
), len
) == 0) {
183 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
184 if (isdead(g
, o
)) flipwhite(o
);
185 return sx
; /* Return existing string. */
189 } else { /* Slow path: end of string is too close to a page boundary. */
191 GCstr
*sx
= gco2str(o
);
192 if (sx
->len
== len
&& memcmp(str
, strdata(sx
), len
) == 0) {
193 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
194 if (isdead(g
, o
)) flipwhite(o
);
195 return sx
; /* Return existing string. */
200 /* Nope, create a new string. */
201 s
= lj_mem_newt(L
, sizeof(GCstr
)+len
+1, GCstr
);
207 memcpy(strdatawr(s
), str
, len
);
208 strdatawr(s
)[len
] = '\0'; /* Zero-terminate string. */
209 /* Add it to string hash table. */
211 s
->nextgc
= g
->strhash
[h
];
212 /* NOBARRIER: The string table is a GC root. */
213 setgcref(g
->strhash
[h
], obj2gco(s
));
214 if (g
->strnum
++ > g
->strmask
) /* Allow a 100% load factor. */
215 lj_str_resize(L
, (g
->strmask
<<1)+1); /* Grow string table. */
216 return s
; /* Return newly interned string. */
219 void LJ_FASTCALL
lj_str_free(global_State
*g
, GCstr
*s
)
222 lj_mem_free(g
, s
, sizestring(s
));