beta-0.89.2
[luatex.git] / source / libs / luajit / LuaJIT-src / src / lj_str.c
blob3bc8079f37c87dc7fbe5946bfcad441677500d59
1 /*
2 ** String handling.
3 ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4 */
6 #define lj_str_c
7 #define LUA_CORE
9 #include "lj_obj.h"
10 #include "lj_gc.h"
11 #include "lj_err.h"
12 #include "lj_str.h"
13 #include "lj_char.h"
15 /* -- String helpers ------------------------------------------------------ */
17 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
18 int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
20 MSize i, n = a->len > b->len ? b->len : a->len;
21 for (i = 0; i < n; i += 4) {
22 /* Note: innocuous access up to end of string + 3. */
23 uint32_t va = *(const uint32_t *)(strdata(a)+i);
24 uint32_t vb = *(const uint32_t *)(strdata(b)+i);
25 if (va != vb) {
26 #if LJ_LE
27 va = lj_bswap(va); vb = lj_bswap(vb);
28 #endif
29 i -= n;
30 if ((int32_t)i >= -3) {
31 va >>= 32+(i<<3); vb >>= 32+(i<<3);
32 if (va == vb) break;
34 return va < vb ? -1 : 1;
37 return (int32_t)(a->len - b->len);
40 /* Fast string data comparison. Caveat: unaligned access to 1st string! */
41 static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
43 MSize i = 0;
44 lua_assert(len > 0);
45 lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
46 do { /* Note: innocuous access up to end of string + 3. */
47 uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
48 if (v) {
49 i -= len;
50 #if LJ_LE
51 return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
52 #else
53 return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
54 #endif
56 i += 4;
57 } while (i < len);
58 return 0;
61 /* Find fixed string p inside string s. */
62 const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
64 if (plen <= slen) {
65 if (plen == 0) {
66 return s;
67 } else {
68 int c = *(const uint8_t *)p++;
69 plen--; slen -= plen;
70 while (slen) {
71 const char *q = (const char *)memchr(s, c, slen);
72 if (!q) break;
73 if (memcmp(q+1, p, plen) == 0) return q;
74 q++; slen -= (MSize)(q-s); s = q;
78 return NULL;
81 /* Check whether a string has a pattern matching character. */
82 int lj_str_haspattern(GCstr *s)
84 const char *p = strdata(s), *q = p + s->len;
85 while (p < q) {
86 int c = *(const uint8_t *)p++;
87 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
88 return 1; /* Found a pattern matching char. */
90 return 0; /* No pattern matching chars found. */
93 /* -- String interning ---------------------------------------------------- */
95 /* Resize the string hash table (grow and shrink). */
96 void lj_str_resize(lua_State *L, MSize newmask)
98 global_State *g = G(L);
99 GCRef *newhash;
100 MSize i;
101 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
102 return; /* No resizing during GC traversal or if already too big. */
103 newhash = lj_mem_newvec(L, newmask+1, GCRef);
104 memset(newhash, 0, (newmask+1)*sizeof(GCRef));
105 for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
106 GCobj *p = gcref(g->strhash[i]);
107 while (p) { /* Follow each hash chain and reinsert all strings. */
108 MSize h = gco2str(p)->hash & newmask;
109 GCobj *next = gcnext(p);
110 /* NOBARRIER: The string table is a GC root. */
111 setgcrefr(p->gch.nextgc, newhash[h]);
112 setgcref(newhash[h], p);
113 p = next;
116 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
117 g->strmask = newmask;
118 g->strhash = newhash;
122 ** Lua will use at most ~(2^LUAI_HASHLIMIT) bytes from a string to
123 ** compute its hash
125 #if !defined(LUAI_HASHLIMIT)
126 #define LUAI_HASHLIMIT 5
127 #endif
129 #define cast(t, exp) ((t)(exp))
130 int luajittex_choose_hash_function = 0 ;
131 /* Intern a string and return string object. */
132 GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
134 global_State *g;
135 GCstr *s;
136 GCobj *o;
137 MSize len = (MSize)lenx;
138 MSize a, b, h = len;
139 size_t step ;
140 size_t l1 ;
141 if (lenx >= LJ_MAX_STR)
142 lj_err_msg(L, LJ_ERR_STROV);
143 g = G(L);
145 if (len==0)
146 return &g->strempty;
147 if (luajittex_choose_hash_function==0) {
148 /* Lua 5.1.5 hash function */
149 /* for 5.2 max methods we also need to patch the vm eq */
150 step = (len>>LUAI_HASHLIMIT)+1; /* if string is too long, don't hash all its chars */
151 for (l1=len; l1>=step; l1-=step) /* compute hash */
152 h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
153 } else {
154 /* LuaJIT 2.0.2 hash function */
155 /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
156 if (len >= 4) { /* Caveat: unaligned access! */
157 a = lj_getu32(str);
158 h ^= lj_getu32(str+len-4);
159 b = lj_getu32(str+(len>>1)-2);
160 h ^= b; h -= lj_rol(b, 14);
161 b += lj_getu32(str+(len>>2)-1);
162 } else if (len > 0) {
163 a = *(const uint8_t *)str;
164 h ^= *(const uint8_t *)(str+len-1);
165 b = *(const uint8_t *)(str+(len>>1));
166 h ^= b; h -= lj_rol(b, 14);
167 } else {
168 /* Already done, kept for reference */
169 return &g->strempty;
171 a ^= h; a -= lj_rol(h, 11);
172 b ^= a; b -= lj_rol(a, 25);
173 h ^= b; h -= lj_rol(b, 16);
177 /* Check if the string has already been interned. */
178 o = gcref(g->strhash[h & g->strmask]);
179 if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
180 while (o != NULL) {
181 GCstr *sx = gco2str(o);
182 if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
183 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
184 if (isdead(g, o)) flipwhite(o);
185 return sx; /* Return existing string. */
187 o = gcnext(o);
189 } else { /* Slow path: end of string is too close to a page boundary. */
190 while (o != NULL) {
191 GCstr *sx = gco2str(o);
192 if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
193 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
194 if (isdead(g, o)) flipwhite(o);
195 return sx; /* Return existing string. */
197 o = gcnext(o);
200 /* Nope, create a new string. */
201 s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
202 newwhite(g, s);
203 s->gct = ~LJ_TSTR;
204 s->len = len;
205 s->hash = h;
206 s->reserved = 0;
207 memcpy(strdatawr(s), str, len);
208 strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
209 /* Add it to string hash table. */
210 h &= g->strmask;
211 s->nextgc = g->strhash[h];
212 /* NOBARRIER: The string table is a GC root. */
213 setgcref(g->strhash[h], obj2gco(s));
214 if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
215 lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
216 return s; /* Return newly interned string. */
219 void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
221 g->strnum--;
222 lj_mem_free(g, s, sizestring(s));