Merge branch 'master' into v2.1
[luajit-2.0.git] / src / lj_str.c
blob46d546c683fa289c88a5e0f35de3a243309ed32e
1 /*
2 ** String handling.
3 ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
4 */
6 #define lj_str_c
7 #define LUA_CORE
9 #include "lj_obj.h"
10 #include "lj_gc.h"
11 #include "lj_err.h"
12 #include "lj_str.h"
13 #include "lj_char.h"
15 /* -- String helpers ------------------------------------------------------ */
17 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
18 int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
20 MSize i, n = a->len > b->len ? b->len : a->len;
21 for (i = 0; i < n; i += 4) {
22 /* Note: innocuous access up to end of string + 3. */
23 uint32_t va = *(const uint32_t *)(strdata(a)+i);
24 uint32_t vb = *(const uint32_t *)(strdata(b)+i);
25 if (va != vb) {
26 #if LJ_LE
27 va = lj_bswap(va); vb = lj_bswap(vb);
28 #endif
29 i -= n;
30 if ((int32_t)i >= -3) {
31 va >>= 32+(i<<3); vb >>= 32+(i<<3);
32 if (va == vb) break;
34 return va < vb ? -1 : 1;
37 return (int32_t)(a->len - b->len);
40 /* Fast string data comparison. Caveat: unaligned access to 1st string! */
41 static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
43 MSize i = 0;
44 lua_assert(len > 0);
45 lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
46 do { /* Note: innocuous access up to end of string + 3. */
47 uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
48 if (v) {
49 i -= len;
50 #if LJ_LE
51 return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
52 #else
53 return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
54 #endif
56 i += 4;
57 } while (i < len);
58 return 0;
61 /* Find fixed string p inside string s. */
62 const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
64 if (plen <= slen) {
65 if (plen == 0) {
66 return s;
67 } else {
68 int c = *(const uint8_t *)p++;
69 plen--; slen -= plen;
70 while (slen) {
71 const char *q = (const char *)memchr(s, c, slen);
72 if (!q) break;
73 if (memcmp(q+1, p, plen) == 0) return q;
74 q++; slen -= (MSize)(q-s); s = q;
78 return NULL;
81 /* Check whether a string has a pattern matching character. */
82 int lj_str_haspattern(GCstr *s)
84 const char *p = strdata(s), *q = p + s->len;
85 while (p < q) {
86 int c = *(const uint8_t *)p++;
87 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
88 return 1; /* Found a pattern matching char. */
90 return 0; /* No pattern matching chars found. */
93 /* -- String interning ---------------------------------------------------- */
95 /* Resize the string hash table (grow and shrink). */
96 void lj_str_resize(lua_State *L, MSize newmask)
98 global_State *g = G(L);
99 GCRef *newhash;
100 MSize i;
101 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
102 return; /* No resizing during GC traversal or if already too big. */
103 newhash = lj_mem_newvec(L, newmask+1, GCRef);
104 memset(newhash, 0, (newmask+1)*sizeof(GCRef));
105 for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
106 GCobj *p = gcref(g->strhash[i]);
107 while (p) { /* Follow each hash chain and reinsert all strings. */
108 MSize h = gco2str(p)->hash & newmask;
109 GCobj *next = gcnext(p);
110 /* NOBARRIER: The string table is a GC root. */
111 setgcrefr(p->gch.nextgc, newhash[h]);
112 setgcref(newhash[h], p);
113 p = next;
116 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
117 g->strmask = newmask;
118 g->strhash = newhash;
121 /* Intern a string and return string object. */
122 GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
124 global_State *g;
125 GCstr *s;
126 GCobj *o;
127 MSize len = (MSize)lenx;
128 MSize a, b, h = len;
129 if (lenx >= LJ_MAX_STR)
130 lj_err_msg(L, LJ_ERR_STROV);
131 g = G(L);
132 /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
133 if (len >= 4) { /* Caveat: unaligned access! */
134 a = lj_getu32(str);
135 h ^= lj_getu32(str+len-4);
136 b = lj_getu32(str+(len>>1)-2);
137 h ^= b; h -= lj_rol(b, 14);
138 b += lj_getu32(str+(len>>2)-1);
139 } else if (len > 0) {
140 a = *(const uint8_t *)str;
141 h ^= *(const uint8_t *)(str+len-1);
142 b = *(const uint8_t *)(str+(len>>1));
143 h ^= b; h -= lj_rol(b, 14);
144 } else {
145 return &g->strempty;
147 a ^= h; a -= lj_rol(h, 11);
148 b ^= a; b -= lj_rol(a, 25);
149 h ^= b; h -= lj_rol(b, 16);
150 /* Check if the string has already been interned. */
151 o = gcref(g->strhash[h & g->strmask]);
152 if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
153 while (o != NULL) {
154 GCstr *sx = gco2str(o);
155 if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
156 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
157 if (isdead(g, o)) flipwhite(o);
158 return sx; /* Return existing string. */
160 o = gcnext(o);
162 } else { /* Slow path: end of string is too close to a page boundary. */
163 while (o != NULL) {
164 GCstr *sx = gco2str(o);
165 if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
166 /* Resurrect if dead. Can only happen with fixstring() (keywords). */
167 if (isdead(g, o)) flipwhite(o);
168 return sx; /* Return existing string. */
170 o = gcnext(o);
173 /* Nope, create a new string. */
174 s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
175 newwhite(g, s);
176 s->gct = ~LJ_TSTR;
177 s->len = len;
178 s->hash = h;
179 s->reserved = 0;
180 memcpy(strdatawr(s), str, len);
181 strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
182 /* Add it to string hash table. */
183 h &= g->strmask;
184 s->nextgc = g->strhash[h];
185 /* NOBARRIER: The string table is a GC root. */
186 setgcref(g->strhash[h], obj2gco(s));
187 if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
188 lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
189 return s; /* Return newly interned string. */
192 void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
194 g->strnum--;
195 lj_mem_free(g, s, sizestring(s));