6198 Let's EOL cachefs
[illumos-gate.git] / usr / src / lib / libast / common / regex / regcoll.c
blob240dcc1f4c838a2357c78513311f71fa74fe0eb3
1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
/*
 * regex collation symbol support
 */
27 #include "reglib.h"
29 #include <ccode.h>
31 #ifndef UCS_BYTE
32 #define UCS_BYTE 1
33 #endif
35 #include "ucs_names.h"
37 typedef struct Ucs_map_s
39 Ucs_attr_t attr[3];
40 Ucs_code_t code;
41 const char* name;
42 Dtlink_t link;
43 struct Ucs_map_s* next;
44 } Ucs_map_t;
/*
 * set / test / clear bit i in the attribute bit set a
 * each array element holds 32 bits: i>>5 selects the element and i&31
 * the bit within it
 * the mask is built from (Ucs_attr_t)1 rather than the int constant 1
 * so that selecting bit 31 does not shift into the sign bit of an int
 * NOTE: i is evaluated twice -- do not pass expressions with side effects
 */

#define setattr(a,i)	((a)[(i)>>5]|=((Ucs_attr_t)1<<((i)&((1<<5)-1))))
#define tstattr(a,i)	((a)[(i)>>5]&((Ucs_attr_t)1<<((i)&((1<<5)-1))))
#define clrattr(a,i)	((a)[(i)>>5]&=~((Ucs_attr_t)1<<((i)&((1<<5)-1))))
50 static struct Local_s
52 int fatal;
53 Dt_t* attrs;
54 Dt_t* names;
55 Dtdisc_t dtdisc;
56 #if CC_NATIVE != CC_ASCII
57 unsigned char* a2n;
58 #endif
59 } local;
62 * initialize the writeable tables from the readonly data
63 * the tables are big enough to be concerned about text vs. data vs. bss
64 * UCS_BYTE==0 100K
65 * UCS_BYTE==1 20K
68 static int
69 initialize(void)
71 register int i;
72 register Ucs_map_t* a;
73 register Ucs_map_t* w;
75 if (local.fatal)
76 return -1;
77 local.dtdisc.link = offsetof(Ucs_map_t, link);
78 local.dtdisc.key = offsetof(Ucs_map_t, name);
79 local.dtdisc.size = -1;
80 if (!(w = (Ucs_map_t*)malloc(sizeof(Ucs_map_t) * (elementsof(ucs_attrs) + elementsof(ucs_names)))))
82 local.fatal = 1;
83 return -1;
85 if (!(local.attrs = dtopen(&local.dtdisc, Dttree)))
87 free(w);
88 local.fatal = 1;
89 return -1;
91 if (!(local.names = dtopen(&local.dtdisc, Dttree)))
93 free(w);
94 dtclose(local.attrs);
95 local.fatal = 1;
96 return -1;
98 for (i = 0; i < elementsof(ucs_attrs); i++, w++)
100 memcpy(w, &ucs_attrs[i], offsetof(Ucs_dat_t, table));
101 w->name = ucs_strings[ucs_attrs[i].table] + ucs_attrs[i].index;
102 w->next = 0;
103 dtinsert(local.attrs, w);
105 for (i = 0; i < elementsof(ucs_names); i++, w++)
107 memcpy(w, &ucs_names[i], offsetof(Ucs_dat_t, table));
108 w->name = ucs_strings[ucs_names[i].table] + ucs_names[i].index;
109 w->next = 0;
110 if (a = (Ucs_map_t*)dtsearch(local.names, w))
112 while (a->next)
113 a = a->next;
114 a->next = w;
116 else
117 dtinsert(local.names, w);
119 #if CC_NATIVE != CC_ASCII
120 local.a2n = ccmap(CC_ASCII, CC_NATIVE);
121 #endif
122 return 0;
126 * return the collating symbol delimited by [c c], where c is either '=' or '.'
127 * s points to the first char after the initial [
128 * if e!=0 it is set to point to the next char in s on return
130 * the collating symbol is converted to multibyte in <buf,size>
131 * the return value is:
132 * -1 syntax error or buf not large enough
133 * >=0 size with 0-terminated mb collation element
134 * or ligature value in buf
138 regcollate(register const char* s, char** e, char* buf, int size)
140 register int c;
141 register char* u;
142 register char* b;
143 register char* x;
144 register Ucs_map_t* a;
145 Ucs_map_t* z;
146 const char* t;
147 const char* v;
148 int n;
149 int r;
150 int ul;
151 int term;
152 wchar_t w[2];
153 Ucs_attr_t attr[3];
155 if (size < 2)
156 r = -1;
157 else if ((term = *s++) != '.' && term != '=')
159 s--;
160 r = -1;
162 else if (*s == term && *(s + 1) == ']')
163 r = -1;
164 else
166 t = s;
167 mbchar(s);
168 if ((n = (s - t)) == 1)
170 if (*s == term && *(s + 1) == ']')
172 s += 2;
173 r = -1;
175 else
177 if (!local.attrs && initialize())
178 return -1;
179 attr[0] = attr[1] = attr[2] = 0;
180 ul = 0;
181 b = buf;
182 x = buf + size - 2;
183 r = 1;
184 s = t;
187 v = s;
188 u = b;
189 for (;;)
191 if (!(c = *s++))
192 return -1;
193 if (c == term)
195 if (!(c = *s++))
196 return -1;
197 if (c != term)
199 if (c != ']')
200 return -1;
201 r = -1;
202 break;
205 if (c == ' ' || c == '-' && u > b && *s != ' ' && *s != '-')
206 break;
207 if (isupper(c))
208 c = tolower(c);
209 if (u > x)
210 break;
211 *u++ = c;
213 *u = 0;
214 if (a = (Ucs_map_t*)dtmatch(local.attrs, b))
215 setattr(attr, a->code);
216 else
218 if (u < x)
219 *u++ = ' ';
220 if (b == buf)
222 if (isupper(*v))
223 ul = UCS_UC;
224 else if (islower(*v))
225 ul = UCS_LC;
227 b = u;
229 } while (r > 0);
230 if (b > buf && *(b - 1) == ' ')
231 b--;
232 *b = 0;
233 attr[0] &= ~((Ucs_attr_t)1);
234 if (ul)
236 if (tstattr(attr, UCS_UC) || tstattr(attr, UCS_LC))
237 ul = 0;
238 else
239 setattr(attr, ul);
241 if (z = (Ucs_map_t*)dtmatch(local.names, buf))
242 for(;;)
244 for (a = z; a; a = a->next)
245 if ((attr[0] & a->attr[0]) == attr[0] && (attr[1] & a->attr[1]) == attr[1] && (attr[2] & a->attr[2]) == attr[2])
247 #if 0
248 if (a->code <= 0xff)
250 #if CC_NATIVE != CC_ASCII
251 buf[0] = local.a2n[a->code];
252 #else
253 buf[0] = a->code;
254 #endif
255 buf[r = 1] = 0;
256 ul = 0;
257 break;
259 #endif
260 w[0] = a->code;
261 w[1] = 0;
262 if ((r = wcstombs(buf, w, size)) > 0)
263 ul = 0;
264 break;
266 if (!ul)
267 break;
268 clrattr(attr, ul);
269 ul = 0;
272 if (r < 0)
274 if ((n = s - t - 2) > (size - 1))
275 return -1;
276 memcpy(buf, t, n);
277 buf[n] = 0;
278 if (n == 1)
279 r = n;
280 else
282 for (t = buf; isalnum(*t); t++);
283 if (!*t)
284 r = n;
288 else if (*s++ != term || *s++ != ']')
290 s--;
291 r = -1;
293 else if (n > (size - 1))
294 r = -1;
295 else
297 memcpy(buf, t, n);
298 buf[r = n] = 0;
301 if (e)
302 *e = (char*)s;
303 return r;