Merge branch 'master' of ../null
[unleashed.git] / include / regexp.h
blob5336c81677ce5ef85082629a8fb480010267e509
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
21 /* Copyright (c) 1988 AT&T */
22 /* All Rights Reserved */
25 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
31 #ifndef _REGEXP_H
32 #define _REGEXP_H
34 #include <string.h>
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
/*
 * Opcodes written into the compiled expression by compile() and
 * interpreted by advance().
 */
#define	CBRA	2	/* \( : followed by the group index */
#define	CCHR	4	/* literal character: followed by that character */
#define	CDOT	8	/* . : any character except NUL */
#define	CCL	12	/* [...] : followed by a 16-byte bitmap */
#define	CXCL	16	/* [...] with a high-bit character: 32-byte bitmap */
#define	CDOL	20	/* $ immediately before the end of the pattern */
#define	CCEOF	22	/* end of the compiled expression */
#define	CKET	24	/* \) : followed by the group index */
#define	CBACK	36	/* \1 .. \9 : followed by the group index */
#define	NCCL	40	/* [^...] : followed by a 16-byte bitmap */

/* Modifier bits OR'ed into the opcode of the preceding element. */
#define	STAR	01	/* element is followed by '*' */
#define	RNGE	03	/* element is followed by \{m\}, \{m,\} or \{m,n\} */

#define	NBRA	9	/* maximum number of \( \) groups */

/*
 * Bitmap helpers.  Both operate on the bitmap that the local pointer `ep`
 * refers to and use the file-scope table `bittab`.  Arguments and
 * expansions are fully parenthesized so that any expression may be passed.
 */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])

/* True when the first n bytes of s1 and s2 compare equal. */
#define	ecmp(s1, s2, n)	(strncmp((s1), (s2), (n)) == 0)
60 static char *braslist[NBRA];
61 static char *braelist[NBRA];
62 int sed, nbra;
63 char *loc1, *loc2, *locs;
64 static int nodelim;
66 int circf;
67 static int low;
68 static int size;
70 static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
72 int advance(const char *lp, const char *ep);
73 static void getrnge(const char *str);
75 char *
76 compile(char *instring, char *ep, const char *endbuf, int seof)
78 INIT /* Dependent declarations and initializations */
79 register int c;
80 register int eof = seof;
81 char *lastep;
82 int cclcnt;
83 char bracket[NBRA], *bracketp;
84 int closed;
85 int neg;
86 int lc;
87 int i, cflg;
88 int iflag; /* used for non-ascii characters in brackets */
90 #ifdef __lint
91 /* make lint happy */
92 c = nodelim;
93 #endif
95 lastep = NULL;
96 if ((c = GETC()) == eof || c == '\n') {
97 if (c == '\n') {
98 UNGETC(c);
99 nodelim = 1;
101 if (*ep == 0 && !sed)
102 ERROR(41);
103 RETURN(ep);
105 bracketp = bracket;
106 circf = closed = nbra = 0;
107 if (c == '^')
108 circf++;
109 else
110 UNGETC(c);
111 for (;;) {
112 if (ep >= endbuf)
113 ERROR(50);
114 c = GETC();
115 if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
116 lastep = ep;
117 if (c == eof) {
118 *ep++ = CCEOF;
119 if (bracketp != bracket)
120 ERROR(42);
121 RETURN(ep);
123 switch (c) {
125 case '.':
126 *ep++ = CDOT;
127 continue;
129 case '\n':
130 if (!sed) {
131 UNGETC(c);
132 *ep++ = CCEOF;
133 nodelim = 1;
134 if (bracketp != bracket)
135 ERROR(42);
136 RETURN(ep);
137 } else ERROR(36);
138 case '*':
139 if (lastep == NULL || *lastep == CBRA ||
140 *lastep == CKET)
141 goto defchar;
142 *lastep |= STAR;
143 continue;
145 case '$':
146 if (PEEKC() != eof && PEEKC() != '\n')
147 goto defchar;
148 *ep++ = CDOL;
149 continue;
151 case '[':
152 if (&ep[17] >= endbuf)
153 ERROR(50);
155 *ep++ = CCL;
156 lc = 0;
157 for (i = 0; i < 16; i++)
158 ep[i] = 0;
160 neg = 0;
161 if ((c = GETC()) == '^') {
162 neg = 1;
163 c = GETC();
165 iflag = 1;
166 do {
167 c &= 0377;
168 if (c == '\0' || c == '\n')
169 ERROR(49);
170 if ((c & 0200) && iflag) {
171 iflag = 0;
172 if (&ep[32] >= endbuf)
173 ERROR(50);
174 ep[-1] = CXCL;
175 for (i = 16; i < 32; i++)
176 ep[i] = 0;
178 if (c == '-' && lc != 0) {
179 if ((c = GETC()) == ']') {
180 PLACE('-');
181 break;
183 if ((c & 0200) && iflag) {
184 iflag = 0;
185 if (&ep[32] >= endbuf)
186 ERROR(50);
187 ep[-1] = CXCL;
188 for (i = 16; i < 32; i++)
189 ep[i] = 0;
191 while (lc < c) {
192 PLACE(lc);
193 lc++;
196 lc = c;
197 PLACE(c);
198 } while ((c = GETC()) != ']');
200 if (iflag)
201 iflag = 16;
202 else
203 iflag = 32;
205 if (neg) {
206 if (iflag == 32) {
207 for (cclcnt = 0; cclcnt < iflag;
208 cclcnt++)
209 ep[cclcnt] ^= 0377;
210 ep[0] &= 0376;
211 } else {
212 ep[-1] = NCCL;
213 /* make nulls match so test fails */
214 ep[0] |= 01;
218 ep += iflag;
220 continue;
222 case '\\':
223 switch (c = GETC()) {
225 case '(':
226 if (nbra >= NBRA)
227 ERROR(43);
228 *bracketp++ = (char)nbra;
229 *ep++ = CBRA;
230 *ep++ = (char)nbra++;
231 continue;
233 case ')':
234 if (bracketp <= bracket)
235 ERROR(42);
236 *ep++ = CKET;
237 *ep++ = *--bracketp;
238 closed++;
239 continue;
241 case '{':
242 if (lastep == NULL)
243 goto defchar;
244 *lastep |= RNGE;
245 cflg = 0;
246 nlim:
247 c = GETC();
248 i = 0;
249 do {
250 if ('0' <= c && c <= '9')
251 i = 10 * i + c - '0';
252 else
253 ERROR(16);
254 } while (((c = GETC()) != '\\') && (c != ','));
255 if (i >= 255)
256 ERROR(11);
257 *ep++ = (char)i;
258 if (c == ',') {
259 if (cflg++)
260 ERROR(44);
261 if ((c = GETC()) == '\\')
262 *ep++ = (char)255;
263 else {
264 UNGETC(c);
265 goto nlim;
266 /* get 2'nd number */
269 if (GETC() != '}')
270 ERROR(45);
271 if (!cflg) /* one number */
272 *ep++ = (char)i;
273 else if ((ep[-1] & 0377) < (ep[-2] & 0377))
274 ERROR(46);
275 continue;
277 case '\n':
278 ERROR(36);
280 case 'n':
281 c = '\n';
282 goto defchar;
284 default:
285 if (c >= '1' && c <= '9') {
286 if ((c -= '1') >= closed)
287 ERROR(25);
288 *ep++ = CBACK;
289 *ep++ = (char)c;
290 continue;
293 /* Drop through to default to use \ to turn off special chars */
295 defchar:
296 default:
297 lastep = ep;
298 *ep++ = CCHR;
299 *ep++ = (char)c;
302 /*NOTREACHED*/
306 step(const char *p1, const char *p2)
308 char c;
311 if (circf) {
312 loc1 = (char *)p1;
313 return (advance(p1, p2));
315 /* fast check for first character */
316 if (*p2 == CCHR) {
317 c = p2[1];
318 do {
319 if (*p1 != c)
320 continue;
321 if (advance(p1, p2)) {
322 loc1 = (char *)p1;
323 return (1);
325 } while (*p1++);
326 return (0);
328 /* regular algorithm */
329 do {
330 if (advance(p1, p2)) {
331 loc1 = (char *)p1;
332 return (1);
334 } while (*p1++);
335 return (0);
339 advance(const char *lp, const char *ep)
341 const char *curlp;
342 int c;
343 char *bbeg;
344 register char neg;
345 size_t ct;
347 for (;;) {
348 neg = 0;
349 switch (*ep++) {
351 case CCHR:
352 if (*ep++ == *lp++)
353 continue;
354 return (0);
355 /*FALLTHRU*/
357 case CDOT:
358 if (*lp++)
359 continue;
360 return (0);
361 /*FALLTHRU*/
363 case CDOL:
364 if (*lp == 0)
365 continue;
366 return (0);
367 /*FALLTHRU*/
369 case CCEOF:
370 loc2 = (char *)lp;
371 return (1);
372 /*FALLTHRU*/
374 case CXCL:
375 c = (unsigned char)*lp++;
376 if (ISTHERE(c)) {
377 ep += 32;
378 continue;
380 return (0);
381 /*FALLTHRU*/
383 case NCCL:
384 neg = 1;
385 /*FALLTHRU*/
387 case CCL:
388 c = *lp++;
389 if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
390 ep += 16;
391 continue;
393 return (0);
394 /*FALLTHRU*/
396 case CBRA:
397 braslist[*ep++] = (char *)lp;
398 continue;
399 /*FALLTHRU*/
401 case CKET:
402 braelist[*ep++] = (char *)lp;
403 continue;
404 /*FALLTHRU*/
406 case CCHR | RNGE:
407 c = *ep++;
408 getrnge(ep);
409 while (low--)
410 if (*lp++ != c)
411 return (0);
412 curlp = lp;
413 while (size--)
414 if (*lp++ != c)
415 break;
416 if (size < 0)
417 lp++;
418 ep += 2;
419 goto star;
420 /*FALLTHRU*/
422 case CDOT | RNGE:
423 getrnge(ep);
424 while (low--)
425 if (*lp++ == '\0')
426 return (0);
427 curlp = lp;
428 while (size--)
429 if (*lp++ == '\0')
430 break;
431 if (size < 0)
432 lp++;
433 ep += 2;
434 goto star;
435 /*FALLTHRU*/
437 case CXCL | RNGE:
438 getrnge(ep + 32);
439 while (low--) {
440 c = (unsigned char)*lp++;
441 if (!ISTHERE(c))
442 return (0);
444 curlp = lp;
445 while (size--) {
446 c = (unsigned char)*lp++;
447 if (!ISTHERE(c))
448 break;
450 if (size < 0)
451 lp++;
452 ep += 34; /* 32 + 2 */
453 goto star;
454 /*FALLTHRU*/
456 case NCCL | RNGE:
457 neg = 1;
458 /*FALLTHRU*/
460 case CCL | RNGE:
461 getrnge(ep + 16);
462 while (low--) {
463 c = *lp++;
464 if (((c & 0200) || !ISTHERE(c)) ^ neg)
465 return (0);
467 curlp = lp;
468 while (size--) {
469 c = *lp++;
470 if (((c & 0200) || !ISTHERE(c)) ^ neg)
471 break;
473 if (size < 0)
474 lp++;
475 ep += 18; /* 16 + 2 */
476 goto star;
477 /*FALLTHRU*/
479 case CBACK:
480 bbeg = braslist[*ep];
481 ct = braelist[*ep++] - bbeg;
483 if (ecmp(bbeg, lp, ct)) {
484 lp += ct;
485 continue;
487 return (0);
488 /*FALLTHRU*/
490 case CBACK | STAR:
491 bbeg = braslist[*ep];
492 ct = braelist[*ep++] - bbeg;
493 curlp = lp;
494 while (ecmp(bbeg, lp, ct))
495 lp += ct;
497 while (lp >= curlp) {
498 if (advance(lp, ep))
499 return (1);
500 lp -= ct;
502 return (0);
503 /*FALLTHRU*/
505 case CDOT | STAR:
506 curlp = lp;
507 while (*lp++);
508 goto star;
509 /*FALLTHRU*/
511 case CCHR | STAR:
512 curlp = lp;
513 while (*lp++ == *ep);
514 ep++;
515 goto star;
516 /*FALLTHRU*/
518 case CXCL | STAR:
519 curlp = lp;
520 do {
521 c = (unsigned char)*lp++;
522 } while (ISTHERE(c));
523 ep += 32;
524 goto star;
525 /*FALLTHRU*/
527 case NCCL | STAR:
528 neg = 1;
529 /*FALLTHRU*/
531 case CCL | STAR:
532 curlp = lp;
533 do {
534 c = *lp++;
535 } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
536 ep += 16;
537 goto star;
538 /*FALLTHRU*/
540 star:
541 do {
542 if (--lp == locs)
543 break;
544 if (advance(lp, ep))
545 return (1);
546 } while (lp > curlp);
547 return (0);
551 /*NOTREACHED*/
554 static void
555 getrnge(const char *str)
557 low = *str++ & 0377;
558 size = ((*str & 0377) == 255)? 20000: (*str &0377) - low;
561 #ifdef __cplusplus
563 #endif
565 #endif /* _REGEXP_H */