2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
34 #include <sys/types.h>
49 static int backslash(STR
*, int *);
50 static int bracket(STR
*);
51 static void genclass(STR
*);
52 static void genequiv(STR
*);
53 static int genrange(STR
*, int);
54 static void genseq(STR
*);
76 s
->lastch
= backslash(s
, &is_octal
);
83 clen
= mbrtowc(&wch
, s
->str
, MB_LEN_MAX
, NULL
);
84 if (clen
== (size_t)-1 || clen
== (size_t)-2 ||
86 errx(1, "illegal sequence");
93 /* We can start a range at any time. */
94 if (s
->str
[0] == '-' && genrange(s
, is_octal
))
114 ch
= nextwctype(s
->lastch
, s
->cclass
);
122 if ((ch
= s
->set
[s
->cnt
++]) == OOBCH
) {
141 case ':': /* "[:class:]" */
142 if ((p
= strchr(s
->str
+ 2, ']')) == NULL
)
144 if (*(p
- 1) != ':' || p
- s
->str
< 4)
151 case '=': /* "[=equiv=]" */
152 if ((p
= strchr(s
->str
+ 3, ']')) == NULL
)
154 if (*(p
- 1) != '=' || p
- s
->str
< 4)
159 default: /* "[\###*n]" or "[#*n]" */
161 if ((p
= strpbrk(s
->str
+ 2, "*]")) == NULL
)
163 if (p
[0] != '*' || index(p
, ']') == NULL
)
177 if ((s
->cclass
= wctype(s
->str
)) == 0)
178 errx(1, "unknown class %s", s
->str
);
180 s
->lastch
= -1; /* incremented before check in next() */
181 if (strcmp(s
->str
, "upper") == 0)
182 s
->state
= CCLASS_UPPER
;
183 else if (strcmp(s
->str
, "lower") == 0)
184 s
->state
= CCLASS_LOWER
;
198 if (*s
->str
== '\\') {
199 s
->equiv
[0] = backslash(s
, NULL
);
201 errx(1, "misplaced equivalence equals sign");
204 clen
= mbrtowc(&wc
, s
->str
, MB_LEN_MAX
, NULL
);
205 if (clen
== (size_t)-1 || clen
== (size_t)-2 || clen
== 0) {
210 if (s
->str
[clen
] != '=')
211 errx(1, "misplaced equivalence equals sign");
216 * Calculate the set of all characters in the same equivalence class
217 * as the specified character (they will have the same primary
218 * collation weights).
219 * XXX Knows too much about how strxfrm() is implemented. Assumes
220 * it fills the string with primary collation weight bytes. Only one-
221 * to-one mappings are supported.
222 * XXX Equivalence classes not supported in multibyte locales.
224 src
[0] = (char)s
->equiv
[0];
226 if (MB_CUR_MAX
== 1 && strxfrm(dst
, src
, sizeof (dst
)) == 1) {
227 pri
= (unsigned char)*dst
;
228 for (p
= 1, i
= 1; i
< NCHARS_SB
; i
++) {
230 if (strxfrm(dst
, src
, sizeof (dst
)) == 1 && pri
&&
231 pri
== (unsigned char)*dst
)
243 genrange(STR
*s
, int was_octal
)
253 if (*++s
->str
== '\\')
254 stopval
= backslash(s
, &octal
);
256 clen
= mbrtowc(&wc
, s
->str
, MB_LEN_MAX
, NULL
);
257 if (clen
== (size_t)-1 || clen
== (size_t)-2) {
265 * XXX Characters are not ordered according to collating sequence in
266 * multibyte locales.
268 if (octal
|| was_octal
|| MB_CUR_MAX
> 1) {
269 if (stopval
< s
->lastch
) {
273 s
->cnt
= stopval
- s
->lastch
+ 1;
278 if (charcoll((const void *)&stopval
, (const void *)&(s
->lastch
)) < 0) {
282 p
= malloc((NCHARS_SB
+ 1) * sizeof (int));
283 if ((s
->set
= (void *)p
) == NULL
)
284 err(1, "genrange() malloc");
285 for (cnt
= 0; cnt
< NCHARS_SB
; cnt
++)
286 if (charcoll((const void *)&cnt
, (const void *)&(s
->lastch
)) >=
288 charcoll((const void *)&cnt
, (const void *)&stopval
) <= 0)
291 n
= (int *)p
- (int *)s
->set
;
296 qsort(s
->set
, n
, sizeof (*(s
->set
)), charcoll
);
308 if (s
->which
== STRING1
)
309 errx(1, "sequences only valid in string2");
312 s
->lastch
= backslash(s
, NULL
);
314 clen
= mbrtowc(&wc
, s
->str
, MB_LEN_MAX
, NULL
);
315 if (clen
== (size_t)-1 || clen
== (size_t)-2) {
323 errx(1, "misplaced sequence asterisk");
327 s
->cnt
= backslash(s
, NULL
);
334 if (isdigit((uchar_t
)*s
->str
)) {
335 s
->cnt
= strtol(s
->str
, &ep
, 0);
341 errx(1, "illegal sequence count");
345 s
->state
= s
->cnt
? SEQUENCE
: INFINITE
;
349 * Translate \??? into a character. Up to 3 octal digits, if no digits either
350 * an escape code or a literal character.
353 backslash(STR
*s
, int *is_octal
)
357 if (is_octal
!= NULL
)
359 for (cnt
= val
= 0; ; ) {
360 ch
= (uchar_t
)*++s
->str
;
361 if (!isdigit(ch
) || ch
> '7')
363 val
= val
* 8 + ch
- '0';
370 if (is_octal
!= NULL
)
377 case 'a': /* escape characters */
391 case '\0': /* \" -> \ */
394 default: /* \x" -> x */