2 * Copyright (c) 2004 Tim J. Robbins.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * $FreeBSD: head/usr.bin/tr/cset.c 226363 2011-10-14 10:43:55Z ed $
29 * "Set of characters" ADT implemented as a splay tree of extents, with
30 * a lookup table cache to simplify looking up the first bunch of
31 * characters (which are presumably more common than others).
41 static struct csnode
* cset_delete(struct csnode
*, wchar_t);
42 static __inline
int cset_rangecmp(struct csnode
*, wchar_t);
43 static struct csnode
* cset_splay(struct csnode
*, wchar_t);
47 * Allocate a set of characters.
54 if ((cs
= malloc(sizeof(*cs
))) == NULL
)
57 cs
->cs_classes
= NULL
;
58 cs
->cs_havecache
= false;
59 cs
->cs_invert
= false;
65 * Add a character to the set.
68 cset_add(struct cset
*cs
, wchar_t ch
)
70 struct csnode
*csn
, *ncsn
;
73 cs
->cs_havecache
= false;
76 * Inserting into empty tree; new item becomes the root.
78 if (cs
->cs_root
== NULL
) {
79 csn
= malloc(sizeof(*cs
->cs_root
));
82 csn
->csn_left
= csn
->csn_right
= NULL
;
83 csn
->csn_min
= csn
->csn_max
= ch
;
89 * Splay to check whether the item already exists, and otherwise,
90 * where we should put it.
92 csn
= cs
->cs_root
= cset_splay(cs
->cs_root
, ch
);
95 * Avoid adding duplicate nodes.
97 if (cset_rangecmp(csn
, ch
) == 0)
101 * Allocate a new node and make it the new root.
103 ncsn
= malloc(sizeof(*ncsn
));
106 ncsn
->csn_min
= ncsn
->csn_max
= ch
;
107 if (cset_rangecmp(csn
, ch
) < 0) {
108 ncsn
->csn_left
= csn
->csn_left
;
109 ncsn
->csn_right
= csn
;
110 csn
->csn_left
= NULL
;
112 ncsn
->csn_right
= csn
->csn_right
;
113 ncsn
->csn_left
= csn
;
114 csn
->csn_right
= NULL
;
119 * Coalesce with left and right neighbours if possible.
121 if (ncsn
->csn_left
!= NULL
) {
122 ncsn
->csn_left
= cset_splay(ncsn
->csn_left
, ncsn
->csn_min
- 1);
123 if (ncsn
->csn_left
->csn_max
== ncsn
->csn_min
- 1) {
124 oval
= ncsn
->csn_left
->csn_min
;
125 ncsn
->csn_left
= cset_delete(ncsn
->csn_left
,
126 ncsn
->csn_left
->csn_min
);
127 ncsn
->csn_min
= oval
;
130 if (ncsn
->csn_right
!= NULL
) {
131 ncsn
->csn_right
= cset_splay(ncsn
->csn_right
,
133 if (ncsn
->csn_right
->csn_min
== ncsn
->csn_max
+ 1) {
134 oval
= ncsn
->csn_right
->csn_max
;
135 ncsn
->csn_right
= cset_delete(ncsn
->csn_right
,
136 ncsn
->csn_right
->csn_min
);
137 ncsn
->csn_max
= oval
;
146 * Determine whether a character is in the set without using
150 cset_in_hard(struct cset
*cs
, wchar_t ch
)
154 for (csc
= cs
->cs_classes
; csc
!= NULL
; csc
= csc
->csc_next
)
155 if (csc
->csc_invert
^ (iswctype(ch
, csc
->csc_type
) != 0))
156 return (cs
->cs_invert
^ true);
157 if (cs
->cs_root
!= NULL
) {
158 cs
->cs_root
= cset_splay(cs
->cs_root
, ch
);
159 return (cs
->cs_invert
^ (cset_rangecmp(cs
->cs_root
, ch
) == 0));
161 return (cs
->cs_invert
^ false);
169 cset_cache(struct cset
*cs
)
173 for (i
= 0; i
< CS_CACHE_SIZE
; i
++)
174 cs
->cs_cache
[i
] = cset_in_hard(cs
, i
);
176 cs
->cs_havecache
= true;
181 * Invert the character set.
184 cset_invert(struct cset
*cs
)
187 cs
->cs_invert
^= true;
188 cs
->cs_havecache
= false;
193 * Add a wctype()-style character class to the set, optionally
197 cset_addclass(struct cset
*cs
, wctype_t type
, bool invert
)
201 csc
= malloc(sizeof(*csc
));
204 csc
->csc_type
= type
;
205 csc
->csc_invert
= invert
;
206 csc
->csc_next
= cs
->cs_classes
;
207 cs
->cs_classes
= csc
;
208 cs
->cs_havecache
= false;
213 cset_rangecmp(struct csnode
*t
, wchar_t ch
)
223 static struct csnode
*
224 cset_splay(struct csnode
*t
, wchar_t ch
)
226 struct csnode N
, *l
, *r
, *y
;
229 * Based on public domain code from Sleator.
234 N
.csn_left
= N
.csn_right
= NULL
;
237 if (cset_rangecmp(t
, ch
) < 0) {
238 if (t
->csn_left
!= NULL
&&
239 cset_rangecmp(t
->csn_left
, ch
) < 0) {
241 t
->csn_left
= y
->csn_right
;
245 if (t
->csn_left
== NULL
)
250 } else if (cset_rangecmp(t
, ch
) > 0) {
251 if (t
->csn_right
!= NULL
&&
252 cset_rangecmp(t
->csn_right
, ch
) > 0) {
254 t
->csn_right
= y
->csn_left
;
258 if (t
->csn_right
== NULL
)
266 l
->csn_right
= t
->csn_left
;
267 r
->csn_left
= t
->csn_right
;
268 t
->csn_left
= N
.csn_right
;
269 t
->csn_right
= N
.csn_left
;
273 static struct csnode
*
274 cset_delete(struct csnode
*t
, wchar_t ch
)
279 t
= cset_splay(t
, ch
);
280 assert(cset_rangecmp(t
, ch
) == 0);
281 if (t
->csn_left
== NULL
)
284 x
= cset_splay(t
->csn_left
, ch
);
285 x
->csn_right
= t
->csn_right
;