Initial support for UTF-8 ext. grapheme clusters
[smenu.git] / index.c
blob2f886b0857d9385154a22fccb1e43df22f69168a
1 /* ################################################################### */
2 /* Copyright 2015, Pierre Gentile (p.gen.progs@gmail.com) */
3 /* */
4 /* This Source Code Form is subject to the terms of the Mozilla Public */
5 /* License, v. 2.0. If a copy of the MPL was not distributed with this */
6 /* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
7 /* ################################################################### */
9 /* ************************************************************************ */
10 /* Ternary Search Tree and sorted array creation functions. */
11 /* Inspired by a code described in "Ternary Search Trees" by Jon */
12 /* Bentley and Robert Sedgewick in the April, 1998, Dr. Dobb's Journal. */
13 /* Links: */
14 /* https://www.drdobbs.com/database/ternary-search-trees/184410528?pgno=1 */
15 /* https://www.cs.princeton.edu/~rs/strings/tstdemo.c. */
16 /* ************************************************************************ */
18 #include <stdlib.h>
19 #include <stdint.h>
20 #include <unistd.h>
21 #include <wchar.h>
22 #include <string.h>
24 #include "xmalloc.h"
25 #include "list.h"
26 #include "utils.h"
27 #include "index.h"
29 /* List of words matching the current search. */
30 /* """""""""""""""""""""""""""""""""""""""""" */
31 ll_t *tst_search_list; /* Must be initialized by ll_new() before use. */
33 /* ======================================= */
34 /* Ternary search tree insertion function. */
35 /* ======================================= */
36 tst_node_t *
37 tst_insert(tst_node_t *p, wchar_t *w, void *data)
39 if (p == NULL)
41 p = (tst_node_t *)xmalloc(sizeof(tst_node_t));
42 p->splitchar = *w;
43 p->lokid = p->eqkid = p->hikid = NULL;
44 p->data = NULL;
47 if (*w < p->splitchar)
48 p->lokid = tst_insert(p->lokid, w, data);
49 else if (*w == p->splitchar)
51 if (*w == L'\0')
53 p->data = data;
54 p->eqkid = NULL;
56 else
57 p->eqkid = tst_insert(p->eqkid, ++w, data);
59 else
60 p->hikid = tst_insert(p->hikid, w, data);
62 return (p);
65 /* ====================================== */
66 /* Ternary search tree deletion function. */
67 /* User data area not cleaned. */
68 /* ====================================== */
69 void
70 tst_cleanup(tst_node_t *p)
72 if (p != NULL)
74 tst_cleanup(p->lokid);
75 if (p->splitchar != L'\0')
76 tst_cleanup(p->eqkid);
77 tst_cleanup(p->hikid);
78 free(p);
82 /* ========================================================== */
83 /* Recursive traversal of a ternary tree. A callback function */
84 /* is also called when a complete string is found. */
85 /* Returns 1 if the callback function succeed (returned 1) at */
86 /* least once. */
87 /* The first_call argument is for initializing the static */
88 /* variable. */
89 /* ========================================================== */
90 int
91 tst_traverse(tst_node_t *p, int (*callback)(void *), int first_call)
93 static int rc;
95 if (first_call)
96 rc = 0;
98 if (p == NULL)
99 return 0;
100 tst_traverse(p->lokid, callback, 0);
101 if (p->splitchar != L'\0')
102 tst_traverse(p->eqkid, callback, 0);
103 else
104 rc += (*callback)(p->data);
105 tst_traverse(p->hikid, callback, 0);
107 return !!rc;
110 /* ======================================================================= */
111 /* Traverses the word tst looking for a wchar and build a list of pointers */
112 /* containing all the sub-tst potentially leading to words containing the */
113 /* next wchar os the search string. */
114 /* ======================================================================= */
116 tst_substring_traverse(tst_node_t *p,
117 int (*callback)(void *),
118 int first_call,
119 wchar_t w)
121 static int rc;
123 if (first_call)
124 rc = 0;
126 if (p == NULL)
127 return 0;
129 if (p->splitchar == w)
131 ll_node_t *node;
132 sub_tst_t *sub_tst_data;
134 node = tst_search_list->tail;
135 sub_tst_data = (sub_tst_t *)(node->data);
137 if (p->eqkid != NULL)
138 insert_sorted_ptr(&(sub_tst_data->array),
139 &(sub_tst_data->size),
140 &(sub_tst_data->count),
141 p->eqkid);
143 rc = 1;
146 tst_substring_traverse(p->lokid, callback, 0, w);
147 if (p->splitchar != L'\0')
148 tst_substring_traverse(p->eqkid, callback, 0, w);
149 else if (callback != NULL)
150 rc += (*callback)(p->data);
151 tst_substring_traverse(p->hikid, callback, 0, w);
153 return !!rc;
156 /* ======================================================================== */
157 /* Traverses the word tst looking for a wchar and build a list of pointers */
158 /* containing all the sub-tst nodes potentially leading to words containing */
159 /* the next wchar os the search string. */
160 /* ======================================================================== */
162 tst_fuzzy_traverse(tst_node_t *p,
163 int (*callback)(void *),
164 int first_call,
165 wchar_t w)
167 static int rc;
168 wchar_t w1s[2];
169 wchar_t w2s[2];
171 w1s[1] = w2s[1] = L'\0';
173 if (first_call)
174 rc = 0;
176 if (p == NULL)
177 return 0;
179 w1s[0] = p->splitchar;
180 w2s[0] = w;
182 if (my_wcscasecmp(w1s, w2s) == 0)
184 ll_node_t *node;
185 sub_tst_t *sub_tst_data;
187 node = tst_search_list->tail;
188 sub_tst_data = (sub_tst_t *)(node->data);
190 if (p->eqkid != NULL)
191 insert_sorted_ptr(&(sub_tst_data->array),
192 &(sub_tst_data->size),
193 &(sub_tst_data->count),
194 p->eqkid);
196 rc += 1;
199 tst_fuzzy_traverse(p->lokid, callback, 0, w);
200 if (p->splitchar != L'\0')
201 tst_fuzzy_traverse(p->eqkid, callback, 0, w);
202 else if (callback != NULL)
203 rc += (*callback)(p->data);
204 tst_fuzzy_traverse(p->hikid, callback, 0, w);
206 return !!rc;
209 /* ======================================================================= */
210 /* Searches a complete string in a ternary tree starting from a root node. */
211 /* ======================================================================= */
212 void *
213 tst_search(tst_node_t *root, wchar_t *w)
215 tst_node_t *p;
217 p = root;
219 while (p)
221 if (*w < p->splitchar)
222 p = p->lokid;
223 else if (*w == p->splitchar)
225 if (*w++ == L'\0')
226 return p->data;
227 p = p->eqkid;
229 else
230 p = p->hikid;
233 return NULL;
236 /* ================================================================= */
237 /* Searches all strings beginning with the same prefix. */
238 /* the callback function will be applied to each of theses strings */
239 /* returns NULL if no string matched the prefix. */
240 /* ================================================================= */
241 void *
242 tst_prefix_search(tst_node_t *root, wchar_t *w, int (*callback)(void *))
244 tst_node_t *p = root;
245 size_t len = wcslen(w);
246 size_t rc;
248 while (p)
250 if (*w < p->splitchar)
251 p = p->lokid;
252 else if (*w == p->splitchar)
254 len--;
255 if (*w++ == L'\0')
256 return p->data;
257 if (len == 0)
259 rc = tst_traverse(p->eqkid, callback, 1);
260 return (void *)(long)rc;
262 p = p->eqkid;
264 else
265 p = p->hikid;
268 return NULL;
271 /* ========================================================= */
272 /* Insertion of an integer in a already sorted integer array */
273 /* without duplications. */
274 /* */
275 /* IN/OUT array : pointer to the array to manage. */
276 /* IN/OUT size : allocated size of the array. */
277 /* IN/OUT nb : number of longs in the array. */
278 /* IN value : long value to be inserted in the array. */
279 /* ========================================================= */
280 void
281 insert_sorted_index(long **array_ptr, long *size, long *nb, long value)
283 long pos = *nb;
284 long left = 0, right = *nb, middle;
286 if (*nb > 0)
288 /* Bisection search. */
289 /* """"""""""""""""" */
290 while (left < right)
292 middle = (left + right) / 2;
293 if ((*array_ptr)[middle] == value)
294 return; /* Value already in array. */
296 if (value < (*array_ptr)[middle])
297 right = middle;
298 else
299 left = middle + 1;
301 pos = left;
304 if (*nb == *size)
306 *size += 64;
307 *array_ptr = xrealloc(*array_ptr, *size * sizeof(long));
310 /* Shift remaining array elements at position pos to position pos+1 */
311 /* (shift right one position). */
312 /* """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
313 if (*nb > pos)
314 memmove((*array_ptr) + pos + 1,
315 (*array_ptr) + pos,
316 sizeof(value) * (*nb - pos));
318 (*nb)++;
320 /* Set the value of the element at position pos in the passed array. */
321 /* """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" */
322 (*array_ptr)[pos] = value;
325 /* ========================================================= */
326 /* Insertion of an pointer in a already sorted pointer array */
327 /* without duplications. */
328 /* ========================================================= */
329 void
330 insert_sorted_ptr(tst_node_t ***array_ptr,
331 long *size,
332 long *nb,
333 tst_node_t *ptr)
335 long pos = *nb;
336 long left = 0, right = *nb, middle;
338 if (*nb > 0)
340 /* Bisection search. */
341 /* """"""""""""""""" */
342 while (left < right)
344 middle = (left + right) / 2;
345 if ((intptr_t)((*array_ptr)[middle]) == (intptr_t)ptr)
346 return; /* Value already in array. */
348 if ((intptr_t)ptr < (intptr_t)((*array_ptr)[middle]))
349 right = middle;
350 else
351 left = middle + 1;
353 pos = left;
356 if (*nb == *size)
358 *size += 64;
359 *array_ptr = xrealloc(*array_ptr, *size * sizeof(long));
362 if (*nb > pos)
363 memmove((*array_ptr) + pos + 1,
364 (*array_ptr) + pos,
365 sizeof(ptr) * (*nb - pos));
367 (*nb)++;
369 (*array_ptr)[pos] = ptr;