sys/vfs/hammer2: Fix multiple "radii" -> "radix"
[dragonfly.git] / contrib / tre / lib / tre-internal.h
blob1dec1c2ba4e1a83899d27943a465b18cc8826d42
1 /*
2 tre-internal.h - TRE internal definitions
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
7 */
9 #ifndef TRE_INTERNAL_H
10 #define TRE_INTERNAL_H 1
12 #ifdef HAVE_WCHAR_H
13 #include <wchar.h>
14 #endif /* HAVE_WCHAR_H */
16 #ifdef HAVE_WCTYPE_H
17 #include <wctype.h>
18 #endif /* !HAVE_WCTYPE_H */
20 #include <ctype.h>
21 #include <bitstring.h>
22 #include "tre.h"
23 #include "xlocale_private.h"
25 #ifdef TRE_DEBUG
26 #include <stdio.h>
27 #define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
28 #else /* !TRE_DEBUG */
29 #define DPRINT(msg) do { } while(/*CONSTCOND*/0)
30 #endif /* !TRE_DEBUG */
32 #define elementsof(x) ( sizeof(x) / sizeof(x[0]) )
34 #ifdef HAVE_MBRTOWC
35 #define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
36 #define tre_mbrtowc_l(pwc,s,n,ps,l) (mbrtowc_l((pwc), (s), (n), (ps), (l)))
37 #else /* !HAVE_MBRTOWC */
38 #ifdef HAVE_MBTOWC
39 #define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
40 #endif /* HAVE_MBTOWC */
41 #endif /* !HAVE_MBRTOWC */
43 #ifdef TRE_MULTIBYTE
44 #ifdef HAVE_MBSTATE_T
45 #define TRE_MBSTATE
46 #endif /* TRE_MULTIBYTE */
47 #endif /* HAVE_MBSTATE_T */
49 /* Define the character types and functions. */
50 #ifdef TRE_WCHAR
52 /* Wide characters. */
53 typedef wint_t tre_cint_t;
54 #define TRE_CHAR_MAX WCHAR_MAX
56 #ifdef TRE_MULTIBYTE
57 #define TRE_MB_CUR_MAX MB_CUR_MAX
58 #define TRE_MB_CUR_MAX_L MB_CUR_MAX_L
59 #else /* !TRE_MULTIBYTE */
60 #define TRE_MB_CUR_MAX 1
61 #endif /* !TRE_MULTIBYTE */
63 #define tre_isalnum iswalnum
64 #define tre_isalpha iswalpha
65 #ifdef HAVE_ISWBLANK
66 #define tre_isblank iswblank
67 #endif /* HAVE_ISWBLANK */
68 #define tre_iscntrl iswcntrl
69 #define tre_isdigit iswdigit
70 #define tre_isgraph iswgraph
71 #define tre_islower iswlower
72 #define tre_isprint iswprint
73 #define tre_ispunct iswpunct
74 #define tre_isspace iswspace
75 #define tre_isupper iswupper
76 #define tre_isxdigit iswxdigit
78 #define tre_tolower towlower
79 #define tre_toupper towupper
80 #define tre_strlen wcslen
82 #define tre_isalnum_l iswalnum_l
83 #define tre_isdigit_l iswdigit_l
84 #define tre_islower_l iswlower_l
85 #define tre_isupper_l iswupper_l
86 #define tre_isxdigit_l iswxdigit_l
87 #define tre_tolower_l towlower_l
88 #define tre_toupper_l towupper_l
90 #else /* !TRE_WCHAR */
92 /* 8 bit characters. */
93 typedef short tre_cint_t;
94 #define TRE_CHAR_MAX 255
95 #define TRE_MB_CUR_MAX 1
97 #define tre_isalnum isalnum
98 #define tre_isalpha isalpha
99 #ifdef HAVE_ISASCII
100 #define tre_isascii isascii
101 #endif /* HAVE_ISASCII */
102 #ifdef HAVE_ISBLANK
103 #define tre_isblank isblank
104 #endif /* HAVE_ISBLANK */
105 #define tre_iscntrl iscntrl
106 #define tre_isdigit isdigit
107 #define tre_isgraph isgraph
108 #define tre_islower islower
109 #define tre_isprint isprint
110 #define tre_ispunct ispunct
111 #define tre_isspace isspace
112 #define tre_isupper isupper
113 #define tre_isxdigit isxdigit
115 #define tre_tolower(c) (tre_cint_t)(tolower(c))
116 #define tre_toupper(c) (tre_cint_t)(toupper(c))
117 #define tre_strlen(s) (strlen((const char*)s))
119 #endif /* !TRE_WCHAR */
121 #if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
122 #define TRE_USE_SYSTEM_WCTYPE 1
123 #endif
125 #ifdef TRE_USE_SYSTEM_WCTYPE
126 /* Use system provided iswctype() and wctype(). */
127 typedef wctype_t tre_ctype_t;
128 #define tre_isctype iswctype
129 #define tre_ctype wctype
130 #define tre_isctype_l iswctype_l
131 #define tre_ctype_l wctype_l
132 #else /* !TRE_USE_SYSTEM_WCTYPE */
133 /* Define our own versions of iswctype() and wctype(). */
134 typedef int (*tre_ctype_t)(tre_cint_t);
135 #define tre_isctype(c, type) ( (type)(c) )
136 tre_ctype_t tre_ctype(const char *name);
137 #endif /* !TRE_USE_SYSTEM_WCTYPE */
139 typedef enum { STR_WIDE, STR_BYTE, STR_MBS } tre_str_type_t;
141 /* Returns number of bytes to add to (char *)ptr to make it
142 properly aligned for the type. */
143 #define ALIGN(ptr, type) \
144 ((((long)ptr) % sizeof(type)) \
145 ? (sizeof(type) - (((long)ptr) % sizeof(type))) \
146 : 0)
148 #undef MAX
149 #undef MIN
150 #define MAX(a, b) (((a) >= (b)) ? (a) : (b))
151 #define MIN(a, b) (((a) <= (b)) ? (a) : (b))
153 /* Define STRF to the correct printf formatter for strings. */
154 #ifdef TRE_WCHAR
155 #define STRF "ls"
156 #else /* !TRE_WCHAR */
157 #define STRF "s"
158 #endif /* !TRE_WCHAR */
160 /* Types to handle bracket expressions. */
161 typedef enum {
162 TRE_BRACKET_MATCH_TYPE_UNUSED = 0,
163 TRE_BRACKET_MATCH_TYPE_CHAR, /* Single character value */
164 TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN, /* Collation range begin */
165 TRE_BRACKET_MATCH_TYPE_RANGE_END, /* Collation range end */
166 TRE_BRACKET_MATCH_TYPE_CLASS, /* Character class */
167 TRE_BRACKET_MATCH_TYPE_EQUIVALENCE, /* Collation equivalence value */
168 } tre_bracket_match_type_t;
170 typedef struct {
171 tre_bracket_match_type_t type;
172 tre_cint_t value;
173 } tre_bracket_match_t;
175 #define TRE_BRACKET_MATCH_FLAG_NEGATE 1
177 typedef struct {
178 int num_bracket_matches;
179 int flags;
180 tre_bracket_match_t bracket_matches[0];
181 } tre_bracket_match_list_t;
183 #define SIZEOF_BRACKET_MATCH_LIST_N(n) (sizeof(tre_bracket_match_list_t) + \
184 sizeof(tre_bracket_match_t) * (n))
185 #define SIZEOF_BRACKET_MATCH_LIST(l) SIZEOF_BRACKET_MATCH_LIST_N( \
186 (l)->num_bracket_matches)
188 /* The "count" field is the number of time the tag was set, initially zero.
189 The "first" field contains the first set value (when "count" equals 1).
190 The "value" field contains the current value of the tag, if "count" is
191 greater than zero (the tag's current value is -1 if "count" is zero).
192 The "touch" field is the touch value, a montonically increasing value
193 (maintained by the caller) set each time the tag itself is set. */
194 typedef struct {
195 int count;
196 int first;
197 int value;
198 int touch;
199 } tre_tag_t;
201 /* TNFA transition type. A TNFA state is an array of transitions,
202 the terminator is a transition with NULL `state'. */
203 typedef struct tnfa_transition tre_tnfa_transition_t;
205 struct tnfa_transition {
206 /* Range of accepted characters. */
207 tre_cint_t code_min;
208 tre_cint_t code_max;
209 /* Pointer to the destination state. */
210 tre_tnfa_transition_t *state;
211 /* ID number of the destination state. */
212 int state_id;
213 /* -1 terminated array of tags (or NULL). */
214 int *tags;
215 /* Matching parameters settings (or NULL). */
216 int *params;
217 /* Assertion bitmap. */
218 int assertions;
219 /* Assertion parameters. */
220 union {
221 /* Bracket matches. */
222 tre_bracket_match_list_t *bracket_match_list;
223 /* Back reference assertion. */
224 int backref;
225 } u;
229 /* Assertions. */
230 #define ASSERT_AT_BOL 1 /* Beginning of line. */
231 #define ASSERT_AT_EOL 2 /* End of line. */
232 #define ASSERT_BRACKET_MATCH 4 /* Matches in `bracket_match_list'. */
233 #define ASSERT_AT_BOW 8 /* Beginning of word. */
234 #define ASSERT_AT_EOW 16 /* End of word. */
235 #define ASSERT_AT_WB 32 /* Word boundary. */
236 #define ASSERT_AT_WB_NEG 64 /* Not a word boundary. */
237 #define ASSERT_BACKREF 128 /* A back reference in `backref'. */
238 #define ASSERT_LAST 128
240 /* Tag directions. */
241 typedef enum {
242 TRE_TAG_MINIMIZE = 0,
243 TRE_TAG_MAXIMIZE,
244 TRE_TAG_LEFT_MAXIMIZE,
245 } tre_tag_direction_t;
247 /* Parameters that can be changed dynamically while matching. */
248 typedef enum {
249 TRE_PARAM_COST_INS = 0,
250 TRE_PARAM_COST_DEL = 1,
251 TRE_PARAM_COST_SUBST = 2,
252 TRE_PARAM_COST_MAX = 3,
253 TRE_PARAM_MAX_INS = 4,
254 TRE_PARAM_MAX_DEL = 5,
255 TRE_PARAM_MAX_SUBST = 6,
256 TRE_PARAM_MAX_ERR = 7,
257 TRE_PARAM_DEPTH = 8,
258 TRE_PARAM_LAST = 9
259 } tre_param_t;
261 /* Unset matching parameter */
262 #define TRE_PARAM_UNSET -1
264 /* Signifies the default matching parameter value. */
265 #define TRE_PARAM_DEFAULT -2
267 /* Instructions to compute submatch register values from tag values
268 after a successful match. */
269 struct tre_submatch_data {
270 /* Tag that gives the value for rm_so (submatch start offset). */
271 int so_tag;
272 /* Tag that gives the value for rm_eo (submatch end offset). */
273 int eo_tag;
276 typedef struct tre_submatch_data tre_submatch_data_t;
279 /* LAST MATCHED structures */
280 struct __previous_type;
281 struct __previous_branch_type;
282 struct __current_type;
283 struct __current_branch_type;
285 typedef struct __previous_branch_type {
286 struct __previous_branch_type *next;
287 struct __previous_type *last_matched; int n_last_matched; int cmp_tag; int n_tags;
288 int tot_branches;
289 int tot_last_matched;
290 int tot_tags;
291 bitstr_t tags[0];
292 } tre_last_matched_branch_pre_t;
294 typedef struct __previous_type {
295 struct __previous_type *next;
296 tre_last_matched_branch_pre_t *branches; int n_branches; int start_tag;
297 int tot_branches;
298 int tot_last_matched;
299 int tot_tags;
300 } tre_last_matched_pre_t;
302 typedef struct __current_branch_type {
303 int *tags;
304 struct __current_type *last_matched; int n_last_matched; int cmp_tag; int n_tags;
305 } tre_last_matched_branch_t;
307 typedef struct __current_type {
308 tre_last_matched_branch_t *branches; int n_branches; int start_tag;
309 } tre_last_matched_t;
311 /* TNFA definition. */
312 typedef struct tnfa tre_tnfa_t;
314 struct tnfa {
315 tre_tnfa_transition_t *transitions;
316 tre_tnfa_transition_t *initial;
317 tre_tnfa_transition_t *final;
318 tre_submatch_data_t *submatch_data;
319 tre_tag_direction_t *tag_directions;
320 int *minimal_tags;
321 tre_last_matched_branch_t *last_matched_branch;
322 locale_t loc;
323 unsigned int num_transitions;
324 int first_char;
325 unsigned int num_submatches;
326 unsigned int num_submatches_invisible;
327 int num_tags;
328 int num_minimals;
329 int end_tag;
330 int num_states;
331 int cflags;
332 int have_backrefs;
333 int num_reorder_tags;
334 int have_approx;
335 int params_depth;
339 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
340 locale_t loc);
342 void
343 tre_free(regex_t *preg);
345 reg_errcode_t
346 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
347 const tre_tnfa_t *tnfa, const tre_tag_t *tags, int match_eo);
349 reg_errcode_t
350 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
351 tre_str_type_t type, tre_tag_t *match_tags, int eflags,
352 int *match_end_ofs);
354 reg_errcode_t
355 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
356 int len, tre_str_type_t type, tre_tag_t *match_tags,
357 int eflags, int *match_end_ofs);
359 #ifdef TRE_APPROX
360 reg_errcode_t
361 tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
362 tre_str_type_t type, tre_tag_t *match_tags,
363 regamatch_t *match, regaparams_t params,
364 int eflags, int *match_end_ofs);
365 #endif /* TRE_APPROX */
367 #endif /* TRE_INTERNAL_H */
369 /* EOF */