wg.conf.5: Fix a typo (in-inline comments are *not* allowed)
[dragonfly.git] / contrib / tre / lib / tre-match-utils.h
blob34da4031d3e7d08acd5051ed33caa0b2cdc146dd
1 /*
2 tre-match-utils.h - TRE matcher helper definitions
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
7 */
9 #include "tre-internal.h"
11 #define str_source ((const tre_str_source*)string)
13 #ifdef TRE_WCHAR
15 #ifdef TRE_MULTIBYTE
17 /* Wide character and multibyte support. */
20 * Because all multibyte encodings are exclusively single-shift encoding,
21 * with the shift codes having the high bit set, we can make an optimization
22 * for STR_MBS that only calls tre_mbrtowc_l() when a high-bit character
23 * is detected, and just do a direct copy for ASCII characters.
25 #define GET_NEXT_WCHAR() \
26 do { \
27 prev_c = next_c; \
28 switch (type) \
29 { \
30 case STR_BYTE: \
31 pos++; \
32 if (len >= 0 && pos >= len) \
33 next_c = '\0'; \
34 else \
35 next_c = (unsigned char)(*str_byte++); \
36 break; \
37 case STR_WIDE: \
38 pos++; \
39 if (len >= 0 && pos >= len) \
40 next_c = L'\0'; \
41 else \
42 next_c = *str_wide++; \
43 break; \
44 case STR_MBS: \
45 pos += pos_add_next; \
46 if (__builtin_expect(len >= 0 && pos >= len, 0)) \
47 { \
48 next_c = L'\0'; \
49 pos_add_next = 1; \
50 } \
51 else if (__builtin_expect(!(*str_byte & 0x80), 1)) \
52 { \
53 next_c = (unsigned char)(*str_byte++); \
54 pos_add_next = 1; \
55 } \
56 else \
57 { \
58 size_t w; \
59 int max; \
60 if (len >= 0) \
61 max = len - pos; \
62 else \
63 max = 32; \
64 w = tre_mbrtowc_l(&next_c, str_byte, (size_t)max, &mbstate, \
65 tnfa->loc); \
66 if (w == (size_t)-1 || w == (size_t)-2) \
67 return REG_ILLSEQ; \
68 if (w == 0 && len >= 0) \
69 { \
70 pos_add_next = 1; \
71 next_c = 0; \
72 str_byte++; \
73 } \
74 else \
75 { \
76 pos_add_next = w; \
77 str_byte += w; \
78 } \
79 } \
80 break; \
81 } \
82 } while(/*CONSTCOND*/0)
84 #else /* !TRE_MULTIBYTE */
86 /* Wide character support, no multibyte support. */
87 #error TRE_MULTIBYTE undefined
89 #define GET_NEXT_WCHAR() \
90 do { \
91 prev_c = next_c; \
92 if (type == STR_BYTE) \
93 { \
94 pos++; \
95 if (len >= 0 && pos >= len) \
96 next_c = '\0'; \
97 else \
98 next_c = (unsigned char)(*str_byte++); \
99 } \
100 else if (type == STR_WIDE) \
102 pos++; \
103 if (len >= 0 && pos >= len) \
104 next_c = L'\0'; \
105 else \
106 next_c = *str_wide++; \
108 } while(/*CONSTCOND*/0)
110 #endif /* !TRE_MULTIBYTE */
112 #else /* !TRE_WCHAR */
114 /* No wide character or multibyte support. */
115 #error TRE_WCHAR undefined
117 #define GET_NEXT_WCHAR() \
118 do { \
119 prev_c = next_c; \
120 if (type == STR_BYTE) \
122 pos++; \
123 if (len >= 0 && pos >= len) \
124 next_c = '\0'; \
125 else \
126 next_c = (unsigned char)(*str_byte++); \
128 } while(/*CONSTCOND*/0)
130 #endif /* !TRE_WCHAR */
134 /* Assumes tre_tnfa_t *tnfa in scope */
135 #define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum_l(c, tnfa->loc))
137 #define CHECK_ASSERTIONS(assertions) \
138 (((assertions & ASSERT_AT_BOL) \
139 && (pos > 0 || reg_notbol) \
140 && (prev_c != L'\n' || !reg_newline)) \
141 || ((assertions & ASSERT_AT_EOL) \
142 && (next_c != L'\0' || reg_noteol) \
143 && (next_c != L'\n' || !reg_newline)) \
144 || ((assertions & ASSERT_AT_BOW) \
145 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
146 || ((assertions & ASSERT_AT_EOW) \
147 && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
148 || ((assertions & ASSERT_AT_WB) \
149 && (pos != 0 && next_c != L'\0' \
150 && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
151 || ((assertions & ASSERT_AT_WB_NEG) \
152 && (pos == 0 || next_c == L'\0' \
153 || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
155 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
156 ((trans_i->assertions & ASSERT_BRACKET_MATCH) \
157 && !tre_bracket_match(trans_i->u.bracket_match_list,(tre_cint_t)prev_c, \
158 tnfa))
163 inline static int
164 tre_tag_get(const tre_tag_t *tags, int i)
166 tags += i;
167 return tags->count > 0 ? tags->value : -1;
170 inline static void
171 tre_tag_set(tre_tag_t *tags, int i, int val, int touch)
173 tags += i;
174 if (tags->count++ == 0)
175 tags->first = val;
176 tags->value = val;
177 tags->touch = touch;
180 inline static void
181 tre_tag_reset(tre_tag_t *tags, int i)
183 tags[i].count = 0;
186 inline static int
187 tre_tag_touch_get(const tre_tag_t *tags, int i)
189 return tags[i].touch;
192 #ifdef TRE_DEBUG
193 inline static void
194 tre_print_tags(const tre_tag_t *tags, int num_tags)
196 int i;
197 for (i = 0; i < num_tags; i++, tags++)
199 switch(tags->count)
201 case 0:
202 DPRINT(("%d:(0,-1)", i));
203 break;
204 case 1:
205 DPRINT(("%d:(1,%d)", i, tags->first));
206 break;
207 default:
208 DPRINT(("%d:(%d,%d,%d)", i, tags->count, tags->first,
209 tags->value));
210 break;
212 if (i < (num_tags - 1))
213 DPRINT((" "));
217 inline static void
218 tre_print_tags_all(const tre_tag_t *tags, int num_tags)
220 int i;
221 for (i = 0; i < num_tags; i++, tags++)
223 switch(tags->count)
225 case 0:
226 DPRINT(("%d:(0,-1)/%d", i, tags->touch));
227 break;
228 case 1:
229 DPRINT(("%d:(1,%d)/%d", i, tags->first, tags->touch));
230 break;
231 default:
232 DPRINT(("%d:(%d,%d,%d)/%d", i, tags->count, tags->first,
233 tags->value, tags->touch));
234 break;
236 if (i < (num_tags - 1))
237 DPRINT((" "));
240 #endif /* TRE_DEBUG */
242 /* Return < 0, = 0 or > 0 depending on how the start/end pairs of a minimal
243 * group between t1 and t2 compare (t1 loses if < 0, t1 wins if > 0) */
244 inline static int
245 tre_minimal_tag_order(int start, int end, const tre_tag_t *tags1,
246 const tre_tag_t *tags2)
248 const tre_tag_t *t1, *t2;
250 t1 = tags1 + start;
251 t2 = tags2 + start;
252 /* We need both start tags to be set */
253 if (t1->count == 0 || t2->count == 0)
254 return 0;
256 /* The start tags must be equal */
257 if (t1->value != t2->value)
258 return 0;
260 t1 = tags1 + end;
261 t2 = tags2 + end;
262 /* For the end tags, we prefer set over unset, because unset means that
263 * the end tag is still growing */
264 if (t1->count == 0)
266 /* if t2 is set, t1 loses since it is unset */
267 if (t2->count != 0)
268 return -1;
270 /* if t2 not set, t1 wins since it is set */
271 else if (t2->count == 0)
272 return 1;
274 /* least current value wins */
275 return t2->value - t1->value;
278 /* Return < 0, = 0 or > 0 depending on how the i-th item of t1 and t2 compare
279 * (t1 loses if < 0, t1 wins if > 0) */
280 inline static int
281 tre_tag_order_1(int i, tre_tag_direction_t dir, const tre_tag_t *t1,
282 const tre_tag_t *t2)
284 int diff;
286 t1 += i;
287 t2 += i;
288 switch (dir)
290 case TRE_TAG_MINIMIZE:
291 /* least current value wins (because tags are initialized to all zeros,
292 * unset wins over set; also, tre_minimal_tag_order() will have already
293 * been run, which checks for being unset) */
294 return t2->value - t1->value;
296 case TRE_TAG_MAXIMIZE:
297 /* prefer set */
298 if (t1->count == 0)
300 /* if neither t1 and t2 are set, try next tag */
301 if (t2->count == 0)
302 return 0;
303 /* t2 is set, t1 loses since it is unset */
304 return -1;
306 /* if t2 not set, t1 wins since it is set */
307 else if (t2->count == 0)
308 return 1;
309 /* greatest initial value wins */
310 if ((diff = t1->first - t2->first) != 0)
311 return diff;
312 /* least number of times the tag was set, wins */
313 if ((diff = t2->count - t1->count) != 0)
314 return diff;
315 /* if the tags were only set once, they only have initial values */
316 if (t1->count == 1)
317 return 0;
318 /* greatest current value wins */
319 return t1->value - t2->value;
321 case TRE_TAG_LEFT_MAXIMIZE:
322 /* prefer set */
323 if (t1->count == 0)
325 /* if neither t1 and t2 are set, try next tag */
326 if (t2->count == 0)
327 return 0;
328 /* t2 is set, t1 loses since it is unset */
329 return -1;
331 /* if t2 not set, t1 wins since it is set */
332 else if (t2->count == 0)
333 return 1;
334 /* least initial value wins */
335 if ((diff = t2->first - t1->first) != 0)
336 return diff;
337 /* least number of times the tag was set, wins */
338 if ((diff = t2->count - t1->count) != 0)
339 return diff;
340 /* if the tags were only set once, they only have initial values */
341 if (t1->count == 1)
342 return 0;
343 /* greatest current value wins */
344 return t1->value - t2->value;
346 default:
347 /* Shouldn't happen: only assert if TRE_DEBUG defined */
348 assert(0);
349 break;
351 return 0;
354 #ifdef TRE_DEBUG
355 #define _MORE_DEBUGGING
356 #endif /* TRE_DEBUG */
358 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
359 inline static int
360 #ifdef _MORE_DEBUGGING
361 _tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
362 const tre_tag_t *t1, const tre_tag_t *t2)
363 #else /* !_MORE_DEBUGGING */
364 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
365 const tre_tag_t *t1, const tre_tag_t *t2)
366 #endif /* !_MORE_DEBUGGING */
368 int i, ret;
370 for (i = 0; i < num_tags; i++)
372 if ((ret = tre_tag_order_1(i, tag_directions[i], t1, t2)) != 0)
373 return (ret > 0);
375 /* assert(0);*/
376 return 0;
379 #ifdef _MORE_DEBUGGING
380 inline static int
381 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
382 const tre_tag_t *t1, const tre_tag_t *t2)
384 int ret = _tre_tag_order(num_tags, tag_directions, t1, t2);
385 DPRINT(("tre_tag_order: "));
386 tre_print_tags(t1, num_tags);
387 DPRINT((" %s ", ret ? "wins" : "doesn't win"));
388 tre_print_tags(t2, num_tags);
389 DPRINT(("\n"));
390 return ret;
392 #endif /* _MORE_DEBUGGING */
394 int __collate_equiv_value(locale_t loc, const wchar_t *str, size_t len);
396 inline static int
397 tre_bracket_match(tre_bracket_match_list_t * __restrict list, tre_cint_t wc,
398 const tre_tnfa_t * __restrict tnfa)
400 int match = 0;
401 int i;
402 tre_bracket_match_t *b;
403 tre_cint_t uc, lc;
404 int we, ue, le, got_equiv = 0;
405 int icase = ((tnfa->cflags & REG_ICASE) != 0);
407 DPRINT(("tre_bracket_match: %p, %d, %d\n", list, wc, icase));
408 if (icase)
410 if (tre_islower_l(wc, tnfa->loc))
412 lc = wc;
413 uc = tre_toupper_l(wc, tnfa->loc);
415 else if (tre_isupper_l(wc, tnfa->loc))
417 uc = wc;
418 lc = tre_tolower_l(wc, tnfa->loc);
420 else
422 icase = 0;
425 for (i = 0, b = list->bracket_matches; i < list->num_bracket_matches;
426 i++, b++)
428 switch (b->type)
430 case TRE_BRACKET_MATCH_TYPE_CHAR:
431 if (icase)
432 match = (b->value == uc || b->value == lc);
433 else
434 match = (b->value == wc);
435 break;
436 case TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN:
438 tre_cint_t start = b->value, end;
439 if (++i >= list->num_bracket_matches ||
440 (++b)->type != TRE_BRACKET_MATCH_TYPE_RANGE_END)
442 DPRINT(("tre_bracket_match: no following range end\n"));
443 assert(0);
444 goto error;
446 end = b->value;
447 if (!got_equiv)
449 if (icase)
451 ue = __collate_equiv_value(tnfa->loc, &uc, 1);
452 le = __collate_equiv_value(tnfa->loc, &lc, 1);
454 else
455 we = __collate_equiv_value(tnfa->loc, &wc, 1);
456 got_equiv = 1;
458 if (icase)
459 match = ((start <= ue && ue <= end) ||
460 (start <= le && le <= end));
461 else
462 match = (start <= we && we <= end);
463 break;
465 case TRE_BRACKET_MATCH_TYPE_RANGE_END:
466 DPRINT(("tre_bracket_match: range end without preceeding start\n"));
467 assert(0);
468 break;
469 case TRE_BRACKET_MATCH_TYPE_CLASS:
470 if (icase)
471 match = (tre_isctype_l(uc, b->value, tnfa->loc) ||
472 tre_isctype_l(lc, b->value, tnfa->loc));
473 else
474 match = (tre_isctype_l(wc, b->value, tnfa->loc));
475 break;
476 case TRE_BRACKET_MATCH_TYPE_EQUIVALENCE:
477 if (!got_equiv)
479 if (icase)
481 ue = __collate_equiv_value(tnfa->loc, &uc, 1);
482 le = __collate_equiv_value(tnfa->loc, &lc, 1);
484 else
485 we = __collate_equiv_value(tnfa->loc, &wc, 1);
486 got_equiv = 1;
488 if (icase)
489 match = (b->value == ue || b->value == le);
490 else
491 match = (b->value == we);
492 break;
493 default:
494 DPRINT(("tre_bracket_match: unknown type %d\n", b->type));
495 assert(0);
496 break;
498 if (match)
499 break;
501 error:
502 if (list->flags & TRE_BRACKET_MATCH_FLAG_NEGATE) {
503 if ((tnfa->cflags & REG_NEWLINE) && wc == '\n') return 0;
504 match = !match;
506 return match;