ldb:utf8: ldb_ascii_toupper() avoids real toupper()
[Samba.git] / lib / ldb / common / attrib_handlers.c
blob6ae12c88eec186c4d25323a51a1bc5e6d072a612
/*
   ldb database library

   Copyright (C) Andrew Tridgell  2005
   Copyright (C) Andrew Bartlett <abartlet@samba.org> 2006-2009

     ** NOTE! The following LGPL license applies to the ldb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

/*
  attribute handlers for well known attribute types, selected by syntax OID
  see rfc2252
*/
29 #include "ldb_private.h"
30 #include "system/locale.h"
31 #include "ldb_handlers.h"
34 default handler that just copies a ldb_val.
36 int ldb_handler_copy(struct ldb_context *ldb, void *mem_ctx,
37 const struct ldb_val *in, struct ldb_val *out)
39 *out = ldb_val_dup(mem_ctx, in);
40 if (in->length > 0 && out->data == NULL) {
41 ldb_oom(ldb);
42 return -1;
44 return 0;
48 a case folding copy handler, removing leading and trailing spaces and
49 multiple internal spaces
51 We exploit the fact that utf8 never uses the space octet except for
52 the space itself
54 int ldb_handler_fold(struct ldb_context *ldb, void *mem_ctx,
55 const struct ldb_val *in, struct ldb_val *out)
57 char *s, *t, *start;
58 bool in_space;
60 if (!in || !out || !(in->data)) {
61 return -1;
64 out->data = (uint8_t *)ldb_casefold(ldb, mem_ctx, (const char *)(in->data), in->length);
65 if (out->data == NULL) {
66 ldb_debug(ldb, LDB_DEBUG_ERROR, "ldb_handler_fold: unable to casefold string [%.*s]", (int)in->length, (const char *)in->data);
67 return -1;
70 start = (char *)(out->data);
71 in_space = true;
72 t = start;
73 for (s = start; *s != '\0'; s++) {
74 if (*s == ' ') {
75 if (in_space) {
77 * We already have one (or this is the start)
78 * and we don't want to add more
80 continue;
82 in_space = true;
83 } else {
84 in_space = false;
86 *t = *s;
87 t++;
90 if (in_space && t != start) {
91 /* the loop will have left a single trailing space */
92 t--;
94 *t = '\0';
96 out->length = t - start;
97 return 0;
100 /* length limited conversion of a ldb_val to an int64_t */
101 static int val_to_int64(const struct ldb_val *in, int64_t *v)
103 char *end;
104 char buf[64];
106 /* make sure we don't read past the end of the data */
107 if (in->length > sizeof(buf)-1) {
108 return LDB_ERR_INVALID_ATTRIBUTE_SYNTAX;
110 strncpy(buf, (char *)in->data, in->length);
111 buf[in->length] = 0;
113 *v = (int64_t) strtoll(buf, &end, 0);
114 if (*end != 0) {
115 return LDB_ERR_INVALID_ATTRIBUTE_SYNTAX;
117 return LDB_SUCCESS;
122 canonicalise a ldap Integer
123 rfc2252 specifies it should be in decimal form
125 static int ldb_canonicalise_Integer(struct ldb_context *ldb, void *mem_ctx,
126 const struct ldb_val *in, struct ldb_val *out)
128 int64_t i;
129 int ret;
131 ret = val_to_int64(in, &i);
132 if (ret != LDB_SUCCESS) {
133 return ret;
135 out->data = (uint8_t *) talloc_asprintf(mem_ctx, "%lld", (long long)i);
136 if (out->data == NULL) {
137 ldb_oom(ldb);
138 return LDB_ERR_OPERATIONS_ERROR;
140 out->length = strlen((char *)out->data);
141 return 0;
145 * Lexicographically ordered format for a ldap Integer
147 * [ INT64_MIN ... -3, -2, -1 | 0 | +1, +2, +3 ... INT64_MAX ]
148 * n o p
150 * For human readability sake, we continue to format the key as a string
151 * (like the canonicalize) rather than store as a fixed binary representation.
153 * In order to sort the integers in the correct string order, there are three
154 * techniques we use:
156 * 1. Zero padding
157 * 2. Negative integer inversion
158 * 3. 1-byte prefixes: 'n' < 'o' < 'p'
160 * 1. To have a fixed-width representation so that 10 sorts after 2 rather than
161 * after 1, we zero pad, like this 4-byte width example:
163 * 0001, 0002, 0010
165 * INT64_MAX = 2^63 - 1 = 9223372036854775807 (19 characters long)
167 * Meaning we need to pad to 19 characters.
169 * 2. This works for positive integers, but negative integers will still be
170 * sorted backwards, for example:
172 * -9223372036854775808 ..., -0000000000000000002, -0000000000000000001
173 * INT64_MIN -2 -1
175 * gets sorted based on string as:
177 * -0000000000000000001, -0000000000000000002, ... -9223372036854775808
179 * In order to fix this, we invert the negative integer range, so that they
180 * get sorted the same way as positive numbers. INT64_MIN becomes the lowest
181 * possible non-negative number (zero), and -1 becomes the highest (INT64_MAX).
183 * The actual conversion applied to negative number 'x' is:
184 * INT64_MAX - abs(x) + 1
185 * (The +1 is needed because abs(INT64_MIN) is one greater than INT64_MAX)
187 * 3. Finally, we now have two different numbers that map to the same key, e.g.
188 * INT64_MIN maps to -0000000000000000000 and zero maps to 0000000000000000000.
189 * In order to avoid confusion, we give every number a prefix representing its
190 * sign: 'n' for negative numbers, 'o' for zero, and 'p' for positive. (Note
191 * that '+' and '-' weren't used because they sort the wrong way).
193 * The result is a range of key values that look like this:
195 * n0000000000000000000, ... n9223372036854775807,
196 * INT64_MIN -1
198 * o0000000000000000000,
199 * ZERO
201 * p0000000000000000001, ... p9223372036854775807
202 * +1 INT64_MAX
204 static int ldb_index_format_Integer(struct ldb_context *ldb,
205 void *mem_ctx,
206 const struct ldb_val *in,
207 struct ldb_val *out)
209 int64_t i;
210 int ret;
211 char prefix;
212 size_t len;
214 ret = val_to_int64(in, &i);
215 if (ret != LDB_SUCCESS) {
216 return ret;
219 if (i < 0) {
221 * i is negative, so this is subtraction rather than
222 * wrap-around.
224 prefix = 'n';
225 i = INT64_MAX + i + 1;
226 } else if (i > 0) {
227 prefix = 'p';
228 } else {
229 prefix = 'o';
232 out->data = (uint8_t *) talloc_asprintf(mem_ctx, "%c%019lld", prefix, (long long)i);
233 if (out->data == NULL) {
234 ldb_oom(ldb);
235 return LDB_ERR_OPERATIONS_ERROR;
238 len = talloc_array_length(out->data) - 1;
239 if (len != 20) {
240 ldb_debug(ldb, LDB_DEBUG_ERROR,
241 __location__ ": expected index format str %s to"
242 " have length 20 but got %zu",
243 (char*)out->data, len);
244 return LDB_ERR_OPERATIONS_ERROR;
247 out->length = 20;
248 return 0;
/*
  Compare two Integers.

  Both values are parsed with val_to_int64(); its return codes are not
  examined here, so a value that is too long to parse compares as 0.
  Returns 0 if equal, 1 if v1 is greater, -1 if v1 is smaller.
*/
static int ldb_comparison_Integer(struct ldb_context *ldb, void *mem_ctx,
				  const struct ldb_val *v1, const struct ldb_val *v2)
{
	int64_t lhs = 0;
	int64_t rhs = 0;

	val_to_int64(v1, &lhs);
	val_to_int64(v2, &rhs);

	if (lhs != rhs) {
		return (lhs > rhs) ? 1 : -1;
	}
	return 0;
}
265 canonicalise a ldap Boolean
266 rfc2252 specifies it should be either "TRUE" or "FALSE"
268 static int ldb_canonicalise_Boolean(struct ldb_context *ldb, void *mem_ctx,
269 const struct ldb_val *in, struct ldb_val *out)
271 if (in->length >= 4 && strncasecmp((char *)in->data, "TRUE", in->length) == 0) {
272 out->data = (uint8_t *)talloc_strdup(mem_ctx, "TRUE");
273 out->length = 4;
274 } else if (in->length >= 5 && strncasecmp((char *)in->data, "FALSE", in->length) == 0) {
275 out->data = (uint8_t *)talloc_strdup(mem_ctx, "FALSE");
276 out->length = 5;
277 } else {
278 return -1;
280 return 0;
284 * compare two Booleans.
286 * According to RFC4517 4.2.2, "the booleanMatch rule is an equality matching
287 * rule", meaning it isn't used for ordering.
289 * However, it seems conceivable that Samba could be coerced into sorting on a
290 * field with Boolean syntax, so we might as well have consistent behaviour in
291 * that case.
293 * The most probably values are {"FALSE", 5} and {"TRUE", 4}. To save time we
294 * compare first by length, which makes FALSE > TRUE. This is somewhat
295 * contrary to convention, but is how Samba has worked forever.
297 * If somehow we are comparing incompletely normalised values where the length
298 * is the same (for example {"false", 5} and {"TRUE\0", 5}), the length is the
299 * same, and we fall back to a strncasecmp. In this case, since "FALSE" is
300 * alphabetically lower, we swap the order, so that "TRUE\0" again comes
301 * before "FALSE".
303 * ldb_canonicalise_Boolean (just above) gives us a clue as to what we might
304 * expect to cope with by way of invalid values.
306 static int ldb_comparison_Boolean(struct ldb_context *ldb, void *mem_ctx,
307 const struct ldb_val *v1, const struct ldb_val *v2)
309 if (v1->length != v2->length) {
310 return NUMERIC_CMP(v2->length, v1->length);
312 /* reversed, see long comment above */
313 return strncasecmp((char *)v2->data, (char *)v1->data, v1->length);
318 compare two binary blobs
320 int ldb_comparison_binary(struct ldb_context *ldb, void *mem_ctx,
321 const struct ldb_val *v1, const struct ldb_val *v2)
323 if (v1->length != v2->length) {
324 return NUMERIC_CMP(v1->length, v2->length);
326 return memcmp(v1->data, v2->data, v1->length);
330 compare two case insensitive strings, ignoring multiple whitespaces
331 and leading and trailing whitespaces
332 see rfc2252 section 8.1
334 try to optimize for the ascii case,
335 but if we find out an utf8 codepoint revert to slower but correct function
337 int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx,
338 const struct ldb_val *v1, const struct ldb_val *v2)
340 const char *s1=(const char *)v1->data, *s2=(const char *)v2->data;
341 size_t n1 = v1->length, n2 = v2->length;
342 char *b1, *b2;
343 const char *u1, *u2;
344 int ret;
346 while (n1 && *s1 == ' ') { s1++; n1--; };
347 while (n2 && *s2 == ' ') { s2++; n2--; };
349 while (n1 && n2 && *s1 && *s2) {
350 /* the first 127 (0x7F) chars are ascii and utf8 guarantees they
351 * never appear in multibyte sequences */
352 if (((unsigned char)s1[0]) & 0x80) goto utf8str;
353 if (((unsigned char)s2[0]) & 0x80) goto utf8str;
354 if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) {
355 break;
357 if (*s1 == ' ') {
358 while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
359 while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
361 s1++; s2++;
362 n1--; n2--;
365 /* check for trailing spaces only if the other pointers has
366 * reached the end of the strings otherwise we can
367 * mistakenly match. ex. "domain users" <->
368 * "domainUpdates"
370 if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
371 while (n1 && *s1 == ' ') { s1++; n1--; }
373 if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
374 while (n2 && *s2 == ' ') { s2++; n2--; }
376 if (n1 == 0 && n2 != 0) {
377 return -(int)ldb_ascii_toupper(*s2);
379 if (n2 == 0 && n1 != 0) {
380 return (int)ldb_ascii_toupper(*s1);
382 if (n1 == 0 && n2 == 0) {
383 return 0;
385 return (int)ldb_ascii_toupper(*s1) - (int)ldb_ascii_toupper(*s2);
387 utf8str:
389 * No need to recheck from the start, just from the first utf8 charu
390 * found. Note that the callback of ldb_casefold() needs to be ascii
391 * compatible.
393 b1 = ldb_casefold(ldb, mem_ctx, s1, n1);
394 b2 = ldb_casefold(ldb, mem_ctx, s2, n2);
396 if (!b1 || !b2) {
397 /* One of the strings was not UTF8, so we have no
398 * options but to do a binary compare */
399 talloc_free(b1);
400 talloc_free(b2);
401 ret = memcmp(s1, s2, MIN(n1, n2));
402 if (ret == 0) {
403 if (n1 == n2) return 0;
404 if (n1 > n2) {
405 return (int)ldb_ascii_toupper(s1[n2]);
406 } else {
407 return -(int)ldb_ascii_toupper(s2[n1]);
410 return ret;
413 u1 = b1;
414 u2 = b2;
416 while (*u1 & *u2) {
417 if (*u1 != *u2)
418 break;
419 if (*u1 == ' ') {
420 while (u1[0] == u1[1]) u1++;
421 while (u2[0] == u2[1]) u2++;
423 u1++; u2++;
425 if (! (*u1 && *u2)) {
426 while (*u1 == ' ') u1++;
427 while (*u2 == ' ') u2++;
429 ret = (int)(*u1 - *u2);
431 talloc_free(b1);
432 talloc_free(b2);
434 return ret;
439 canonicalise a attribute in DN format
441 static int ldb_canonicalise_dn(struct ldb_context *ldb, void *mem_ctx,
442 const struct ldb_val *in, struct ldb_val *out)
444 struct ldb_dn *dn;
445 int ret = -1;
447 out->length = 0;
448 out->data = NULL;
450 dn = ldb_dn_from_ldb_val(mem_ctx, ldb, in);
451 if ( ! ldb_dn_validate(dn)) {
452 return LDB_ERR_INVALID_DN_SYNTAX;
455 out->data = (uint8_t *)ldb_dn_alloc_casefold(mem_ctx, dn);
456 if (out->data == NULL) {
457 goto done;
459 out->length = strlen((char *)out->data);
461 ret = 0;
463 done:
464 talloc_free(dn);
466 return ret;
/*
  Compare two DNs.

  Both values are parsed into temporary ldb_dn objects and validated.
  Returns the result of ldb_dn_compare(), or -1 if either value is not a
  valid DN. Both temporaries are released on every path.
*/
static int ldb_comparison_dn(struct ldb_context *ldb, void *mem_ctx,
			     const struct ldb_val *v1, const struct ldb_val *v2)
{
	struct ldb_dn *dn1 = NULL, *dn2 = NULL;
	int ret = -1;

	dn1 = ldb_dn_from_ldb_val(mem_ctx, ldb, v1);
	if ( ! ldb_dn_validate(dn1)) {
		goto done;
	}

	dn2 = ldb_dn_from_ldb_val(mem_ctx, ldb, v2);
	if ( ! ldb_dn_validate(dn2)) {
		goto done;
	}

	ret = ldb_dn_compare(dn1, dn2);

done:
	/* either pointer may still be NULL here */
	talloc_free(dn1);
	talloc_free(dn2);
	return ret;
}
/*
  Compare two utc time values. 1 second resolution.

  Both values are converted with ldb_val_to_time(); its return codes are
  not examined here, and each time starts out as 0.
  Returns 0 if equal, 1 if v1 is later, -1 if v1 is earlier.
*/
static int ldb_comparison_utctime(struct ldb_context *ldb, void *mem_ctx,
				  const struct ldb_val *v1, const struct ldb_val *v2)
{
	time_t first = 0;
	time_t second = 0;

	ldb_val_to_time(v1, &first);
	ldb_val_to_time(v2, &second);

	if (first != second) {
		return (first > second) ? 1 : -1;
	}
	return 0;
}
508 canonicalise a utc time
510 static int ldb_canonicalise_utctime(struct ldb_context *ldb, void *mem_ctx,
511 const struct ldb_val *in, struct ldb_val *out)
513 time_t t;
514 int ret;
515 ret = ldb_val_to_time(in, &t);
516 if (ret != LDB_SUCCESS) {
517 return ret;
519 out->data = (uint8_t *)ldb_timestring_utc(mem_ctx, t);
520 if (out->data == NULL) {
521 ldb_oom(ldb);
522 return LDB_ERR_OPERATIONS_ERROR;
524 out->length = strlen((char *)out->data);
525 return 0;
529 canonicalise a generalized time
531 static int ldb_canonicalise_generalizedtime(struct ldb_context *ldb, void *mem_ctx,
532 const struct ldb_val *in, struct ldb_val *out)
534 time_t t;
535 int ret;
536 ret = ldb_val_to_time(in, &t);
537 if (ret != LDB_SUCCESS) {
538 return ret;
540 out->data = (uint8_t *)ldb_timestring(mem_ctx, t);
541 if (out->data == NULL) {
542 ldb_oom(ldb);
543 return LDB_ERR_OPERATIONS_ERROR;
545 out->length = strlen((char *)out->data);
546 return 0;
550 table of standard attribute handlers
552 static const struct ldb_schema_syntax ldb_standard_syntaxes[] = {
554 .name = LDB_SYNTAX_INTEGER,
555 .ldif_read_fn = ldb_handler_copy,
556 .ldif_write_fn = ldb_handler_copy,
557 .canonicalise_fn = ldb_canonicalise_Integer,
558 .comparison_fn = ldb_comparison_Integer
561 .name = LDB_SYNTAX_ORDERED_INTEGER,
562 .ldif_read_fn = ldb_handler_copy,
563 .ldif_write_fn = ldb_handler_copy,
564 .canonicalise_fn = ldb_canonicalise_Integer,
565 .index_format_fn = ldb_index_format_Integer,
566 .comparison_fn = ldb_comparison_Integer
569 .name = LDB_SYNTAX_OCTET_STRING,
570 .ldif_read_fn = ldb_handler_copy,
571 .ldif_write_fn = ldb_handler_copy,
572 .canonicalise_fn = ldb_handler_copy,
573 .comparison_fn = ldb_comparison_binary
576 .name = LDB_SYNTAX_DIRECTORY_STRING,
577 .ldif_read_fn = ldb_handler_copy,
578 .ldif_write_fn = ldb_handler_copy,
579 .canonicalise_fn = ldb_handler_fold,
580 .comparison_fn = ldb_comparison_fold
583 .name = LDB_SYNTAX_DN,
584 .ldif_read_fn = ldb_handler_copy,
585 .ldif_write_fn = ldb_handler_copy,
586 .canonicalise_fn = ldb_canonicalise_dn,
587 .comparison_fn = ldb_comparison_dn
590 .name = LDB_SYNTAX_OBJECTCLASS,
591 .ldif_read_fn = ldb_handler_copy,
592 .ldif_write_fn = ldb_handler_copy,
593 .canonicalise_fn = ldb_handler_fold,
594 .comparison_fn = ldb_comparison_fold
597 .name = LDB_SYNTAX_UTC_TIME,
598 .ldif_read_fn = ldb_handler_copy,
599 .ldif_write_fn = ldb_handler_copy,
600 .canonicalise_fn = ldb_canonicalise_utctime,
601 .comparison_fn = ldb_comparison_utctime
604 .name = LDB_SYNTAX_GENERALIZED_TIME,
605 .ldif_read_fn = ldb_handler_copy,
606 .ldif_write_fn = ldb_handler_copy,
607 .canonicalise_fn = ldb_canonicalise_generalizedtime,
608 .comparison_fn = ldb_comparison_utctime
611 .name = LDB_SYNTAX_BOOLEAN,
612 .ldif_read_fn = ldb_handler_copy,
613 .ldif_write_fn = ldb_handler_copy,
614 .canonicalise_fn = ldb_canonicalise_Boolean,
615 .comparison_fn = ldb_comparison_Boolean
621 return the attribute handlers for a given syntax name
623 const struct ldb_schema_syntax *ldb_standard_syntax_by_name(struct ldb_context *ldb,
624 const char *syntax)
626 unsigned int i;
627 unsigned num_handlers = sizeof(ldb_standard_syntaxes)/sizeof(ldb_standard_syntaxes[0]);
628 /* TODO: should be replaced with a binary search */
629 for (i=0;i<num_handlers;i++) {
630 if (strcmp(ldb_standard_syntaxes[i].name, syntax) == 0) {
631 return &ldb_standard_syntaxes[i];
634 return NULL;
637 int ldb_any_comparison(struct ldb_context *ldb, void *mem_ctx,
638 ldb_attr_handler_t canonicalise_fn,
639 const struct ldb_val *v1,
640 const struct ldb_val *v2)
642 int ret, ret1, ret2;
643 struct ldb_val v1_canon, v2_canon;
644 TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
646 /* I could try and bail if tmp_ctx was NULL, but what return
647 * value would I use?
649 * It seems easier to continue on the NULL context
651 ret1 = canonicalise_fn(ldb, tmp_ctx, v1, &v1_canon);
652 ret2 = canonicalise_fn(ldb, tmp_ctx, v2, &v2_canon);
654 if (ret1 == LDB_SUCCESS && ret2 == LDB_SUCCESS) {
655 ret = ldb_comparison_binary(ldb, mem_ctx, &v1_canon, &v2_canon);
656 } else {
657 ret = ldb_comparison_binary(ldb, mem_ctx, v1, v2);
659 talloc_free(tmp_ctx);
660 return ret;