4 Copyright (C) Andrew Tridgell 2005
5 Copyright (C) Andrew Bartlett <abartlet@samba.org> 2006-2009
7 ** NOTE! The following LGPL license applies to the ldb
8 ** library. This does NOT imply that all of Samba is released
11 This library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public
13 License as published by the Free Software Foundation; either
14 version 3 of the License, or (at your option) any later version.
16 This library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public
22 License along with this library; if not, see <http://www.gnu.org/licenses/>.
25 attribute handlers for well known attribute types, selected by syntax OID
29 #include "ldb_private.h"
30 #include "system/locale.h"
31 #include "ldb_handlers.h"
34 default handler that just copies a ldb_val.
36 int ldb_handler_copy(struct ldb_context
*ldb
, void *mem_ctx
,
37 const struct ldb_val
*in
, struct ldb_val
*out
)
39 *out
= ldb_val_dup(mem_ctx
, in
);
40 if (in
->length
> 0 && out
->data
== NULL
) {
48 a case folding copy handler, removing leading and trailing spaces and
49 multiple internal spaces
51 We exploit the fact that utf8 never uses the space octet except for
54 int ldb_handler_fold(struct ldb_context
*ldb
, void *mem_ctx
,
55 const struct ldb_val
*in
, struct ldb_val
*out
)
60 if (!in
|| !out
|| !(in
->data
)) {
64 out
->data
= (uint8_t *)ldb_casefold(ldb
, mem_ctx
, (const char *)(in
->data
), in
->length
);
65 if (out
->data
== NULL
) {
66 ldb_debug(ldb
, LDB_DEBUG_ERROR
, "ldb_handler_fold: unable to casefold string [%.*s]", (int)in
->length
, (const char *)in
->data
);
70 start
= (char *)(out
->data
);
73 for (s
= start
; *s
!= '\0'; s
++) {
77 * We already have one (or this is the start)
78 * and we don't want to add more
90 if (in_space
&& t
!= start
) {
91 /* the loop will have left a single trailing space */
96 out
->length
= t
- start
;
100 /* length limited conversion of a ldb_val to an int64_t */
101 static int val_to_int64(const struct ldb_val
*in
, int64_t *v
)
106 /* make sure we don't read past the end of the data */
107 if (in
->length
> sizeof(buf
)-1) {
108 return LDB_ERR_INVALID_ATTRIBUTE_SYNTAX
;
110 strncpy(buf
, (char *)in
->data
, in
->length
);
113 *v
= (int64_t) strtoll(buf
, &end
, 0);
115 return LDB_ERR_INVALID_ATTRIBUTE_SYNTAX
;
122 canonicalise a ldap Integer
123 rfc2252 specifies it should be in decimal form
125 static int ldb_canonicalise_Integer(struct ldb_context
*ldb
, void *mem_ctx
,
126 const struct ldb_val
*in
, struct ldb_val
*out
)
131 ret
= val_to_int64(in
, &i
);
132 if (ret
!= LDB_SUCCESS
) {
135 out
->data
= (uint8_t *) talloc_asprintf(mem_ctx
, "%lld", (long long)i
);
136 if (out
->data
== NULL
) {
138 return LDB_ERR_OPERATIONS_ERROR
;
140 out
->length
= strlen((char *)out
->data
);
145 * Lexicographically ordered format for a ldap Integer
147 * [ INT64_MIN ... -3, -2, -1 | 0 | +1, +2, +3 ... INT64_MAX ]
150 * For human readability sake, we continue to format the key as a string
151 * (like the canonicalize) rather than store as a fixed binary representation.
153 * In order to sort the integers in the correct string order, there are three
157 * 2. Negative integer inversion
158 * 3. 1-byte prefixes: 'n' < 'o' < 'p'
160 * 1. To have a fixed-width representation so that 10 sorts after 2 rather than
161 * after 1, we zero pad, like this 4-byte width example:
165 * INT64_MAX = 2^63 - 1 = 9223372036854775807 (19 characters long)
167 * Meaning we need to pad to 19 characters.
169 * 2. This works for positive integers, but negative integers will still be
170 * sorted backwards, for example:
172 * -9223372036854775808 ..., -0000000000000000002, -0000000000000000001
175 * gets sorted based on string as:
177 * -0000000000000000001, -0000000000000000002, ... -9223372036854775808
179 * In order to fix this, we invert the negative integer range, so that they
180 * get sorted the same way as positive numbers. INT64_MIN becomes the lowest
181 * possible non-negative number (zero), and -1 becomes the highest (INT64_MAX).
183 * The actual conversion applied to negative number 'x' is:
184 * INT64_MAX - abs(x) + 1
185 * (The +1 is needed because abs(INT64_MIN) is one greater than INT64_MAX)
187 * 3. Finally, we now have two different numbers that map to the same key, e.g.
188 * INT64_MIN maps to -0000000000000000000 and zero maps to 0000000000000000000.
189 * In order to avoid confusion, we give every number a prefix representing its
190 * sign: 'n' for negative numbers, 'o' for zero, and 'p' for positive. (Note
191 * that '+' and '-' weren't used because they sort the wrong way).
193 * The result is a range of key values that look like this:
195 * n0000000000000000000, ... n9223372036854775807,
198 * o0000000000000000000,
201 * p0000000000000000001, ... p9223372036854775807
204 static int ldb_index_format_Integer(struct ldb_context
*ldb
,
206 const struct ldb_val
*in
,
214 ret
= val_to_int64(in
, &i
);
215 if (ret
!= LDB_SUCCESS
) {
221 * i is negative, so this is subtraction rather than
225 i
= INT64_MAX
+ i
+ 1;
232 out
->data
= (uint8_t *) talloc_asprintf(mem_ctx
, "%c%019lld", prefix
, (long long)i
);
233 if (out
->data
== NULL
) {
235 return LDB_ERR_OPERATIONS_ERROR
;
238 len
= talloc_array_length(out
->data
) - 1;
240 ldb_debug(ldb
, LDB_DEBUG_ERROR
,
241 __location__
": expected index format str %s to"
242 " have length 20 but got %zu",
243 (char*)out
->data
, len
);
244 return LDB_ERR_OPERATIONS_ERROR
;
/*
  compare two Integers

  Returns 0 if the two values parse to the same number, 1 if v1 is
  greater and -1 if v1 is smaller.

  The results of val_to_int64() are deliberately not checked: this
  callback has no way to report an error. Both operands therefore start
  at 0 so that a value the parser rejects without storing anything still
  compares deterministically instead of reading an indeterminate
  variable.
*/
static int ldb_comparison_Integer(struct ldb_context *ldb, void *mem_ctx,
				  const struct ldb_val *v1, const struct ldb_val *v2)
{
	int64_t i1 = 0;
	int64_t i2 = 0;

	val_to_int64(v1, &i1);
	val_to_int64(v2, &i2);
	if (i1 == i2) {
		return 0;
	}
	return i1 > i2 ? 1 : -1;
}
265 canonicalise a ldap Boolean
266 rfc2252 specifies it should be either "TRUE" or "FALSE"
268 static int ldb_canonicalise_Boolean(struct ldb_context
*ldb
, void *mem_ctx
,
269 const struct ldb_val
*in
, struct ldb_val
*out
)
271 if (in
->length
>= 4 && strncasecmp((char *)in
->data
, "TRUE", in
->length
) == 0) {
272 out
->data
= (uint8_t *)talloc_strdup(mem_ctx
, "TRUE");
274 } else if (in
->length
>= 5 && strncasecmp((char *)in
->data
, "FALSE", in
->length
) == 0) {
275 out
->data
= (uint8_t *)talloc_strdup(mem_ctx
, "FALSE");
284 * compare two Booleans.
286 * According to RFC4517 4.2.2, "the booleanMatch rule is an equality matching
287 * rule", meaning it isn't used for ordering.
289 * However, it seems conceivable that Samba could be coerced into sorting on a
290 * field with Boolean syntax, so we might as well have consistent behaviour in
293 * The most probable values are {"FALSE", 5} and {"TRUE", 4}. To save time we
294 * compare first by length, which makes FALSE > TRUE. This is somewhat
295 * contrary to convention, but is how Samba has worked forever.
297 * If somehow we are comparing incompletely normalised values where the length
298 * is the same (for example {"false", 5} and {"TRUE\0", 5}), the length is the
299 * same, and we fall back to a strncasecmp. In this case, since "FALSE" is
300 * alphabetically lower, we swap the order, so that "TRUE\0" again comes
303 * ldb_canonicalise_Boolean (just above) gives us a clue as to what we might
304 * expect to cope with by way of invalid values.
306 static int ldb_comparison_Boolean(struct ldb_context
*ldb
, void *mem_ctx
,
307 const struct ldb_val
*v1
, const struct ldb_val
*v2
)
309 if (v1
->length
!= v2
->length
) {
310 return NUMERIC_CMP(v2
->length
, v1
->length
);
312 /* reversed, see long comment above */
313 return strncasecmp((char *)v2
->data
, (char *)v1
->data
, v1
->length
);
318 compare two binary blobs
320 int ldb_comparison_binary(struct ldb_context
*ldb
, void *mem_ctx
,
321 const struct ldb_val
*v1
, const struct ldb_val
*v2
)
323 if (v1
->length
!= v2
->length
) {
324 return NUMERIC_CMP(v1
->length
, v2
->length
);
326 return memcmp(v1
->data
, v2
->data
, v1
->length
);
330 compare two case insensitive strings, ignoring multiple whitespaces
331 and leading and trailing whitespaces
332 see rfc2252 section 8.1
334 try to optimize for the ascii case,
335 but if we find out an utf8 codepoint revert to slower but correct function
337 int ldb_comparison_fold(struct ldb_context
*ldb
, void *mem_ctx
,
338 const struct ldb_val
*v1
, const struct ldb_val
*v2
)
340 const char *s1
=(const char *)v1
->data
, *s2
=(const char *)v2
->data
;
341 size_t n1
= v1
->length
, n2
= v2
->length
;
346 while (n1
&& *s1
== ' ') { s1
++; n1
--; };
347 while (n2
&& *s2
== ' ') { s2
++; n2
--; };
349 while (n1
&& n2
&& *s1
&& *s2
) {
350 /* the first 127 (0x7F) chars are ascii and utf8 guarantees they
351 * never appear in multibyte sequences */
352 if (((unsigned char)s1
[0]) & 0x80) goto utf8str
;
353 if (((unsigned char)s2
[0]) & 0x80) goto utf8str
;
354 if (ldb_ascii_toupper(*s1
) != ldb_ascii_toupper(*s2
)) {
358 while (n1
> 1 && s1
[0] == s1
[1]) { s1
++; n1
--; }
359 while (n2
> 1 && s2
[0] == s2
[1]) { s2
++; n2
--; }
365 /* check for trailing spaces only if the other pointers has
366 * reached the end of the strings otherwise we can
367 * mistakenly match. ex. "domain users" <->
370 if (n1
&& *s1
== ' ' && (!n2
|| !*s2
)) {
371 while (n1
&& *s1
== ' ') { s1
++; n1
--; }
373 if (n2
&& *s2
== ' ' && (!n1
|| !*s1
)) {
374 while (n2
&& *s2
== ' ') { s2
++; n2
--; }
376 if (n1
== 0 && n2
!= 0) {
377 return -(int)ldb_ascii_toupper(*s2
);
379 if (n2
== 0 && n1
!= 0) {
380 return (int)ldb_ascii_toupper(*s1
);
382 if (n1
== 0 && n2
== 0) {
385 return (int)ldb_ascii_toupper(*s1
) - (int)ldb_ascii_toupper(*s2
);
389 * No need to recheck from the start, just from the first utf8 charu
390 * found. Note that the callback of ldb_casefold() needs to be ascii
393 b1
= ldb_casefold(ldb
, mem_ctx
, s1
, n1
);
394 b2
= ldb_casefold(ldb
, mem_ctx
, s2
, n2
);
397 /* One of the strings was not UTF8, so we have no
398 * options but to do a binary compare */
401 ret
= memcmp(s1
, s2
, MIN(n1
, n2
));
403 if (n1
== n2
) return 0;
405 return (int)ldb_ascii_toupper(s1
[n2
]);
407 return -(int)ldb_ascii_toupper(s2
[n1
]);
420 while (u1
[0] == u1
[1]) u1
++;
421 while (u2
[0] == u2
[1]) u2
++;
425 if (! (*u1
&& *u2
)) {
426 while (*u1
== ' ') u1
++;
427 while (*u2
== ' ') u2
++;
429 ret
= (int)(*u1
- *u2
);
439 canonicalise a attribute in DN format
441 static int ldb_canonicalise_dn(struct ldb_context
*ldb
, void *mem_ctx
,
442 const struct ldb_val
*in
, struct ldb_val
*out
)
450 dn
= ldb_dn_from_ldb_val(mem_ctx
, ldb
, in
);
451 if ( ! ldb_dn_validate(dn
)) {
452 return LDB_ERR_INVALID_DN_SYNTAX
;
455 out
->data
= (uint8_t *)ldb_dn_alloc_casefold(mem_ctx
, dn
);
456 if (out
->data
== NULL
) {
459 out
->length
= strlen((char *)out
->data
);
/*
  compare two dns

  Both values are parsed into temporary ldb_dn objects on mem_ctx and
  compared with ldb_dn_compare(). If either value fails validation the
  result is -1.

  Both temporaries are released on every exit path, so an invalid first
  or second value no longer leaves a parsed DN behind on mem_ctx.
*/
static int ldb_comparison_dn(struct ldb_context *ldb, void *mem_ctx,
			     const struct ldb_val *v1, const struct ldb_val *v2)
{
	struct ldb_dn *dn1 = NULL, *dn2 = NULL;
	int ret = -1;

	dn1 = ldb_dn_from_ldb_val(mem_ctx, ldb, v1);
	if ( ! ldb_dn_validate(dn1)) {
		goto done;
	}

	dn2 = ldb_dn_from_ldb_val(mem_ctx, ldb, v2);
	if ( ! ldb_dn_validate(dn2)) {
		goto done;
	}

	ret = ldb_dn_compare(dn1, dn2);

done:
	/* either pointer may still be NULL; talloc_free() tolerates that */
	talloc_free(dn1);
	talloc_free(dn2);
	return ret;
}
/*
  compare two utc time values. 1 second resolution

  Returns 0 if both values convert to the same time_t, 1 if v1 is later
  and -1 if v1 is earlier.

  The results of ldb_val_to_time() are not checked because this callback
  has no way to report an error. Both operands start at 0 so that a
  value the converter rejects without storing anything still compares
  deterministically.
*/
static int ldb_comparison_utctime(struct ldb_context *ldb, void *mem_ctx,
				  const struct ldb_val *v1, const struct ldb_val *v2)
{
	time_t t1 = 0;
	time_t t2 = 0;

	ldb_val_to_time(v1, &t1);
	ldb_val_to_time(v2, &t2);
	if (t1 == t2) {
		return 0;
	}
	return t1 > t2 ? 1 : -1;
}
508 canonicalise a utc time
510 static int ldb_canonicalise_utctime(struct ldb_context
*ldb
, void *mem_ctx
,
511 const struct ldb_val
*in
, struct ldb_val
*out
)
515 ret
= ldb_val_to_time(in
, &t
);
516 if (ret
!= LDB_SUCCESS
) {
519 out
->data
= (uint8_t *)ldb_timestring_utc(mem_ctx
, t
);
520 if (out
->data
== NULL
) {
522 return LDB_ERR_OPERATIONS_ERROR
;
524 out
->length
= strlen((char *)out
->data
);
529 canonicalise a generalized time
531 static int ldb_canonicalise_generalizedtime(struct ldb_context
*ldb
, void *mem_ctx
,
532 const struct ldb_val
*in
, struct ldb_val
*out
)
536 ret
= ldb_val_to_time(in
, &t
);
537 if (ret
!= LDB_SUCCESS
) {
540 out
->data
= (uint8_t *)ldb_timestring(mem_ctx
, t
);
541 if (out
->data
== NULL
) {
543 return LDB_ERR_OPERATIONS_ERROR
;
545 out
->length
= strlen((char *)out
->data
);
550 table of standard attribute handlers
552 static const struct ldb_schema_syntax ldb_standard_syntaxes
[] = {
554 .name
= LDB_SYNTAX_INTEGER
,
555 .ldif_read_fn
= ldb_handler_copy
,
556 .ldif_write_fn
= ldb_handler_copy
,
557 .canonicalise_fn
= ldb_canonicalise_Integer
,
558 .comparison_fn
= ldb_comparison_Integer
561 .name
= LDB_SYNTAX_ORDERED_INTEGER
,
562 .ldif_read_fn
= ldb_handler_copy
,
563 .ldif_write_fn
= ldb_handler_copy
,
564 .canonicalise_fn
= ldb_canonicalise_Integer
,
565 .index_format_fn
= ldb_index_format_Integer
,
566 .comparison_fn
= ldb_comparison_Integer
569 .name
= LDB_SYNTAX_OCTET_STRING
,
570 .ldif_read_fn
= ldb_handler_copy
,
571 .ldif_write_fn
= ldb_handler_copy
,
572 .canonicalise_fn
= ldb_handler_copy
,
573 .comparison_fn
= ldb_comparison_binary
576 .name
= LDB_SYNTAX_DIRECTORY_STRING
,
577 .ldif_read_fn
= ldb_handler_copy
,
578 .ldif_write_fn
= ldb_handler_copy
,
579 .canonicalise_fn
= ldb_handler_fold
,
580 .comparison_fn
= ldb_comparison_fold
583 .name
= LDB_SYNTAX_DN
,
584 .ldif_read_fn
= ldb_handler_copy
,
585 .ldif_write_fn
= ldb_handler_copy
,
586 .canonicalise_fn
= ldb_canonicalise_dn
,
587 .comparison_fn
= ldb_comparison_dn
590 .name
= LDB_SYNTAX_OBJECTCLASS
,
591 .ldif_read_fn
= ldb_handler_copy
,
592 .ldif_write_fn
= ldb_handler_copy
,
593 .canonicalise_fn
= ldb_handler_fold
,
594 .comparison_fn
= ldb_comparison_fold
597 .name
= LDB_SYNTAX_UTC_TIME
,
598 .ldif_read_fn
= ldb_handler_copy
,
599 .ldif_write_fn
= ldb_handler_copy
,
600 .canonicalise_fn
= ldb_canonicalise_utctime
,
601 .comparison_fn
= ldb_comparison_utctime
604 .name
= LDB_SYNTAX_GENERALIZED_TIME
,
605 .ldif_read_fn
= ldb_handler_copy
,
606 .ldif_write_fn
= ldb_handler_copy
,
607 .canonicalise_fn
= ldb_canonicalise_generalizedtime
,
608 .comparison_fn
= ldb_comparison_utctime
611 .name
= LDB_SYNTAX_BOOLEAN
,
612 .ldif_read_fn
= ldb_handler_copy
,
613 .ldif_write_fn
= ldb_handler_copy
,
614 .canonicalise_fn
= ldb_canonicalise_Boolean
,
615 .comparison_fn
= ldb_comparison_Boolean
621 return the attribute handlers for a given syntax name
623 const struct ldb_schema_syntax
*ldb_standard_syntax_by_name(struct ldb_context
*ldb
,
627 unsigned num_handlers
= sizeof(ldb_standard_syntaxes
)/sizeof(ldb_standard_syntaxes
[0]);
628 /* TODO: should be replaced with a binary search */
629 for (i
=0;i
<num_handlers
;i
++) {
630 if (strcmp(ldb_standard_syntaxes
[i
].name
, syntax
) == 0) {
631 return &ldb_standard_syntaxes
[i
];
637 int ldb_any_comparison(struct ldb_context
*ldb
, void *mem_ctx
,
638 ldb_attr_handler_t canonicalise_fn
,
639 const struct ldb_val
*v1
,
640 const struct ldb_val
*v2
)
643 struct ldb_val v1_canon
, v2_canon
;
644 TALLOC_CTX
*tmp_ctx
= talloc_new(mem_ctx
);
646 /* I could try and bail if tmp_ctx was NULL, but what return
649 * It seems easier to continue on the NULL context
651 ret1
= canonicalise_fn(ldb
, tmp_ctx
, v1
, &v1_canon
);
652 ret2
= canonicalise_fn(ldb
, tmp_ctx
, v2
, &v2_canon
);
654 if (ret1
== LDB_SUCCESS
&& ret2
== LDB_SUCCESS
) {
655 ret
= ldb_comparison_binary(ldb
, mem_ctx
, &v1_canon
, &v2_canon
);
657 ret
= ldb_comparison_binary(ldb
, mem_ctx
, v1
, v2
);
659 talloc_free(tmp_ctx
);