s4-ldb: ldap attribute names can contain a '.'
[Samba/aatanasov.git] / source4 / lib / ldb / common / ldb_parse.c
blob0fab0026f39bbcd1ae97938865e1f08f0489df73
1 /*
2 ldb database library
4 Copyright (C) Andrew Tridgell 2004
6 ** NOTE! The following LGPL license applies to the ldb
7 ** library. This does NOT imply that all of Samba is released
8 ** under the LGPL
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 3 of the License, or (at your option) any later version.
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
20 You should have received a copy of the GNU Lesser General Public
21 License along with this library; if not, see <http://www.gnu.org/licenses/>.
25 * Name: ldb
27 * Component: ldb expression parsing
29 * Description: parse LDAP-like search expressions
31 * Author: Andrew Tridgell
35 TODO:
36 - add RFC2254 binary string handling
37 - possibly add ~=, <= and >= handling
38 - expand the test suite
39 - add better parse error handling
43 #include "ldb_private.h"
44 #include "system/locale.h"
47 a filter is defined by:
48 <filter> ::= '(' <filtercomp> ')'
49 <filtercomp> ::= <and> | <or> | <not> | <simple>
50 <and> ::= '&' <filterlist>
51 <or> ::= '|' <filterlist>
52 <not> ::= '!' <filter>
53 <filterlist> ::= <filter> | <filter> <filterlist>
54 <simple> ::= <attributetype> <filtertype> <attributevalue>
55 <filtertype> ::= '=' | '~=' | '<=' | '>='
59 decode a RFC2254 binary string representation of a buffer.
60 Used in LDAP filters.
62 struct ldb_val ldb_binary_decode(void *mem_ctx, const char *str)
64 int i, j;
65 struct ldb_val ret;
66 int slen = str?strlen(str):0;
68 ret.data = (uint8_t *)talloc_size(mem_ctx, slen+1);
69 ret.length = 0;
70 if (ret.data == NULL) return ret;
72 for (i=j=0;i<slen;i++) {
73 if (str[i] == '\\') {
74 unsigned c;
75 if (sscanf(&str[i+1], "%02X", &c) != 1) {
76 talloc_free(ret.data);
77 memset(&ret, 0, sizeof(ret));
78 return ret;
80 ((uint8_t *)ret.data)[j++] = c;
81 i += 2;
82 } else {
83 ((uint8_t *)ret.data)[j++] = str[i];
86 ret.length = j;
87 ((uint8_t *)ret.data)[j] = 0;
89 return ret;
94 encode a blob as a RFC2254 binary string, escaping any
95 non-printable or '\' characters
97 char *ldb_binary_encode(void *mem_ctx, struct ldb_val val)
99 int i;
100 char *ret;
101 int len = val.length;
102 unsigned char *buf = val.data;
104 for (i=0;i<val.length;i++) {
105 if (!isprint(buf[i]) || strchr(" *()\\&|!\"", buf[i])) {
106 len += 2;
109 ret = talloc_array(mem_ctx, char, len+1);
110 if (ret == NULL) return NULL;
112 len = 0;
113 for (i=0;i<val.length;i++) {
114 if (!isprint(buf[i]) || strchr(" *()\\&|!\"", buf[i])) {
115 snprintf(ret+len, 4, "\\%02X", buf[i]);
116 len += 3;
117 } else {
118 ret[len++] = buf[i];
122 ret[len] = 0;
124 return ret;
128 encode a string as a RFC2254 binary string, escaping any
129 non-printable or '\' characters. This routine is suitable for use
130 in escaping user data in ldap filters.
132 char *ldb_binary_encode_string(void *mem_ctx, const char *string)
134 struct ldb_val val;
135 val.data = discard_const_p(uint8_t, string);
136 val.length = strlen(string);
137 return ldb_binary_encode(mem_ctx, val);
140 /* find the first matching wildcard */
/*
  find the first unescaped '*' in value.

  A backslash hides the character that follows it, so an escaped
  asterisk is not reported. Returns a pointer to the '*' inside value,
  or NULL when there is none (including when the string ends directly
  after a backslash).
*/
static char *ldb_parse_find_wildcard(char *value)
{
	char *hit = value;

	while (*hit != '\0') {
		hit = strpbrk(hit, "\\*");
		if (hit == NULL) {
			break;
		}

		if (*hit == '*') {
			return hit;
		}

		/* *hit is a backslash: step over it and the escaped character */
		if (hit[1] == '\0') {
			break;
		}
		hit += 2;
	}

	return NULL;
}
159 /* return a NULL terminated list of binary strings representing the value
160 chunks separated by wildcards that makes the value portion of the filter
162 static struct ldb_val **ldb_wildcard_decode(void *mem_ctx, const char *string)
164 struct ldb_val **ret = NULL;
165 int val = 0;
166 char *wc, *str;
168 wc = talloc_strdup(mem_ctx, string);
169 if (wc == NULL) return NULL;
171 while (wc && *wc) {
172 str = wc;
173 wc = ldb_parse_find_wildcard(str);
174 if (wc && *wc) {
175 if (wc == str) {
176 wc++;
177 continue;
179 *wc = 0;
180 wc++;
183 ret = talloc_realloc(mem_ctx, ret, struct ldb_val *, val + 2);
184 if (ret == NULL) return NULL;
186 ret[val] = talloc(mem_ctx, struct ldb_val);
187 if (ret[val] == NULL) return NULL;
189 *(ret[val]) = ldb_binary_decode(mem_ctx, str);
190 if ((ret[val])->data == NULL) return NULL;
192 val++;
195 if (ret != NULL) {
196 ret[val] = NULL;
199 return ret;
202 static struct ldb_parse_tree *ldb_parse_filter(void *mem_ctx, const char **s);
206 parse an extended match
208 possible forms:
209 (attr:oid:=value)
210 (attr:dn:oid:=value)
211 (attr:dn:=value)
212 (:dn:oid:=value)
214 the ':dn' part sets the dnAttributes boolean if present
215 the oid sets the rule_id string
218 static struct ldb_parse_tree *ldb_parse_extended(struct ldb_parse_tree *ret,
219 char *attr, char *value)
221 char *p1, *p2;
223 ret->operation = LDB_OP_EXTENDED;
224 ret->u.extended.value = ldb_binary_decode(ret, value);
225 if (ret->u.extended.value.data == NULL) goto failed;
227 p1 = strchr(attr, ':');
228 if (p1 == NULL) goto failed;
229 p2 = strchr(p1+1, ':');
231 *p1 = 0;
232 if (p2) *p2 = 0;
234 ret->u.extended.attr = attr;
235 if (strcmp(p1+1, "dn") == 0) {
236 ret->u.extended.dnAttributes = 1;
237 if (p2) {
238 ret->u.extended.rule_id = talloc_strdup(ret, p2+1);
239 if (ret->u.extended.rule_id == NULL) goto failed;
240 } else {
241 ret->u.extended.rule_id = NULL;
243 } else {
244 ret->u.extended.dnAttributes = 0;
245 ret->u.extended.rule_id = talloc_strdup(ret, p1+1);
246 if (ret->u.extended.rule_id == NULL) goto failed;
249 return ret;
251 failed:
252 talloc_free(ret);
253 return NULL;
256 static enum ldb_parse_op ldb_parse_filtertype(void *mem_ctx, char **type, char **value, const char **s)
258 enum ldb_parse_op filter = 0;
259 char *name, *val, *k;
260 const char *p = *s;
261 const char *t, *t1;
263 /* retrieve attributetype name */
264 t = p;
266 if (*p == '@') { /* for internal attributes the first char can be @ */
267 p++;
270 while ((isascii(*p) && isalnum((unsigned char)*p)) || (*p == '-') || (*p == '.')) {
271 /* attribute names can only be alphanums */
272 p++;
275 if (*p == ':') { /* but extended searches have : and . chars too */
276 p = strstr(p, ":=");
277 if (p == NULL) { /* malformed attribute name */
278 return 0;
282 t1 = p;
284 while (isspace((unsigned char)*p)) p++;
286 if (!strchr("=<>~:", *p)) {
287 return 0;
290 /* save name */
291 name = (char *)talloc_memdup(mem_ctx, t, t1 - t + 1);
292 if (name == NULL) return 0;
293 name[t1 - t] = '\0';
295 /* retrieve filtertype */
297 if (*p == '=') {
298 filter = LDB_OP_EQUALITY;
299 } else if (*(p + 1) == '=') {
300 switch (*p) {
301 case '<':
302 filter = LDB_OP_LESS;
303 p++;
304 break;
305 case '>':
306 filter = LDB_OP_GREATER;
307 p++;
308 break;
309 case '~':
310 filter = LDB_OP_APPROX;
311 p++;
312 break;
313 case ':':
314 filter = LDB_OP_EXTENDED;
315 p++;
316 break;
319 if (!filter) {
320 talloc_free(name);
321 return filter;
323 p++;
325 while (isspace((unsigned char)*p)) p++;
327 /* retrieve value */
328 t = p;
330 while (*p && ((*p != ')') || ((*p == ')') && (*(p - 1) == '\\')))) p++;
332 val = (char *)talloc_memdup(mem_ctx, t, p - t + 1);
333 if (val == NULL) {
334 talloc_free(name);
335 return 0;
337 val[p - t] = '\0';
339 k = &(val[p - t]);
341 /* remove trailing spaces from value */
342 while ((k > val) && (isspace((unsigned char)*(k - 1)))) k--;
343 *k = '\0';
345 *type = name;
346 *value = val;
347 *s = p;
348 return filter;
352 <simple> ::= <attributetype> <filtertype> <attributevalue>
354 static struct ldb_parse_tree *ldb_parse_simple(void *mem_ctx, const char **s)
356 char *attr, *value;
357 struct ldb_parse_tree *ret;
358 enum ldb_parse_op filtertype;
360 ret = talloc(mem_ctx, struct ldb_parse_tree);
361 if (!ret) {
362 errno = ENOMEM;
363 return NULL;
366 filtertype = ldb_parse_filtertype(ret, &attr, &value, s);
367 if (!filtertype) {
368 talloc_free(ret);
369 return NULL;
372 switch (filtertype) {
374 case LDB_OP_PRESENT:
375 ret->operation = LDB_OP_PRESENT;
376 ret->u.present.attr = attr;
377 break;
379 case LDB_OP_EQUALITY:
381 if (strcmp(value, "*") == 0) {
382 ret->operation = LDB_OP_PRESENT;
383 ret->u.present.attr = attr;
384 break;
387 if (ldb_parse_find_wildcard(value) != NULL) {
388 ret->operation = LDB_OP_SUBSTRING;
389 ret->u.substring.attr = attr;
390 ret->u.substring.start_with_wildcard = 0;
391 ret->u.substring.end_with_wildcard = 0;
392 ret->u.substring.chunks = ldb_wildcard_decode(ret, value);
393 if (ret->u.substring.chunks == NULL){
394 talloc_free(ret);
395 return NULL;
397 if (value[0] == '*')
398 ret->u.substring.start_with_wildcard = 1;
399 if (value[strlen(value) - 1] == '*')
400 ret->u.substring.end_with_wildcard = 1;
401 talloc_free(value);
403 break;
406 ret->operation = LDB_OP_EQUALITY;
407 ret->u.equality.attr = attr;
408 ret->u.equality.value = ldb_binary_decode(ret, value);
409 if (ret->u.equality.value.data == NULL) {
410 talloc_free(ret);
411 return NULL;
413 talloc_free(value);
414 break;
416 case LDB_OP_GREATER:
417 ret->operation = LDB_OP_GREATER;
418 ret->u.comparison.attr = attr;
419 ret->u.comparison.value = ldb_binary_decode(ret, value);
420 if (ret->u.comparison.value.data == NULL) {
421 talloc_free(ret);
422 return NULL;
424 talloc_free(value);
425 break;
427 case LDB_OP_LESS:
428 ret->operation = LDB_OP_LESS;
429 ret->u.comparison.attr = attr;
430 ret->u.comparison.value = ldb_binary_decode(ret, value);
431 if (ret->u.comparison.value.data == NULL) {
432 talloc_free(ret);
433 return NULL;
435 talloc_free(value);
436 break;
438 case LDB_OP_APPROX:
439 ret->operation = LDB_OP_APPROX;
440 ret->u.comparison.attr = attr;
441 ret->u.comparison.value = ldb_binary_decode(ret, value);
442 if (ret->u.comparison.value.data == NULL) {
443 talloc_free(ret);
444 return NULL;
446 talloc_free(value);
447 break;
449 case LDB_OP_EXTENDED:
451 ret = ldb_parse_extended(ret, attr, value);
452 break;
454 default:
455 talloc_free(ret);
456 return NULL;
459 return ret;
464 parse a filterlist
465 <and> ::= '&' <filterlist>
466 <or> ::= '|' <filterlist>
467 <filterlist> ::= <filter> | <filter> <filterlist>
469 static struct ldb_parse_tree *ldb_parse_filterlist(void *mem_ctx, const char **s)
471 struct ldb_parse_tree *ret, *next;
472 enum ldb_parse_op op;
473 const char *p = *s;
475 switch (*p) {
476 case '&':
477 op = LDB_OP_AND;
478 break;
479 case '|':
480 op = LDB_OP_OR;
481 break;
482 default:
483 return NULL;
485 p++;
487 while (isspace((unsigned char)*p)) p++;
489 ret = talloc(mem_ctx, struct ldb_parse_tree);
490 if (!ret) {
491 errno = ENOMEM;
492 return NULL;
495 ret->operation = op;
496 ret->u.list.num_elements = 1;
497 ret->u.list.elements = talloc(ret, struct ldb_parse_tree *);
498 if (!ret->u.list.elements) {
499 errno = ENOMEM;
500 talloc_free(ret);
501 return NULL;
504 ret->u.list.elements[0] = ldb_parse_filter(ret->u.list.elements, &p);
505 if (!ret->u.list.elements[0]) {
506 talloc_free(ret);
507 return NULL;
510 while (isspace((unsigned char)*p)) p++;
512 while (*p && (next = ldb_parse_filter(ret->u.list.elements, &p))) {
513 struct ldb_parse_tree **e;
514 e = talloc_realloc(ret, ret->u.list.elements,
515 struct ldb_parse_tree *,
516 ret->u.list.num_elements + 1);
517 if (!e) {
518 errno = ENOMEM;
519 talloc_free(ret);
520 return NULL;
522 ret->u.list.elements = e;
523 ret->u.list.elements[ret->u.list.num_elements] = next;
524 ret->u.list.num_elements++;
525 while (isspace((unsigned char)*p)) p++;
528 *s = p;
530 return ret;
535 <not> ::= '!' <filter>
537 static struct ldb_parse_tree *ldb_parse_not(void *mem_ctx, const char **s)
539 struct ldb_parse_tree *ret;
540 const char *p = *s;
542 if (*p != '!') {
543 return NULL;
545 p++;
547 ret = talloc(mem_ctx, struct ldb_parse_tree);
548 if (!ret) {
549 errno = ENOMEM;
550 return NULL;
553 ret->operation = LDB_OP_NOT;
554 ret->u.isnot.child = ldb_parse_filter(ret, &p);
555 if (!ret->u.isnot.child) {
556 talloc_free(ret);
557 return NULL;
560 *s = p;
562 return ret;
566 parse a filtercomp
567 <filtercomp> ::= <and> | <or> | <not> | <simple>
/*
  parse a filtercomp
  <filtercomp> ::= <and> | <or> | <not> | <simple>

  Leading white space is skipped, then the first character picks the
  sub-parser: '&' and '|' go to ldb_parse_filterlist(), '!' goes to
  ldb_parse_not() and anything else goes to ldb_parse_simple(). A
  parenthesis at this position is an error: NULL is returned and *s is
  left alone.

  In every other case *s is updated to the local position, also when
  the sub-parser returned NULL.
*/
static struct ldb_parse_tree *ldb_parse_filtercomp(void *mem_ctx, const char **s)
{
	const char *cur = *s;
	struct ldb_parse_tree *node;

	while (isspace((unsigned char)*cur)) {
		cur++;
	}

	if (*cur == '(' || *cur == ')') {
		return NULL;
	}

	if (*cur == '&' || *cur == '|') {
		node = ldb_parse_filterlist(mem_ctx, &cur);
	} else if (*cur == '!') {
		node = ldb_parse_not(mem_ctx, &cur);
	} else {
		node = ldb_parse_simple(mem_ctx, &cur);
	}

	*s = cur;
	return node;
}
604 <filter> ::= '(' <filtercomp> ')'
/*
  <filter> ::= '(' <filtercomp> ')'

  *s must point at the opening '('. When the closing ')' is found, *s is
  moved past it and past any white space that follows, and the result of
  ldb_parse_filtercomp() is returned (which may itself be NULL).

  Returns NULL, leaving *s alone, when the opening or the closing
  parenthesis is missing.
*/
static struct ldb_parse_tree *ldb_parse_filter(void *mem_ctx, const char **s)
{
	struct ldb_parse_tree *ret;
	const char *p = *s;

	if (*p != '(') {
		return NULL;
	}
	p++;

	ret = ldb_parse_filtercomp(mem_ctx, &p);

	if (*p != ')') {
		/*
		  the contents may have parsed fine; release that subtree so
		  it does not stay behind on mem_ctx
		*/
		if (ret != NULL) {
			talloc_free(ret);
		}
		return NULL;
	}
	p++;

	while (isspace((unsigned char)*p)) {
		p++;
	}

	*s = p;

	return ret;
}
634 main parser entry point. Takes a search string and returns a parse tree
636 expression ::= <simple> | <filter>
/*
  main parser entry point. Takes a search string and returns a parse tree

  expression ::= <simple> | <filter>

  A NULL or empty string is replaced by
  "(|(objectClass=*)(distinguishedName=*))". After skipping leading
  white space, an expression starting with '(' is handed to
  ldb_parse_filter() and anything else to ldb_parse_simple(); the
  result of that call, allocated on mem_ctx, is returned as is.
*/
struct ldb_parse_tree *ldb_parse_tree(void *mem_ctx, const char *s)
{
	const char *expr = s;

	if (expr == NULL || *expr == 0) {
		expr = "(|(objectClass=*)(distinguishedName=*))";
	}

	for (; isspace((unsigned char)*expr); expr++) {
		/* skip leading white space */
	}

	return (*expr == '(')
		? ldb_parse_filter(mem_ctx, &expr)
		: ldb_parse_simple(mem_ctx, &expr);
}
655 construct a ldap parse filter given a parse tree
657 char *ldb_filter_from_tree(void *mem_ctx, struct ldb_parse_tree *tree)
659 char *s, *s2, *ret;
660 int i;
662 if (tree == NULL) {
663 return NULL;
666 switch (tree->operation) {
667 case LDB_OP_AND:
668 case LDB_OP_OR:
669 ret = talloc_asprintf(mem_ctx, "(%c", tree->operation==LDB_OP_AND?'&':'|');
670 if (ret == NULL) return NULL;
671 for (i=0;i<tree->u.list.num_elements;i++) {
672 s = ldb_filter_from_tree(mem_ctx, tree->u.list.elements[i]);
673 if (s == NULL) {
674 talloc_free(ret);
675 return NULL;
677 s2 = talloc_asprintf_append(ret, "%s", s);
678 talloc_free(s);
679 if (s2 == NULL) {
680 talloc_free(ret);
681 return NULL;
683 ret = s2;
685 s = talloc_asprintf_append(ret, ")");
686 if (s == NULL) {
687 talloc_free(ret);
688 return NULL;
690 return s;
691 case LDB_OP_NOT:
692 s = ldb_filter_from_tree(mem_ctx, tree->u.isnot.child);
693 if (s == NULL) return NULL;
695 ret = talloc_asprintf(mem_ctx, "(!%s)", s);
696 talloc_free(s);
697 return ret;
698 case LDB_OP_EQUALITY:
699 s = ldb_binary_encode(mem_ctx, tree->u.equality.value);
700 if (s == NULL) return NULL;
701 ret = talloc_asprintf(mem_ctx, "(%s=%s)",
702 tree->u.equality.attr, s);
703 talloc_free(s);
704 return ret;
705 case LDB_OP_SUBSTRING:
706 ret = talloc_asprintf(mem_ctx, "(%s=%s", tree->u.substring.attr,
707 tree->u.substring.start_with_wildcard?"*":"");
708 if (ret == NULL) return NULL;
709 for (i = 0; tree->u.substring.chunks[i]; i++) {
710 s2 = ldb_binary_encode(mem_ctx, *(tree->u.substring.chunks[i]));
711 if (s2 == NULL) {
712 talloc_free(ret);
713 return NULL;
715 if (tree->u.substring.chunks[i+1] ||
716 tree->u.substring.end_with_wildcard) {
717 s = talloc_asprintf_append(ret, "%s*", s2);
718 } else {
719 s = talloc_asprintf_append(ret, "%s", s2);
721 if (s == NULL) {
722 talloc_free(ret);
723 return NULL;
725 ret = s;
727 s = talloc_asprintf_append(ret, ")");
728 if (s == NULL) {
729 talloc_free(ret);
730 return NULL;
732 ret = s;
733 return ret;
734 case LDB_OP_GREATER:
735 s = ldb_binary_encode(mem_ctx, tree->u.equality.value);
736 if (s == NULL) return NULL;
737 ret = talloc_asprintf(mem_ctx, "(%s>=%s)",
738 tree->u.equality.attr, s);
739 talloc_free(s);
740 return ret;
741 case LDB_OP_LESS:
742 s = ldb_binary_encode(mem_ctx, tree->u.equality.value);
743 if (s == NULL) return NULL;
744 ret = talloc_asprintf(mem_ctx, "(%s<=%s)",
745 tree->u.equality.attr, s);
746 talloc_free(s);
747 return ret;
748 case LDB_OP_PRESENT:
749 ret = talloc_asprintf(mem_ctx, "(%s=*)", tree->u.present.attr);
750 return ret;
751 case LDB_OP_APPROX:
752 s = ldb_binary_encode(mem_ctx, tree->u.equality.value);
753 if (s == NULL) return NULL;
754 ret = talloc_asprintf(mem_ctx, "(%s~=%s)",
755 tree->u.equality.attr, s);
756 talloc_free(s);
757 return ret;
758 case LDB_OP_EXTENDED:
759 s = ldb_binary_encode(mem_ctx, tree->u.extended.value);
760 if (s == NULL) return NULL;
761 ret = talloc_asprintf(mem_ctx, "(%s%s%s%s:=%s)",
762 tree->u.extended.attr?tree->u.extended.attr:"",
763 tree->u.extended.dnAttributes?":dn":"",
764 tree->u.extended.rule_id?":":"",
765 tree->u.extended.rule_id?tree->u.extended.rule_id:"",
767 talloc_free(s);
768 return ret;
771 return NULL;
776 replace any occurrences of an attribute name in the parse tree with a
777 new name
779 void ldb_parse_tree_attr_replace(struct ldb_parse_tree *tree,
780 const char *attr,
781 const char *replace)
783 int i;
784 switch (tree->operation) {
785 case LDB_OP_AND:
786 case LDB_OP_OR:
787 for (i=0;i<tree->u.list.num_elements;i++) {
788 ldb_parse_tree_attr_replace(tree->u.list.elements[i],
789 attr, replace);
791 break;
792 case LDB_OP_NOT:
793 ldb_parse_tree_attr_replace(tree->u.isnot.child, attr, replace);
794 break;
795 case LDB_OP_EQUALITY:
796 case LDB_OP_GREATER:
797 case LDB_OP_LESS:
798 case LDB_OP_APPROX:
799 if (ldb_attr_cmp(tree->u.equality.attr, attr) == 0) {
800 tree->u.equality.attr = replace;
802 break;
803 case LDB_OP_SUBSTRING:
804 if (ldb_attr_cmp(tree->u.substring.attr, attr) == 0) {
805 tree->u.substring.attr = replace;
807 break;
808 case LDB_OP_PRESENT:
809 if (ldb_attr_cmp(tree->u.present.attr, attr) == 0) {
810 tree->u.present.attr = replace;
812 break;
813 case LDB_OP_EXTENDED:
814 if (tree->u.extended.attr &&
815 ldb_attr_cmp(tree->u.extended.attr, attr) == 0) {
816 tree->u.extended.attr = replace;
818 break;