vfs_ceph: Disable the module on unsupported Ceph versions
[Samba.git] / lib / ldb / common / ldb_utf8.c
blob6891de84101bdba932593bd3cab18a9199e3f3c6
/*
   ldb database library

   Copyright (C) Andrew Tridgell  2004

     ** NOTE! The following LGPL license applies to the ldb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/

/*
 *  Name: ldb
 *
 *  Component: ldb utf8 handling
 *
 *  Description: case folding and case comparison for UTF8 strings
 *
 *  Author: Andrew Tridgell
 */
34 #include "ldb_private.h"
35 #include "system/locale.h"
38 * Set functions for comparing and case-folding case-insensitive ldb val
39 * strings.
41 void ldb_set_utf8_functions(struct ldb_context *ldb,
42 void *context,
43 char *(*casefold)(void *, void *, const char *, size_t),
44 int (*casecmp)(void *ctx,
45 const struct ldb_val *v1,
46 const struct ldb_val *v2))
48 if (context) {
49 ldb->utf8_fns.context = context;
51 if (casefold) {
52 ldb->utf8_fns.casefold = casefold;
54 if (casecmp) {
55 ldb->utf8_fns.casecmp = casecmp;
/*
  this allows the user to pass in a case folding function to handle
  utf8 strings

  NULL is passed for the comparison function, so only the context and
  casefold arguments are forwarded to ldb_set_utf8_functions()
*/
void ldb_set_utf8_fns(struct ldb_context *ldb,
		      void *context,
		      char *(*casefold)(void *, void *, const char *, size_t))
{
	ldb_set_utf8_functions(ldb, context, casefold, NULL);
}
72 a simple case folding function
73 NOTE: does not handle UTF8
75 char *ldb_casefold_default(void *context, TALLOC_CTX *mem_ctx, const char *s, size_t n)
77 size_t i;
78 char *ret = talloc_strndup(mem_ctx, s, n);
79 if (!s) {
80 errno = ENOMEM;
81 return NULL;
83 for (i=0;ret[i];i++) {
84 ret[i] = ldb_ascii_toupper(ret[i]);
86 return ret;
91 * The default comparison fold function only knows ASCII. Multiple
92 * spaces (0x20) are collapsed into one, and [a-z] map to [A-Z]. All
93 * other bytes are compared without casefolding.
95 * Note that as well as not handling UTF-8, this function does not exactly
96 * implement RFC 4518 (2.6.1. Insignificant Space Handling and Appendix B).
99 int ldb_comparison_fold_ascii(void *ignored,
100 const struct ldb_val *v1,
101 const struct ldb_val *v2)
103 const uint8_t *s1 = v1->data;
104 const uint8_t *s2 = v2->data;
105 size_t n1 = v1->length, n2 = v2->length;
107 while (n1 && *s1 == ' ') { s1++; n1--; };
108 while (n2 && *s2 == ' ') { s2++; n2--; };
110 while (n1 && n2 && *s1 && *s2) {
111 if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) {
112 break;
114 if (*s1 == ' ') {
115 while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
116 while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
118 s1++; s2++;
119 n1--; n2--;
122 /* check for trailing spaces only if the other pointers has
123 * reached the end of the strings otherwise we can
124 * mistakenly match. ex. "domain users" <->
125 * "domainUpdates"
127 if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
128 while (n1 && *s1 == ' ') { s1++; n1--; }
130 if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
131 while (n2 && *s2 == ' ') { s2++; n2--; }
133 if (n1 == 0 && n2 != 0) {
134 return *s2 ? -1 : 0;
136 if (n2 == 0 && n1 != 0) {
137 return *s1 ? 1 : 0;
139 if (n1 == 0 && n2 == 0) {
140 return 0;
142 return NUMERIC_CMP(*s1, *s2);
145 void ldb_set_utf8_default(struct ldb_context *ldb)
147 ldb_set_utf8_functions(ldb, NULL,
148 ldb_casefold_default,
149 ldb_comparison_fold_ascii);
152 char *ldb_casefold(struct ldb_context *ldb, TALLOC_CTX *mem_ctx, const char *s, size_t n)
154 return ldb->utf8_fns.casefold(ldb->utf8_fns.context, mem_ctx, s, n);
/*
  check the attribute name is valid according to rfc2251
  returns 1 if the name is ok, 0 if it is not

  A NULL or empty name is rejected. The single-character name "*" is
  accepted as a special case. Otherwise every byte must be 7-bit ASCII,
  the first must be a letter or '@', and the rest must be letters,
  digits or '-'.
*/
int ldb_valid_attr_name(const char *s)
{
	size_t i;

	if (!s || !s[0]) {
		return 0;
	}

	/* handle special ldb_tdb wildcard */
	if (strcmp(s, "*") == 0) {
		return 1;
	}

	for (i = 0; s[i]; i++) {
		/*
		 * Go through unsigned char so that bytes >= 0x80 are
		 * rejected explicitly and never reach the <ctype.h>
		 * functions as negative values.
		 */
		unsigned char c = (unsigned char)s[i];

		if (c > 0x7f) {
			return 0;
		}
		if (i == 0) { /* first char must be an alpha (or our special '@' identifier) */
			if (! (isalpha(c) || (c == '@'))) {
				return 0;
			}
		} else {
			if (! (isalnum(c) || (c == '-'))) {
				return 0;
			}
		}
	}
	return 1;
}
189 char *ldb_attr_casefold(TALLOC_CTX *mem_ctx, const char *s)
191 size_t i;
192 char *ret = talloc_strdup(mem_ctx, s);
193 if (!ret) {
194 errno = ENOMEM;
195 return NULL;
197 for (i = 0; ret[i]; i++) {
198 ret[i] = ldb_ascii_toupper(ret[i]);
200 return ret;
/*
  we accept either 'dn' or 'distinguishedName' for a distinguishedName

  returns 0 if attr matches one of those names according to
  ldb_attr_cmp(), and -1 otherwise
*/
int ldb_attr_dn(const char *attr)
{
	if (ldb_attr_cmp(attr, "dn") == 0 ||
	    ldb_attr_cmp(attr, "distinguishedName") == 0) {
		return 0;
	}
	return -1;
}
215 _PRIVATE_ char ldb_ascii_toupper(char c) {
217 * We are aiming for a 1970s C-locale toupper(), when all letters
218 * were 7-bit and behaved with true American spirit.
220 * For example, we don't want the "i" in "<guid=" to be upper-cased to
221 * "İ" as would happen in some locales, or we won't be able to parse
222 * that properly. This is unfortunate for cases where we are dealing
223 * with real text; a search for the name "Ali" would need to be
224 * written "Alİ" to match.
226 return ('a' <= c && c <= 'z') ? c ^ 0x20 : c;