2 Unix SMB/CIFS implementation.
3 Samba utility functions
4 Copyright (C) Andrew Tridgell 1992-2001
5 Copyright (C) Simo Sorce 2001
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "system/locale.h"
/*
 * Shared iconv convenience handle used by the wrappers in this file.
 * It starts out NULL and is filled in on first use by
 * get_iconv_convenience().
 */
struct smb_iconv_convenience *global_iconv_convenience = NULL;
26 static inline struct smb_iconv_convenience
*get_iconv_convenience(void)
28 if (global_iconv_convenience
== NULL
)
29 global_iconv_convenience
= smb_iconv_convenience_init(talloc_autofree_context(), "ASCII", "UTF-8", true);
30 return global_iconv_convenience
;
34 Case insensitive string comparison
36 _PUBLIC_
int strcasecmp_m(const char *s1
, const char *s2
)
38 codepoint_t c1
=0, c2
=0;
40 struct smb_iconv_convenience
*iconv_convenience
= get_iconv_convenience();
42 /* handle null ptr comparisons to simplify the use in qsort */
43 if (s1
== s2
) return 0;
44 if (s1
== NULL
) return -1;
45 if (s2
== NULL
) return 1;
48 c1
= next_codepoint_convenience(iconv_convenience
, s1
, &size1
);
49 c2
= next_codepoint_convenience(iconv_convenience
, s2
, &size2
);
58 if (c1
== INVALID_CODEPOINT
||
59 c2
== INVALID_CODEPOINT
) {
60 /* what else can we do?? */
61 return strcasecmp(s1
, s2
);
64 if (toupper_m(c1
) != toupper_m(c2
)) {
73 * Get the next token from a string, return False if none found.
74 * Handles double-quotes.
76 * Based on a routine by GJC@VILLAGE.COM.
77 * Extensively modified by Andrew.Tridgell@anu.edu.au
79 _PUBLIC_
bool next_token(const char **ptr
,char *buff
, const char *sep
, size_t bufsize
)
90 /* default to simple separators */
94 /* find the first non sep char */
95 while (*s
&& strchr_m(sep
,*s
))
102 /* copy over the token */
103 for (quoted
= false; len
< bufsize
&& *s
&& (quoted
|| !strchr_m(sep
,*s
)); s
++) {
112 *ptr
= (*s
) ? s
+1 : s
;
119 Case insensitive string comparison, length limited
121 _PUBLIC_
int strncasecmp_m(const char *s1
, const char *s2
, size_t n
)
123 codepoint_t c1
=0, c2
=0;
125 struct smb_iconv_convenience
*iconv_convenience
= get_iconv_convenience();
127 /* handle null ptr comparisons to simplify the use in qsort */
128 if (s1
== s2
) return 0;
129 if (s1
== NULL
) return -1;
130 if (s2
== NULL
) return 1;
132 while (*s1
&& *s2
&& n
) {
135 c1
= next_codepoint_convenience(iconv_convenience
, s1
, &size1
);
136 c2
= next_codepoint_convenience(iconv_convenience
, s2
, &size2
);
145 if (c1
== INVALID_CODEPOINT
||
146 c2
== INVALID_CODEPOINT
) {
147 /* what else can we do?? */
148 return strcasecmp(s1
, s2
);
151 if (toupper_m(c1
) != toupper_m(c2
)) {
166 * @note The comparison is case-insensitive.
168 _PUBLIC_
bool strequal_m(const char *s1
, const char *s2
)
170 return strcasecmp_m(s1
,s2
) == 0;
174 Compare 2 strings (case sensitive).
176 _PUBLIC_
bool strcsequal_m(const char *s1
,const char *s2
)
183 return strcmp(s1
,s2
) == 0;
189 NOTE: oldc and newc must be 7 bit characters
191 _PUBLIC_
void string_replace_m(char *s
, char oldc
, char newc
)
193 struct smb_iconv_convenience
*ic
= get_iconv_convenience();
196 codepoint_t c
= next_codepoint_convenience(ic
, s
, &size
);
205 Paranoid strcpy into a buffer of given length (includes terminating
206 zero). Strips out all but 'a-zA-Z0-9' and the characters in other_safe_chars
207 and replaces with '_'. Deliberately does *NOT* check for multibyte
208 characters. Don't change it !
211 _PUBLIC_
char *alpha_strcpy(char *dest
, const char *src
, const char *other_safe_chars
, size_t maxlength
)
215 if (maxlength
== 0) {
216 /* can't fit any bytes at all! */
221 DEBUG(0,("ERROR: NULL dest in alpha_strcpy\n"));
231 if (len
>= maxlength
)
234 if (!other_safe_chars
)
235 other_safe_chars
= "";
237 for(i
= 0; i
< len
; i
++) {
238 int val
= (src
[i
] & 0xff);
239 if (isupper(val
) || islower(val
) || isdigit(val
) || strchr_m(other_safe_chars
, val
))
251 Count the number of UCS2 characters in a string. Normally this will
252 be the same as the number of bytes in a string for single byte strings,
253 but will be different for multibyte.
255 _PUBLIC_
size_t strlen_m(const char *s
)
258 struct smb_iconv_convenience
*ic
= get_iconv_convenience();
264 while (*s
&& !(((uint8_t)*s
) & 0x80)) {
275 codepoint_t c
= next_codepoint_convenience(ic
, s
, &c_size
);
288 Work out the number of multibyte chars in a string, including the NULL
291 _PUBLIC_
size_t strlen_m_term(const char *s
)
297 return strlen_m(s
) + 1;
301 * Weird helper routine for the winreg pipe: If nothing is around, return 0,
302 * if a string is there, include the terminator.
305 _PUBLIC_
size_t strlen_m_term_null(const char *s
)
320 Strchr and strrchr_m are a bit complex on general multi-byte strings.
322 _PUBLIC_
char *strchr_m(const char *s
, char c
)
324 struct smb_iconv_convenience
*ic
= get_iconv_convenience();
328 /* characters below 0x40 (i.e. 0x00-0x3F) are guaranteed to not appear in
329 non-initial position in multi-byte charsets */
330 if ((c
& 0xC0) == 0) {
336 codepoint_t c2
= next_codepoint_convenience(ic
, s
, &size
);
338 return discard_const_p(char, s
);
347 * Multibyte-character version of strrchr
349 _PUBLIC_
char *strrchr_m(const char *s
, char c
)
351 struct smb_iconv_convenience
*ic
= get_iconv_convenience();
358 /* characters below 0x40 (i.e. 0x00-0x3F) are guaranteed to not appear in
359 non-initial position in multi-byte charsets */
360 if ((c
& 0xC0) == 0) {
361 return strrchr(s
, c
);
366 codepoint_t c2
= next_codepoint_convenience(ic
, s
, &size
);
368 ret
= discard_const_p(char, s
);
377 return True if any (multi-byte) character is lower case
379 _PUBLIC_
bool strhaslower(const char *string
)
381 struct smb_iconv_convenience
*ic
= get_iconv_convenience();
387 s
= next_codepoint_convenience(ic
, string
, &c_size
);
393 return true; /* that means it has lower case chars */
401 return True if any (multi-byte) character is upper case
403 _PUBLIC_
bool strhasupper(const char *string
)
405 struct smb_iconv_convenience
*ic
= get_iconv_convenience();
411 s
= next_codepoint_convenience(ic
, string
, &c_size
);
417 return true; /* that means it has upper case chars */
425 Convert a string to lower case, allocated with talloc
427 _PUBLIC_
char *strlower_talloc(TALLOC_CTX
*ctx
, const char *src
)
431 struct smb_iconv_convenience
*iconv_convenience
= get_iconv_convenience();
433 /* this takes advantage of the fact that upper/lower can't
434 change the length of a character by more than 1 byte */
435 dest
= talloc_array(ctx
, char, 2*(strlen(src
))+1);
442 codepoint_t c
= next_codepoint_convenience(iconv_convenience
, src
, &c_size
);
447 c_size
= push_codepoint_convenience(iconv_convenience
, dest
+size
, c
);
457 /* trim it so talloc_append_string() works */
458 dest
= talloc_realloc(ctx
, dest
, char, size
+1);
460 talloc_set_name_const(dest
, dest
);
466 Convert a string to UPPER case, allocated with talloc
467 source length limited to n bytes
469 _PUBLIC_
char *strupper_talloc_n(TALLOC_CTX
*ctx
, const char *src
, size_t n
)
473 struct smb_iconv_convenience
*iconv_convenience
= get_iconv_convenience();
479 /* this takes advantage of the fact that upper/lower can't
480 change the length of a character by more than 1 byte */
481 dest
= talloc_array(ctx
, char, 2*(n
+1));
486 while (n
-- && *src
) {
488 codepoint_t c
= next_codepoint_convenience(iconv_convenience
, src
, &c_size
);
493 c_size
= push_codepoint_convenience(iconv_convenience
, dest
+size
, c
);
503 /* trim it so talloc_append_string() works */
504 dest
= talloc_realloc(ctx
, dest
, char, size
+1);
506 talloc_set_name_const(dest
, dest
);
512 Convert a string to UPPER case, allocated with talloc
514 _PUBLIC_
char *strupper_talloc(TALLOC_CTX
*ctx
, const char *src
)
516 return strupper_talloc_n(ctx
, src
, src
?strlen(src
):0);
520 talloc_strdup() a unix string to upper case.
522 _PUBLIC_
char *talloc_strdup_upper(TALLOC_CTX
*ctx
, const char *src
)
524 return strupper_talloc(ctx
, src
);
528 Convert a string to lower case.
530 _PUBLIC_
void strlower_m(char *s
)
533 struct smb_iconv_convenience
*iconv_convenience
;
535 /* this is quite a common operation, so we want it to be
536 fast. We optimise for the ascii case, knowing that all our
537 supported multi-byte character sets are ascii-compatible
538 (ie. they match for the first 128 chars) */
539 while (*s
&& !(((uint8_t)*s
) & 0x80)) {
540 *s
= tolower((uint8_t)*s
);
547 iconv_convenience
= get_iconv_convenience();
552 size_t c_size
, c_size2
;
553 codepoint_t c
= next_codepoint_convenience(iconv_convenience
, s
, &c_size
);
554 c_size2
= push_codepoint_convenience(iconv_convenience
, d
, tolower_m(c
));
555 if (c_size2
> c_size
) {
556 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n",
557 c
, tolower_m(c
), (int)c_size
, (int)c_size2
));
558 smb_panic("codepoint expansion in strlower_m\n");
567 Convert a string to UPPER case.
569 _PUBLIC_
void strupper_m(char *s
)
572 struct smb_iconv_convenience
*iconv_convenience
;
574 /* this is quite a common operation, so we want it to be
575 fast. We optimise for the ascii case, knowing that all our
576 supported multi-byte character sets are ascii-compatible
577 (ie. they match for the first 128 chars) */
578 while (*s
&& !(((uint8_t)*s
) & 0x80)) {
579 *s
= toupper((uint8_t)*s
);
586 iconv_convenience
= get_iconv_convenience();
591 size_t c_size
, c_size2
;
592 codepoint_t c
= next_codepoint_convenience(iconv_convenience
, s
, &c_size
);
593 c_size2
= push_codepoint_convenience(iconv_convenience
, d
, toupper_m(c
));
594 if (c_size2
> c_size
) {
595 DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n",
596 c
, toupper_m(c
), (int)c_size
, (int)c_size2
));
597 smb_panic("codepoint expansion in strupper_m\n");
607 Find the number of 'c' chars in a string
609 _PUBLIC_
size_t count_chars_m(const char *s
, char c
)
611 struct smb_iconv_convenience
*ic
= get_iconv_convenience();
616 codepoint_t c2
= next_codepoint_convenience(ic
, s
, &size
);
617 if (c2
== c
) count
++;
626 * Copy a string from a char* unix src to a dos codepage string destination.
628 * @return the number of bytes occupied by the string in the destination.
630 * @param flags can include
632 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
633 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
636 * @param dest_len the maximum length in bytes allowed in the
637 * destination. If @p dest_len is -1 then no maximum is used.
639 static ssize_t
push_ascii(void *dest
, const char *src
, size_t dest_len
, int flags
)
644 if (flags
& STR_UPPER
) {
645 char *tmpbuf
= strupper_talloc(NULL
, src
);
646 if (tmpbuf
== NULL
) {
649 ret
= push_ascii(dest
, tmpbuf
, dest_len
, flags
& ~STR_UPPER
);
654 src_len
= strlen(src
);
656 if (flags
& (STR_TERMINATE
| STR_TERMINATE_ASCII
))
659 return convert_string(CH_UNIX
, CH_DOS
, src
, src_len
, dest
, dest_len
, false);
663 * Copy a string from a unix char* src to an ASCII destination,
664 * allocating a buffer using talloc().
666 * @param dest always set at least to NULL
668 * @returns true if the conversion succeeded, false on error; the size in
669 * bytes of the converted string is returned in @p converted_size.
671 _PUBLIC_
bool push_ascii_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
, size_t *converted_size
)
673 size_t src_len
= strlen(src
)+1;
675 return convert_string_talloc(ctx
, CH_UNIX
, CH_DOS
, src
, src_len
, (void **)dest
, converted_size
, false);
680 * Copy a string from a dos codepage source to a unix char* destination.
682 * The resulting string in "dest" is always null terminated.
684 * @param flags can have:
686 * <dt>STR_TERMINATE</dt>
687 * <dd>STR_TERMINATE means the string in @p src
688 * is null terminated, and src_len is ignored.</dd>
691 * @param src_len is the length of the source area in bytes.
692 * @returns the number of bytes occupied by the string in @p src.
694 static ssize_t
pull_ascii(char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
698 if (flags
& (STR_TERMINATE
| STR_TERMINATE_ASCII
)) {
699 if (src_len
== (size_t)-1) {
700 src_len
= strlen((const char *)src
) + 1;
702 size_t len
= strnlen((const char *)src
, src_len
);
709 ret
= convert_string(CH_DOS
, CH_UNIX
, src
, src_len
, dest
, dest_len
, false);
712 dest
[MIN(ret
, dest_len
-1)] = 0;
718 * Copy a string from a char* src to a unicode destination.
720 * @returns the number of bytes occupied by the string in the destination.
722 * @param flags can have:
725 * <dt>STR_TERMINATE <dd>means include the null termination.
726 * <dt>STR_UPPER <dd>means uppercase in the destination.
727 * <dt>STR_NOALIGN <dd>means don't do alignment.
730 * @param dest_len is the maximum length allowed in the
731 * destination. If dest_len is -1 then no maximum is used.
733 static ssize_t
push_ucs2(void *dest
, const char *src
, size_t dest_len
, int flags
)
736 size_t src_len
= strlen(src
);
739 if (flags
& STR_UPPER
) {
740 char *tmpbuf
= strupper_talloc(NULL
, src
);
741 if (tmpbuf
== NULL
) {
744 ret
= push_ucs2(dest
, tmpbuf
, dest_len
, flags
& ~STR_UPPER
);
749 if (flags
& STR_TERMINATE
)
752 if (ucs2_align(NULL
, dest
, flags
)) {
754 dest
= (void *)((char *)dest
+ 1);
755 if (dest_len
) dest_len
--;
759 /* ucs2 is always a multiple of 2 bytes */
762 ret
= convert_string(CH_UNIX
, CH_UTF16
, src
, src_len
, dest
, dest_len
, false);
763 if (ret
== (size_t)-1) {
774 * Copy a string from a unix char* src to a UCS2 destination,
775 * allocating a buffer using talloc().
777 * @param dest always set at least to NULL
779 * @returns true if the conversion succeeded, false on error; the size in
780 * bytes of the converted string is returned in @p converted_size.
782 _PUBLIC_
bool push_ucs2_talloc(TALLOC_CTX
*ctx
, smb_ucs2_t
**dest
, const char *src
, size_t *converted_size
)
784 size_t src_len
= strlen(src
)+1;
786 return convert_string_talloc(ctx
, CH_UNIX
, CH_UTF16
, src
, src_len
, (void **)dest
, converted_size
, false);
791 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
793 * @param dest always set at least to NULL
795 * @returns true if the conversion succeeded, false on error (the converted size in bytes is returned in @p converted_size)
798 _PUBLIC_
bool push_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
, size_t *converted_size
)
800 size_t src_len
= strlen(src
)+1;
802 return convert_string_talloc(ctx
, CH_UNIX
, CH_UTF8
, src
, src_len
, (void **)dest
, converted_size
, false);
806 Copy a string from a ucs2 source to a unix char* destination.
808 STR_TERMINATE means the string in src is null terminated.
809 STR_NOALIGN means don't try to align.
810 if STR_TERMINATE is set then src_len is ignored if it is -1.
811 src_len is the length of the source area in bytes
812 Return the number of bytes occupied by the string in src.
813 The resulting string in "dest" is always null terminated.
816 static size_t pull_ucs2(char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
820 if (ucs2_align(NULL
, src
, flags
)) {
821 src
= (const void *)((const char *)src
+ 1);
826 if (flags
& STR_TERMINATE
) {
827 if (src_len
== (size_t)-1) {
828 src_len
= utf16_len(src
);
830 src_len
= utf16_len_n(src
, src_len
);
834 /* ucs2 is always a multiple of 2 bytes */
835 if (src_len
!= (size_t)-1)
838 ret
= convert_string(CH_UTF16
, CH_UNIX
, src
, src_len
, dest
, dest_len
, false);
840 dest
[MIN(ret
, dest_len
-1)] = 0;
846 * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc
848 * @param dest always set at least to NULL
850 * @returns true if the conversion succeeded, false on error (the converted size in bytes is returned in @p converted_size)
853 _PUBLIC_
bool pull_ascii_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
, size_t *converted_size
)
855 size_t src_len
= strlen(src
)+1;
857 return convert_string_talloc(ctx
, CH_DOS
, CH_UNIX
, src
, src_len
, (void **)dest
, converted_size
, false);
861 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
863 * @param dest always set at least to NULL
865 * @returns true if the conversion succeeded, false on error (the converted size in bytes is returned in @p converted_size)
868 _PUBLIC_
bool pull_ucs2_talloc(TALLOC_CTX
*ctx
, char **dest
, const smb_ucs2_t
*src
, size_t *converted_size
)
870 size_t src_len
= utf16_len(src
);
872 return convert_string_talloc(ctx
, CH_UTF16
, CH_UNIX
, src
, src_len
, (void **)dest
, converted_size
, false);
876 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
878 * @param dest always set at least to NULL
880 * @returns true if the conversion succeeded, false on error (the converted size in bytes is returned in @p converted_size)
883 _PUBLIC_
bool pull_utf8_talloc(TALLOC_CTX
*ctx
, char **dest
, const char *src
, size_t *converted_size
)
885 size_t src_len
= strlen(src
)+1;
887 return convert_string_talloc(ctx
, CH_UTF8
, CH_UNIX
, src
, src_len
, (void **)dest
, converted_size
, false);
891 Copy a string from a char* src to a unicode or ascii
892 dos codepage destination choosing unicode or ascii based on the
893 flags in the SMB buffer starting at base_ptr.
894 Return the number of bytes occupied by the string in the destination.
896 STR_TERMINATE means include the null termination.
897 STR_UPPER means uppercase in the destination.
898 STR_ASCII use ascii even with unicode packet.
899 STR_NOALIGN means don't do alignment.
900 dest_len is the maximum length allowed in the destination. If dest_len
901 is -1 then no maximum is used.
904 _PUBLIC_ ssize_t
push_string(void *dest
, const char *src
, size_t dest_len
, int flags
)
906 if (flags
& STR_ASCII
) {
907 return push_ascii(dest
, src
, dest_len
, flags
);
908 } else if (flags
& STR_UNICODE
) {
909 return push_ucs2(dest
, src
, dest_len
, flags
);
911 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
918 Copy a string from a unicode or ascii source (depending on
919 the packet flags) to a char* destination.
921 STR_TERMINATE means the string in src is null terminated.
922 STR_UNICODE means to force as unicode.
923 STR_ASCII use ascii even with unicode packet.
924 STR_NOALIGN means don't do alignment.
925 if STR_TERMINATE is set then src_len is ignored if it is -1
926 src_len is the length of the source area in bytes.
927 Return the number of bytes occupied by the string in src.
928 The resulting string in "dest" is always null terminated.
931 _PUBLIC_ ssize_t
pull_string(char *dest
, const void *src
, size_t dest_len
, size_t src_len
, int flags
)
933 if (flags
& STR_ASCII
) {
934 return pull_ascii(dest
, src
, dest_len
, src_len
, flags
);
935 } else if (flags
& STR_UNICODE
) {
936 return pull_ucs2(dest
, src
, dest_len
, src_len
, flags
);
938 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
945 * Convert string from one encoding to another, making error checking etc
947 * @param src pointer to source string (multibyte or singlebyte)
948 * @param srclen length of the source string in bytes
949 * @param dest pointer to destination string (multibyte or singlebyte)
950 * @param destlen maximal length allowed for string
951 * @returns the number of bytes occupied in the destination
953 _PUBLIC_
size_t convert_string(charset_t from
, charset_t to
,
954 void const *src
, size_t srclen
,
955 void *dest
, size_t destlen
,
956 bool allow_badcharcnv
)
959 if (!convert_string_convenience(get_iconv_convenience(), from
, to
,
968 * Convert between character sets, allocating a new buffer using talloc for the result.
970 * @param srclen length of source buffer.
971 * @param dest always set at least to NULL
972 * @param converted_size Size in bytes of the converted string
973 * @note -1 is not accepted for srclen.
975 * @returns boolean indication whether the conversion succeeded
978 _PUBLIC_
bool convert_string_talloc(TALLOC_CTX
*ctx
,
979 charset_t from
, charset_t to
,
980 void const *src
, size_t srclen
,
981 void *dest
, size_t *converted_size
,
982 bool allow_badcharcnv
)
984 return convert_string_talloc_convenience(ctx
, get_iconv_convenience(),
985 from
, to
, src
, srclen
, dest
,
991 _PUBLIC_ codepoint_t
next_codepoint(const char *str
, size_t *size
)
993 return next_codepoint_convenience(get_iconv_convenience(), str
, size
);
996 _PUBLIC_ ssize_t
push_codepoint(char *str
, codepoint_t c
)
998 return push_codepoint_convenience(get_iconv_convenience(), str
, c
);