2 * msvcrt.dll mbcs functions
4 * Copyright 1999 Alexandre Julliard
5 * Copyright 2000 Jon Griffths
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * Not currently binary compatible with win32. MSVCRT_mbctype must be
23 * populated correctly and the ismb* functions should reference it.
28 #include "msvcrt/mbctype.h"
29 #include "msvcrt/mbstring.h"
30 #include "msvcrt/stdlib.h"
31 #include "msvcrt/string.h"
32 #include "msvcrt/wctype.h"
34 #include "wine/unicode.h"
35 #include "wine/debug.h"
37 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt
);
39 unsigned char MSVCRT_mbctype
[257];
40 int MSVCRT___mb_cur_max
= 1;
42 static WCHAR
msvcrt_mbc_to_wc(unsigned int ch
)
52 mbch
[0] = (ch
>> 8) & 0xff;
56 if (!MultiByteToWideChar(MSVCRT_current_lc_all_cp
, 0, mbch
, n_chars
, &chW
, 1))
58 WARN("MultiByteToWideChar failed on %x\n", ch
);
64 /*********************************************************************
65 * __p__mbctype (MSVCRT.@)
67 unsigned char* __p__mbctype(void)
69 return MSVCRT_mbctype
;
72 /*********************************************************************
73 * __p___mb_cur_max(MSVCRT.@)
75 int* __p___mb_cur_max(void)
77 return &MSVCRT___mb_cur_max
;
80 /*********************************************************************
83 unsigned int _mbsnextc(const unsigned char* str
)
85 if(MSVCRT___mb_cur_max
> 1 && MSVCRT_isleadbyte(*str
))
86 return *str
<< 8 | str
[1];
87 return *str
; /* ASCII CP or SB char */
90 /*********************************************************************
91 * _mbctolower(MSVCRT.@)
93 unsigned int _mbctolower(unsigned int c
)
95 if (MSVCRT_isleadbyte(c
))
97 FIXME("Handle MBC chars\n");
100 return tolower(c
); /* ASCII CP or SB char */
103 /*********************************************************************
104 * _mbctoupper(MSVCRT.@)
106 unsigned int _mbctoupper(unsigned int c
)
108 if (MSVCRT_isleadbyte(c
))
110 FIXME("Handle MBC chars\n");
113 return toupper(c
); /* ASCII CP or SB char */
116 /*********************************************************************
119 unsigned char* _mbsdec(const unsigned char* start
, const unsigned char* cur
)
121 if(MSVCRT___mb_cur_max
> 1)
122 return (char *)(_ismbstrail(start
,cur
-1) ? cur
- 2 : cur
-1);
124 return (char *)cur
- 1; /* ASCII CP or SB char */
127 /*********************************************************************
130 unsigned char* _mbsinc(const unsigned char* str
)
132 if(MSVCRT___mb_cur_max
> 1 && MSVCRT_isleadbyte(*str
))
133 return (unsigned char*)str
+ 2; /* MB char */
135 return (unsigned char*)str
+ 1; /* ASCII CP or SB char */
138 /*********************************************************************
141 unsigned char* _mbsninc(const unsigned char* str
, MSVCRT_size_t num
)
145 if(MSVCRT___mb_cur_max
> 1)
149 return (unsigned char*)str
;
151 return (unsigned char*)str
+ num
; /* ASCII CP */
/*********************************************************************
 *		_mbclen(MSVCRT.@)
 *
 * Get the width in bytes of the character at str.
 *
 * PARAMS
 *  str [I] Character to measure
 *
 * RETURNS
 *  2 if *str is a lead byte, otherwise 1.
 */
unsigned int _mbclen(const unsigned char* str)
{
  if (MSVCRT_isleadbyte(*str))
    return 2;

  return 1;
}
162 /*********************************************************************
165 int MSVCRT_mblen(const char* str
, MSVCRT_size_t size
)
167 if (str
&& *str
&& size
)
169 if(MSVCRT___mb_cur_max
== 1)
170 return 1; /* ASCII CP */
172 return !MSVCRT_isleadbyte(*str
) ? 1 : (size
>1 ? 2 : -1);
177 /*********************************************************************
180 MSVCRT_size_t
_mbslen(const unsigned char* str
)
182 if(MSVCRT___mb_cur_max
> 1)
184 MSVCRT_size_t len
= 0;
187 str
+= MSVCRT_isleadbyte(*str
) ? 2 : 1;
192 return strlen(str
); /* ASCII CP */
195 /*********************************************************************
196 * _mbstrlen(MSVCRT.@)
198 MSVCRT_size_t
_mbstrlen(const char* str
)
200 if(MSVCRT___mb_cur_max
> 1)
202 MSVCRT_size_t len
= 0;
205 /* FIXME: According to the documentation we are supposed to test for
206 * multi-byte character validity. Whatever that means
208 str
+= MSVCRT_isleadbyte(*str
) ? 2 : 1;
213 return strlen(str
); /* ASCII CP */
216 /*********************************************************************
219 void _mbccpy(unsigned char* dest
, const unsigned char* src
)
222 if(MSVCRT___mb_cur_max
> 1 && MSVCRT_isleadbyte(*src
))
223 *dest
= *++src
; /* MB char */
225 ERR("failure.. is this ok?\n");
228 /*********************************************************************
231 unsigned char* _mbsncpy(unsigned char* dst
, const unsigned char* src
, MSVCRT_size_t n
)
235 if(MSVCRT___mb_cur_max
> 1)
237 unsigned char* ret
= dst
;
241 if (MSVCRT_isleadbyte(*src
++))
248 return strncpy(dst
, src
, n
); /* ASCII CP */
251 /*********************************************************************
252 * _mbsnbcpy(MSVCRT.@)
254 unsigned char* _mbsnbcpy(unsigned char* dst
, const unsigned char* src
, MSVCRT_size_t n
)
258 if(MSVCRT___mb_cur_max
> 1)
260 unsigned char* ret
= dst
;
261 while (*src
&& (n
-- > 1))
264 if (MSVCRT_isleadbyte(*src
++))
270 if (*src
&& n
&& !MSVCRT_isleadbyte(*src
))
272 /* If the last character is a multi-byte character then
273 * we cannot copy it since we have only one byte left
282 return strncpy(dst
, src
, n
); /* ASCII CP */
285 /*********************************************************************
288 int _mbscmp(const unsigned char* str
, const unsigned char* cmp
)
290 if(MSVCRT___mb_cur_max
> 1)
292 unsigned int strc
, cmpc
;
295 return *cmp
? -1 : 0;
298 strc
= _mbsnextc(str
);
299 cmpc
= _mbsnextc(cmp
);
301 return strc
< cmpc
? -1 : 1;
302 str
+=(strc
> 255) ? 2 : 1;
303 cmp
+=(strc
> 255) ? 2 : 1; /* equal, use same increment */
306 return strcmp(str
, cmp
); /* ASCII CP */
309 /*********************************************************************
312 int _mbsicmp(const unsigned char* str
, const unsigned char* cmp
)
314 if(MSVCRT___mb_cur_max
> 1)
316 unsigned int strc
, cmpc
;
319 return *cmp
? -1 : 0;
322 strc
= _mbctolower(_mbsnextc(str
));
323 cmpc
= _mbctolower(_mbsnextc(cmp
));
325 return strc
< cmpc
? -1 : 1;
326 str
+=(strc
> 255) ? 2 : 1;
327 cmp
+=(strc
> 255) ? 2 : 1; /* equal, use same increment */
330 return strcasecmp(str
, cmp
); /* ASCII CP */
333 /*********************************************************************
336 int _mbsncmp(const unsigned char* str
, const unsigned char* cmp
, MSVCRT_size_t len
)
341 if(MSVCRT___mb_cur_max
> 1)
343 unsigned int strc
, cmpc
;
348 return *cmp
? -1 : 0;
351 strc
= _mbsnextc(str
);
352 cmpc
= _mbsnextc(cmp
);
354 return strc
< cmpc
? -1 : 1;
355 inc
=(strc
> 255) ? 2 : 1; /* Equal, use same increment */
359 return 0; /* Matched len chars */
361 return strncmp(str
, cmp
, len
); /* ASCII CP */
364 /*********************************************************************
365 * _mbsnbcmp(MSVCRT.@)
367 int _mbsnbcmp(const unsigned char* str
, const unsigned char* cmp
, MSVCRT_size_t len
)
371 if(MSVCRT___mb_cur_max
> 1)
373 unsigned int strc
, cmpc
;
378 return *cmp
? -1 : 0;
381 if (MSVCRT_isleadbyte(*str
))
383 strc
=(len
>=2)?_mbsnextc(str
):0;
391 if (MSVCRT_isleadbyte(*cmp
))
392 cmpc
=(len
>=2)?_mbsnextc(cmp
):0;
396 return strc
< cmpc
? -1 : 1;
401 return 0; /* Matched len chars */
402 FIXME("%s %s %d\n",str
,cmp
,len
);
404 return strncmp(str
,cmp
,len
);
407 /*********************************************************************
408 * _mbsnicmp(MSVCRT.@)
410 * Compare two multibyte strings case insensitively to 'len' characters.
412 int _mbsnicmp(const unsigned char* str
, const unsigned char* cmp
, MSVCRT_size_t len
)
414 /* FIXME: No tolower() for mb strings yet */
415 if(MSVCRT___mb_cur_max
> 1)
417 unsigned int strc
, cmpc
;
421 return *cmp
? -1 : 0;
424 strc
= _mbctolower(_mbsnextc(str
));
425 cmpc
= _mbctolower(_mbsnextc(cmp
));
427 return strc
< cmpc
? -1 : 1;
428 str
+=(strc
> 255) ? 2 : 1;
429 cmp
+=(strc
> 255) ? 2 : 1; /* Equal, use same increment */
431 return 0; /* Matched len chars */
433 return strncasecmp(str
, cmp
, len
); /* ASCII CP */
436 /*********************************************************************
437 * _mbsnbicmp(MSVCRT.@)
439 int _mbsnbicmp(const unsigned char* str
, const unsigned char* cmp
, MSVCRT_size_t len
)
443 if(MSVCRT___mb_cur_max
> 1)
445 unsigned int strc
, cmpc
;
450 return *cmp
? -1 : 0;
453 if (MSVCRT_isleadbyte(*str
))
455 strc
=(len
>=2)?_mbsnextc(str
):0;
463 if (MSVCRT_isleadbyte(*cmp
))
464 cmpc
=(len
>=2)?_mbsnextc(cmp
):0;
467 strc
= _mbctolower(strc
);
468 cmpc
= _mbctolower(cmpc
);
470 return strc
< cmpc
? -1 : 1;
475 return 0; /* Matched len bytes */
476 FIXME("%s %s %d\n",str
,cmp
,len
);
478 return strncmp(str
,cmp
,len
);
481 /*********************************************************************
484 * Find a multibyte character in a multibyte string.
486 unsigned char* _mbschr(const unsigned char* s
, unsigned int x
)
488 if(MSVCRT___mb_cur_max
> 1)
495 return (unsigned char*)s
;
498 s
+= c
> 255 ? 2 : 1;
501 return strchr(s
, x
); /* ASCII CP */
504 /*********************************************************************
507 unsigned char* _mbsrchr(const unsigned char* s
, unsigned int x
)
509 if(MSVCRT___mb_cur_max
> 1)
512 unsigned char* match
=NULL
;
518 match
=(unsigned char*)s
;
521 s
+=(c
> 255) ? 2 : 1;
527 /*********************************************************************
530 int MSVCRT_mbtowc(WCHAR
*dst
, const char* str
, MSVCRT_size_t n
)
534 if(!MultiByteToWideChar(CP_ACP
, 0, str
, n
, dst
, 1))
536 /* return the number of bytes from src that have been used */
539 if(n
>= 2 && MSVCRT_isleadbyte(*str
) && str
[1])
544 /*********************************************************************
545 * _mbbtombc(MSVCRT.@)
547 unsigned int _mbbtombc(unsigned int c
)
549 if(MSVCRT___mb_cur_max
> 1 &&
550 ((c
>= 0x20 && c
<=0x7e) ||(c
>= 0xa1 && c
<= 0xdf)))
552 /* FIXME: I can't get this function to return anything
553 * different to what I pass it...
556 return c
; /* ASCII CP or no MB char */
559 /*********************************************************************
560 * _ismbbkana(MSVCRT.@)
562 int _ismbbkana(unsigned int c
)
564 /* FIXME: use lc_ctype when supported, not lc_all */
565 if(MSVCRT_current_lc_all_cp
== 932)
567 /* Japanese/Katakana, CP 932 */
568 return (c
>= 0xa1 && c
<= 0xdf);
573 /*********************************************************************
574 * _ismbcdigit(MSVCRT.@)
576 int _ismbcdigit(unsigned int ch
)
578 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
579 return (get_char_typeW( wch
) & C1_DIGIT
);
582 /*********************************************************************
583 * _ismbcgraph(MSVCRT.@)
585 int _ismbcgraph(unsigned int ch
)
587 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
588 return (get_char_typeW( wch
) & (C1_UPPER
| C1_LOWER
| C1_DIGIT
| C1_PUNCT
| C1_ALPHA
));
591 /*********************************************************************
592 * _ismbcalpha (MSVCRT.@)
594 int _ismbcalpha(unsigned int ch
)
596 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
597 return (get_char_typeW( wch
) & C1_ALPHA
);
600 /*********************************************************************
601 * _ismbclower (MSVCRT.@)
603 int _ismbclower(unsigned int ch
)
605 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
606 return (get_char_typeW( wch
) & C1_UPPER
);
609 /*********************************************************************
610 * _ismbcupper (MSVCRT.@)
612 int _ismbcupper(unsigned int ch
)
614 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
615 return (get_char_typeW( wch
) & C1_LOWER
);
618 /*********************************************************************
619 * _ismbcsymbol(MSVCRT.@)
621 int _ismbcsymbol(unsigned int ch
)
623 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
625 if (!GetStringTypeW(CT_CTYPE3
, &wch
, 1, &ctype
))
627 WARN("GetStringTypeW failed on %x\n", ch
);
630 return ((ctype
& C3_SYMBOL
) != 0);
633 /*********************************************************************
634 * _ismbcalnum (MSVCRT.@)
636 int _ismbcalnum(unsigned int ch
)
638 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
639 return (get_char_typeW( wch
) & (C1_ALPHA
| C1_DIGIT
));
642 /*********************************************************************
643 * _ismbcspace (MSVCRT.@)
645 int _ismbcspace(unsigned int ch
)
647 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
648 return (get_char_typeW( wch
) & C1_SPACE
);
651 /*********************************************************************
652 * _ismbcprint (MSVCRT.@)
654 int _ismbcprint(unsigned int ch
)
656 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
657 return (get_char_typeW( wch
) & (C1_UPPER
| C1_LOWER
| C1_DIGIT
| C1_PUNCT
| C1_ALPHA
| C1_SPACE
));
660 /*********************************************************************
661 * _ismbcpunct(MSVCRT.@)
663 int _ismbcpunct(unsigned int ch
)
665 WCHAR wch
= msvcrt_mbc_to_wc( ch
);
666 return (get_char_typeW( wch
) & C1_PUNCT
);
669 /*********************************************************************
670 * _ismbchira(MSVCRT.@)
672 int _ismbchira(unsigned int c
)
674 /* FIXME: use lc_ctype when supported, not lc_all */
675 if(MSVCRT_current_lc_all_cp
== 932)
677 /* Japanese/Hiragana, CP 932 */
678 return (c
>= 0x829f && c
<= 0x82f1);
683 /*********************************************************************
684 * _ismbckata(MSVCRT.@)
686 int _ismbckata(unsigned int c
)
688 /* FIXME: use lc_ctype when supported, not lc_all */
689 if(MSVCRT_current_lc_all_cp
== 932)
692 return _ismbbkana(c
);
693 /* Japanese/Katakana, CP 932 */
694 return (c
>= 0x8340 && c
<= 0x8396 && c
!= 0x837f);
699 /*********************************************************************
700 * _ismbblead(MSVCRT.@)
702 int _ismbblead(unsigned int c
)
704 /* FIXME: should reference MSVCRT_mbctype */
705 return MSVCRT___mb_cur_max
> 1 && MSVCRT_isleadbyte(c
);
/*********************************************************************
 *		_ismbbtrail(MSVCRT.@)
 *
 * Test whether a byte may be a multibyte trail byte.
 *
 * PARAMS
 *  c [I] Byte to test
 *
 * RETURNS
 *  1 if _ismbblead(c) reports 0, otherwise 0. Every byte that is not a
 *  lead byte is therefore reported as a trail byte.
 */
int _ismbbtrail(unsigned int c)
{
  /* FIXME: should reference MSVCRT_mbctype */
  if (_ismbblead(c))
    return 0;

  return 1;
}
718 /*********************************************************************
719 * _ismbslead(MSVCRT.@)
721 int _ismbslead(const unsigned char* start
, const unsigned char* str
)
723 /* Lead bytes can also be trail bytes if caller messed up
724 * iterating through the string...
726 if(MSVCRT___mb_cur_max
> 1)
729 start
+= MSVCRT_isleadbyte(*str
) ? 2 : 1;
732 return MSVCRT_isleadbyte(*str
);
734 return 0; /* Must have been a trail, we skipped it */
/*********************************************************************
 *		_ismbstrail(MSVCRT.@)
 *
 * Test whether a position in a multibyte string is a trail byte.
 *
 * PARAMS
 *  start [I] Beginning of the string
 *  str   [I] Position inside the string to test
 *
 * RETURNS
 *  1 if str is after start, is not reported as a lead byte by
 *  _ismbslead(), and the byte before it is a lead byte; otherwise 0.
 */
int _ismbstrail(const unsigned char* start, const unsigned char* str)
{
  /* The first byte of the string can never be a trail byte, and looking
   * at str[-1] there would read before the buffer.
   */
  if (str <= start)
    return 0;

  /* Must not be a lead, and must be preceded by one */
  return !_ismbslead(start, str) && MSVCRT_isleadbyte(str[-1]);
}
746 /*********************************************************************
749 unsigned char* _mbsset(unsigned char* str
, unsigned int c
)
751 unsigned char* ret
= str
;
753 if(MSVCRT___mb_cur_max
== 1 || c
< 256)
754 return _strset(str
, c
); /* ASCII CP or SB char */
756 c
&= 0xffff; /* Strip high bits */
758 while(str
[0] && str
[1])
764 str
[0] = '\0'; /* FIXME: OK to shorten? */
769 /*********************************************************************
772 unsigned char* _mbsnset(unsigned char* str
, unsigned int c
, MSVCRT_size_t len
)
774 unsigned char *ret
= str
;
779 if(MSVCRT___mb_cur_max
== 1 || c
< 256)
780 return _strnset(str
, c
, len
); /* ASCII CP or SB char */
782 c
&= 0xffff; /* Strip high bits */
784 while(str
[0] && str
[1] && len
--)
790 str
[0] = '\0'; /* FIXME: OK to shorten? */
795 /*********************************************************************
796 * _mbsnccnt(MSVCRT.@)
797 * 'c' is for 'character'.
799 MSVCRT_size_t
_mbsnccnt(const unsigned char* str
, MSVCRT_size_t len
)
802 if(MSVCRT___mb_cur_max
> 1)
805 while(*str
&& len
-- > 0)
807 if(MSVCRT_isleadbyte(*str
))
820 return min(ret
, len
); /* ASCII CP */
823 /*********************************************************************
824 * _mbsnbcnt(MSVCRT.@)
825 * 'b' is for byte count.
827 MSVCRT_size_t
_mbsnbcnt(const unsigned char* str
, MSVCRT_size_t len
)
830 if(MSVCRT___mb_cur_max
> 1)
832 const unsigned char* xstr
= str
;
833 while(*xstr
&& len
-- > 0)
835 if (MSVCRT_isleadbyte(*xstr
++))
841 return min(ret
, len
); /* ASCII CP */
845 /*********************************************************************
848 unsigned char* _mbsncat(unsigned char* dst
, const unsigned char* src
, MSVCRT_size_t len
)
850 if(MSVCRT___mb_cur_max
> 1)
855 if (MSVCRT_isleadbyte(*dst
++))
858 while (*src
&& len
--)
861 if(MSVCRT_isleadbyte(*src
++))
867 return strncat(dst
, src
, len
); /* ASCII CP */
871 /*********************************************************************
874 unsigned char* _mbslwr(unsigned char* s
)
878 if (MSVCRT___mb_cur_max
> 1)
884 c
= _mbctolower(_mbsnextc(s
));
885 /* Note that I assume that the size of the character is unchanged */
899 /*********************************************************************
902 unsigned char* _mbsupr(unsigned char* s
)
906 if (MSVCRT___mb_cur_max
> 1)
912 c
= _mbctoupper(_mbsnextc(s
));
913 /* Note that I assume that the size of the character is unchanged */
927 /*********************************************************************
930 MSVCRT_size_t
_mbsspn(const unsigned char* string
, const unsigned char* set
)
932 const unsigned char *p
, *q
;
934 for (p
= string
; *p
; p
++)
936 if (MSVCRT_isleadbyte(*p
))
938 for (q
= set
; *q
; q
++)
942 if ((*p
== *q
) && (p
[1] == q
[1]))
950 for (q
= set
; *q
; q
++)
957 /*********************************************************************
960 MSVCRT_size_t
_mbscspn(const unsigned char* str
, const unsigned char* cmp
)
962 if (MSVCRT___mb_cur_max
> 1)
963 FIXME("don't handle double character case\n");
964 return strcspn(str
, cmp
);
967 /*********************************************************************
970 unsigned char* _mbsrev(unsigned char* str
)
972 int i
, len
= _mbslen(str
);
973 unsigned char *p
, *temp
=MSVCRT_malloc(len
*2);
978 /* unpack multibyte string to temp buffer */
982 if (MSVCRT_isleadbyte(*p
))
994 /* repack it in the reverse order */
996 for(i
=len
-1; i
>=0; i
--)
998 if(MSVCRT_isleadbyte(temp
[i
*2]))
1014 /*********************************************************************
1015 * _mbspbrk (MSVCRT.@)
1017 unsigned char* _mbspbrk(const unsigned char* str
, const unsigned char* accept
)
1019 const unsigned char* p
;
1023 for(p
= accept
; *p
; p
+= (MSVCRT_isleadbyte(*p
)?2:1) )
1026 if( !MSVCRT_isleadbyte(*p
) || ( *(p
+1) == *(str
+1) ) )
1027 return (unsigned char*)str
;
1029 str
+= (MSVCRT_isleadbyte(*str
)?2:1);