2 * Description: This module contains utf8 <-> ucs2 conversion routines
7 /* gcc <malloc.h> has been replaced by <stdlib.h> */
/* Define _CRTDBG_MAP_ALLOC unless _MEMORY_DEBUG_ is already defined. */
10 #ifndef _MEMORY_DEBUG_
12 #define _CRTDBG_MAP_ALLOC
14 #endif /* _MEMORY_DEBUG_ */
/*
 * Constants used by ucs2_to_utf8() to pack one UCS-2 code unit into a
 * 2- or 3-byte UTF-8 sequence.  The sequence is assembled in an integer
 * whose lowest-order byte holds the lead byte, and is then copied out
 * with memcpy().
 * NOTE(review): that layout looks like it assumes a little-endian host
 * -- confirm before using on other architectures.
 */
/* Bits 11 and above; when none are set the 2-byte form is used. */
21 #define byte3check 0xfffff800
/* 2-byte template: lead-byte marker 0xc0 in byte 0, continuation marker 0x80 in byte 1. */
22 #define byte2_base 0x80c0
/* Bits 6..10 of the code unit; shifted right by 6 into the lead byte. */
23 #define byte2_mask1 0x07c0
/* Bits 0..5 of the code unit; shifted left by 8 into the second byte. */
24 #define byte2_mask2 0x003f
/* 3-byte template: lead-byte marker 0xe0 in byte 0, 0x80 in bytes 1 and 2. */
25 #define byte3_base 0x8080e0
/* Bits 12..15 of the code unit; shifted right by 12 into the lead byte. */
26 #define byte3_mask1 0xf000
/* Bits 6..11 of the code unit; shifted left by 2 into the second byte. */
27 #define byte3_mask2 0x0fc0
/* Bits 0..5 of the code unit; shifted left by 16 into the third byte. */
28 #define byte3_mask3 0x003f
/*
 * iswascii: classify the wide character c by converting it with wctob()
 * and passing the result to isascii().
 * NOTE(review): the trailing "#endif HAVE_ISWASCII" suggests this is a
 * fallback for platforms lacking a native iswascii -- confirm against the
 * opening directive.
 */
36 int iswascii(wchar_t c
)
38 return isascii(wctob(c
));
40 #endif /* HAVE_ISWASCII */
/*
 * ucs2strlen: scan ucs2str from index 0 and advance len until a zero
 * SQLWCHAR element is found, i.e. count the elements that precede the
 * terminator.
 * ucs2str is indexed without any check here, so it must be non-NULL and
 * zero-terminated.
 * NOTE(review): presumably len is the value returned -- confirm.
 */
43 SQLULEN
ucs2strlen(const SQLWCHAR
* ucs2str
)
46 for (len
= 0; ucs2str
[len
]; len
++)
/*
 * ucs2_to_utf8: convert the UCS-2 string ucs2str into UTF-8 bytes stored
 * in a buffer obtained from malloc().
 *
 *   ucs2str          - source code units.
 *   ilen             - number of code units to convert; it can be replaced
 *                      by ucs2strlen(ucs2str) (presumably when the caller
 *                      passes a negative value -- TODO confirm).
 *   olen             - output length; set to SQL_NULL_DATA on one path
 *                      (presumably when ucs2str is NULL -- TODO confirm).
 *   lower_identifier - presumably selects the tolower() branch for ASCII
 *                      characters -- TODO confirm.
 *
 * NOTE(review): presumably returns utf8str and the caller must free() it
 * -- confirm.
 */
50 char *ucs2_to_utf8(const SQLWCHAR
* ucs2str
, SQLLEN ilen
, SQLLEN
* olen
,
51 BOOL lower_identifier
)
54 /*mylog("ucs2_to_utf8 %p ilen=%d ", ucs2str, ilen);*/
/* Report "no data" through olen. */
58 *olen
= SQL_NULL_DATA
;
/* Derive the length from the zero terminator of the input. */
62 ilen
= ucs2strlen(ucs2str
);
63 /*mylog(" newlen=%d", ilen);*/
/*
 * Each code unit yields at most 3 bytes (see the 3-byte branch below),
 * hence ilen * 3; the extra byte is presumably for a terminating NUL.
 */
64 utf8str
= (char *) malloc(ilen
* 3 + 1);
/* Visit up to ilen code units; wstr points at the current one. */
72 for (i
= 0, wstr
= ucs2str
; i
< ilen
; i
++, wstr
++)
/* No bit above the low seven is set: emit a single byte. */
76 else if (0 == (*wstr
& 0xffffff80)) /* ASCII */
/* Lower-cased copy of the ASCII character. */
79 utf8str
[len
++] = (char) tolower(*wstr
);
/* Unmodified copy of the ASCII character. */
81 utf8str
[len
++] = (char) *wstr
;
/* Fits in 11 bits: build the 2-byte sequence. */
82 } else if ((*wstr
& byte3check
) == 0)
/*
 * Lead byte (marker | bits 6..10) goes in the low-order byte and the
 * continuation byte (marker | bits 0..5) in the next one.
 */
84 byte2code
= byte2_base
|
85 ((byte2_mask1
& *wstr
) >> 6) |
86 ((byte2_mask2
& *wstr
) << 8);
/*
 * Copy the packed value as raw bytes.
 * NOTE(review): output byte order follows the host integer layout;
 * looks like little-endian is assumed -- confirm.
 */
87 memcpy(utf8str
+ len
, (char *) &byte2code
,
89 len
+= sizeof(byte2code
);
/*
 * Otherwise build the 3-byte sequence: bits 12..15 go in byte 0,
 * bits 6..11 in byte 1 and bits 0..5 in byte 2.
 */
92 byte4code
= byte3_base
|
93 ((byte3_mask1
& *wstr
) >> 12) |
94 ((byte3_mask2
& *wstr
) << 2) |
95 ((byte3_mask3
& *wstr
) << 16);
/* Only the first 3 bytes of byte4code are copied to the output. */
96 memcpy(utf8str
+ len
, (char *) &byte4code
, 3);
104 /*mylog(" olen=%d %s\n", *olen, utf8str ? utf8str : "");*/
/*
 * Payload masks used by utf8_to_ucs2_lf() when decoding multi-byte UTF-8
 * sequences into a single code unit.
 */
/* 3-byte sequence, first byte: low 4 bits, shifted left by 12. */
108 #define byte3_m1 0x0f
/* 3-byte sequence, second byte: low 6 bits, shifted left by 6. */
109 #define byte3_m2 0x3f
/* 3-byte sequence, third byte: low 6 bits, used unshifted. */
110 #define byte3_m3 0x3f
/* 2-byte sequence, first byte: low 5 bits, shifted left by 6. */
111 #define byte2_m1 0x1f
/* 2-byte sequence, second byte: low 6 bits, used unshifted. */
112 #define byte2_m2 0x3f
113 SQLULEN
utf8_to_ucs2_lf(const char *utf8str
, SQLLEN ilen
, BOOL lfconv
,
114 SQLWCHAR
* ucs2str
, SQLULEN bufcount
)
117 SQLULEN ocount
, wcode
;
120 /*mylog("utf8_to_ucs2 ilen=%d bufcount=%d", ilen, bufcount);*/
123 /*mylog(" string=%s\n", utf8str);*/
129 ilen
= strlen(utf8str
);
130 for (i
= 0, ocount
= 0, str
= (const UCHAR
*)utf8str
; i
< ilen
&& *str
;)
132 /* if (iswascii(*str)) */
135 if (lfconv
&& PG_LINEFEED
== *str
&&
136 (i
== 0 || PG_CARRIAGE_RETURN
!= str
[-1]))
138 if (ocount
< bufcount
)
139 ucs2str
[ocount
] = PG_CARRIAGE_RETURN
;
142 if (ocount
< bufcount
)
143 ucs2str
[ocount
] = *str
;
147 } else if (0xe0 == (*str
& 0xe0)) /* 3 byte code */
149 if (ocount
< bufcount
)
151 wcode
= ((((UInt4
) * str
) & byte3_m1
) << 12) |
152 ((((UInt4
) str
[1]) & byte3_m2
) << 6) |
153 (((UInt4
) str
[2]) & byte3_m3
);
154 ucs2str
[ocount
] = (SQLWCHAR
) wcode
;
161 if (ocount
< bufcount
)
163 wcode
= ((((UInt4
) * str
) & byte2_m1
) << 6) |
164 (((UInt4
) str
[1]) & byte2_m2
);
165 ucs2str
[ocount
] = (SQLWCHAR
) wcode
;
172 if (ocount
< bufcount
&& ucs2str
)
174 /*mylog(" ocount=%d\n", ocount);*/