Merge commit 'origin/master'
[versaplex.git] / vxodbc / multibyte.cc
blob83afbb81b17ab4bcb282c4e9c7477e8bebbb92dc
/*
 * Description: Additional multibyte-related functions.
 *
 * Created     2001-03-03 Eiji Tokuya
 * Re-created  2001-09-16 Eiji Tokuya
 */
8 #include "multibyte.h"
9 #include "misc.h"
10 #include "connection.h"
11 #include "pgapifunc.h"
12 #include <string.h>
13 #include <ctype.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #ifndef TRUE
17 #define TRUE 1
18 #endif
20 static pg_CS CS_Table[] = {
21 {"SQL_ASCII", SQL_ASCII},
22 {"EUC_JP", EUC_JP},
23 {"EUC_CN", EUC_CN},
24 {"EUC_KR", EUC_KR},
25 {"EUC_TW", EUC_TW},
26 {"JOHAB", JOHAB}, /* since 7.3 */
27 {"UTF8", UTF8}, /* since 7.2 */
28 {"MULE_INTERNAL", MULE_INTERNAL},
29 {"LATIN1", LATIN1},
30 {"LATIN2", LATIN2},
31 {"LATIN3", LATIN3},
32 {"LATIN4", LATIN4},
33 {"LATIN5", LATIN5},
34 {"LATIN6", LATIN6},
35 {"LATIN7", LATIN7},
36 {"LATIN8", LATIN8},
37 {"LATIN9", LATIN9},
38 {"LATIN10", LATIN10},
39 {"WIN1256", WIN1256}, /* Arabic since 7.3 */
40 {"WIN1258", WIN1258}, /* Vietnamese since 8.1 */
41 {"WIN866", WIN866}, /* since 8.1 */
42 {"WIN874", WIN874}, /* Thai since 7.3 */
43 {"KOI8", KOI8R},
44 {"WIN1251", WIN1251}, /* Cyrillic */
45 {"WIN1252", WIN1252}, /* Western Europe since 8.1 */
46 {"ISO_8859_5", ISO_8859_5},
47 {"ISO_8859_6", ISO_8859_6},
48 {"ISO_8859_7", ISO_8859_7},
49 {"ISO_8859_8", ISO_8859_8},
50 {"WIN1250", WIN1250}, /* Central Europe */
51 {"WIN1253", WIN1253}, /* Greek since 8.2 */
52 {"WIN1254", WIN1254}, /* Turkish since 8.2 */
53 {"WIN1255", WIN1255}, /* Hebrew since 8.2 */
54 {"WIN1257", WIN1257}, /* Baltic(North Europe) since 8.2 */
56 {"SJIS", SJIS},
57 {"BIG5", BIG5},
58 {"GBK", GBK}, /* since 7.3 */
59 {"UHC", UHC}, /* since 7.3 */
60 {"GB18030", GB18030}, /* since 7.3 */
61 {"OTHER", OTHER}
64 static pg_CS CS_Alias[] = {
65 {"UNICODE", UTF8},
66 {"TCVN", WIN1258},
67 {"ALT", WIN866},
68 {"WIN", WIN1251},
69 {"OTHER", OTHER}
72 CSTR OTHER_STRING = "OTHER";
74 int pg_CS_code(const UCHAR * characterset_string)
76 int i, c = -1;
78 for (i = 0; CS_Table[i].code != OTHER; i++)
80 if (0 == stricmp((const char *)characterset_string, CS_Table[i].name))
82 c = CS_Table[i].code;
83 break;
86 if (c < 0)
88 for (i = 0; CS_Alias[i].code != OTHER; i++)
90 if (0 == stricmp((const char *)characterset_string, CS_Alias[i].name))
92 c = CS_Alias[i].code;
93 break;
97 if (c < 0)
98 c = OTHER;
99 return (c);
102 UCHAR *check_client_encoding(const UCHAR * conn_settings)
104 const UCHAR *cptr, *sptr = NULL;
105 UCHAR *rptr;
106 BOOL allowed_cmd = TRUE, in_quote = FALSE;
107 int step = 0;
108 size_t len = 0;
110 for (cptr = conn_settings; *cptr; cptr++)
112 if (in_quote)
113 if (LITERAL_QUOTE == *cptr)
115 in_quote = FALSE;
116 continue;
118 if (';' == *cptr)
120 allowed_cmd = TRUE;
121 step = 0;
122 continue;
124 if (!allowed_cmd)
125 continue;
126 if (isspace(*cptr))
127 continue;
128 switch (step)
130 case 0:
131 if (0 != strnicmp((const char *)cptr, "set", 3))
133 allowed_cmd = FALSE;
134 continue;
136 step++;
137 cptr += 3;
138 break;
139 case 1:
140 if (0 != strnicmp((const char *)cptr, "client_encoding", 15))
142 allowed_cmd = FALSE;
143 continue;
145 step++;
146 cptr += 15;
147 break;
148 case 2:
149 if (0 != strnicmp((const char *)cptr, "to", 2))
151 allowed_cmd = FALSE;
152 continue;
154 step++;
155 cptr += 2;
156 break;
157 case 3:
158 if (LITERAL_QUOTE == *cptr)
160 cptr++;
161 for (sptr = cptr; *cptr && *cptr != LITERAL_QUOTE;
162 cptr++);
163 } else
165 for (sptr = cptr; *cptr && !isspace(*cptr); cptr++);
167 len = cptr - sptr;
168 step++;
169 break;
172 if (!sptr)
173 return NULL;
174 rptr = (UCHAR *)malloc(len + 1);
175 memcpy(rptr, sptr, len);
176 rptr[len] = '\0';
177 mylog("extracted a client_encoding '%s' from conn_settings\n",
178 rptr);
179 return rptr;
182 const UCHAR *pg_CS_name(int characterset_code)
184 int i;
185 for (i = 0; CS_Table[i].code != OTHER; i++)
187 if (CS_Table[i].code == characterset_code)
188 return (const UCHAR *)CS_Table[i].name;
190 return (const UCHAR *)(OTHER_STRING);
193 static int pg_mb_maxlen(int characterset_code)
195 switch (characterset_code)
197 case UTF8:
198 return 6;
199 case EUC_TW:
200 return 4;
201 case EUC_JP:
202 case GB18030:
203 return 3;
204 case SJIS:
205 case BIG5:
206 case GBK:
207 case UHC:
208 case EUC_CN:
209 case EUC_KR:
210 case JOHAB:
211 return 2;
212 default:
213 return 1;
217 int pg_CS_stat(int stat, unsigned int character, int characterset_code)
219 if (character == 0)
220 stat = 0;
221 switch (characterset_code)
223 case UTF8:
225 if (stat < 2 && character >= 0x80)
227 if (character >= 0xfc)
228 stat = 6;
229 else if (character >= 0xf8)
230 stat = 5;
231 else if (character >= 0xf0)
232 stat = 4;
233 else if (character >= 0xe0)
234 stat = 3;
235 else if (character >= 0xc0)
236 stat = 2;
237 } else if (stat > 2 && character > 0x7f)
238 stat--;
239 else
240 stat = 0;
242 break;
243 /* Shift-JIS Support. */
244 case SJIS:
246 if (stat < 2 &&
247 character > 0x80 &&
248 !(character > 0x9f && character < 0xe0))
249 stat = 2;
250 else if (stat == 2)
251 stat = 1;
252 else
253 stat = 0;
255 break;
256 /* Chinese Big5 Support. */
257 case BIG5:
259 if (stat < 2 && character > 0xA0)
260 stat = 2;
261 else if (stat == 2)
262 stat = 1;
263 else
264 stat = 0;
266 break;
267 /* Chinese GBK Support. */
268 case GBK:
270 if (stat < 2 && character > 0x7F)
271 stat = 2;
272 else if (stat == 2)
273 stat = 1;
274 else
275 stat = 0;
277 break;
279 /* Korian UHC Support. */
280 case UHC:
282 if (stat < 2 && character > 0x7F)
283 stat = 2;
284 else if (stat == 2)
285 stat = 1;
286 else
287 stat = 0;
289 break;
291 /* EUC_JP Support */
292 case EUC_JP:
294 if (stat < 3 && character == 0x8f) /* JIS X 0212 */
295 stat = 3;
296 else if (stat != 2 && (character == 0x8e || character > 0xa0)) /* Half Katakana HighByte & Kanji HighByte */
297 stat = 2;
298 else if (stat == 2)
299 stat = 1;
300 else
301 stat = 0;
303 break;
305 /* EUC_CN, EUC_KR, JOHAB Support */
306 case EUC_CN:
307 case EUC_KR:
308 case JOHAB:
310 if (stat < 2 && character > 0xa0)
311 stat = 2;
312 else if (stat == 2)
313 stat = 1;
314 else
315 stat = 0;
317 break;
318 case EUC_TW:
320 if (stat < 4 && character == 0x8e)
321 stat = 4;
322 else if (stat == 4 && character > 0xa0)
323 stat = 3;
324 else if ((stat == 3 || stat < 2) && character > 0xa0)
325 stat = 2;
326 else if (stat == 2)
327 stat = 1;
328 else
329 stat = 0;
331 break;
332 /*Chinese GB18030 support.Added by Bill Huang <bhuang@redhat.com> <bill_huanghb@ybb.ne.jp> */
333 case GB18030:
335 if (stat < 2 && character > 0x80)
336 stat = 2;
337 else if (stat == 2)
339 if (character >= 0x30 && character <= 0x39)
340 stat = 3;
341 else
342 stat = 1;
343 } else if (stat == 3)
345 if (character >= 0x30 && character <= 0x39)
346 stat = 1;
347 else
348 stat = 3;
349 } else
350 stat = 0;
352 break;
353 default:
355 stat = 0;
357 break;
359 return stat;
363 UCHAR *pg_mbschr(int csc, const UCHAR * string, unsigned int character)
365 int mb_st = 0;
366 const UCHAR *s, *rs = NULL;
368 for (s = string; *s; s++)
370 mb_st = pg_CS_stat(mb_st, (UCHAR) * s, csc);
371 if (mb_st == 0 && (*s == character))
373 rs = s;
374 break;
377 return ((UCHAR *) rs);
380 size_t pg_mbslen(int csc, const UCHAR * string)
382 UCHAR *s;
383 size_t len;
384 int cs_stat;
385 for (len = 0, cs_stat = 0, s = (UCHAR *) string; *s != 0; s++)
387 cs_stat = pg_CS_stat(cs_stat, (unsigned int) *s, csc);
388 if (cs_stat < 2)
389 len++;
391 return len;
394 UCHAR *pg_mbsinc(int csc, const UCHAR * current)
396 int mb_stat = 0;
397 if (*current != 0)
399 mb_stat = (int) pg_CS_stat(mb_stat, *current, csc);
400 if (mb_stat == 0)
401 mb_stat = 1;
402 return ((UCHAR *) current + mb_stat);
403 } else
404 return NULL;
407 static char *CC_lookup_cs_new(ConnectionClass * self)
409 char *encstr = NULL;
410 QResultClass *res;
412 res =
413 CC_send_query(self, "select pg_client_encoding()", NULL,
414 IGNORE_ABORT_ON_CONN | ROLLBACK_ON_ERROR, NULL);
415 if (QR_command_maybe_successful(res))
417 const char *enc = QR_get_value_backend_text(res, 0, 0);
419 if (enc)
420 encstr = strdup(enc);
422 QR_Destructor(res);
423 return encstr;
425 static char *CC_lookup_cs_old(ConnectionClass * self)
427 char *encstr = NULL;
428 HSTMT hstmt;
429 RETCODE result;
431 result = PGAPI_AllocStmt(self, &hstmt);
432 if (!SQL_SUCCEEDED(result))
433 return encstr;
435 result =
436 PGAPI_ExecDirect(hstmt, (const UCHAR *)"Show Client_Encoding",
437 SQL_NTS, 0);
438 if (result == SQL_SUCCESS_WITH_INFO)
440 char sqlState[8], errormsg[128], enc[32];
442 if (PGAPI_Error(NULL, NULL, hstmt, (UCHAR *)sqlState,
443 NULL, (UCHAR *)errormsg, sizeof(errormsg), NULL)
444 == SQL_SUCCESS &&
445 sscanf(errormsg, "%*s %*s %*s %*s %*s %s", enc) > 0)
446 encstr = strdup(enc);
448 PGAPI_FreeStmt(hstmt, SQL_DROP);
449 return encstr;
453 * This function works under Windows or Unicode case only.
454 * Simply returns NULL under other OSs.
456 const char *get_environment_encoding(const ConnectionClass * conn,
457 const char *setenc,
458 const char *currenc, BOOL bStartup)
460 const char *wenc = NULL;
461 int acp;
463 #ifdef UNICODE_SUPPORT
464 if (CC_is_in_unicode_driver(conn))
465 return "UTF8";
466 #endif /* UNICODE_SUPPORT */
467 if (setenc && stricmp(setenc, OTHER_STRING))
468 return setenc;
469 #ifdef WIN32
470 acp = GetACP();
471 if (acp >= 1251 && acp <= 1258)
473 if (bStartup || stricmp(currenc, "SQL_ASCII") == 0)
474 return wenc;
476 switch (acp)
478 case 932:
479 wenc = "SJIS";
480 break;
481 case 936:
482 if (!bStartup && PG_VERSION_GT(conn, 7.2))
483 wenc = "GBK";
484 break;
485 case 949:
486 if (!bStartup && PG_VERSION_GT(conn, 7.2))
487 wenc = "UHC";
488 break;
489 case 950:
490 wenc = "BIG5";
491 break;
492 case 1250:
493 wenc = "WIN1250";
494 break;
495 case 1251:
496 wenc = "WIN1251";
497 break;
498 case 1256:
499 if (PG_VERSION_GE(conn, 7.3))
500 wenc = "WIN1256";
501 break;
502 case 1252:
503 if (strnicmp(currenc, "LATIN", 5) == 0)
504 break;
505 if (PG_VERSION_GE(conn, 8.1))
506 wenc = "WIN1252";
507 else
508 wenc = "LATIN1";
509 break;
510 case 1258:
511 if (PG_VERSION_GE(conn, 8.1))
512 wenc = "WIN1258";
513 break;
514 case 1253:
515 if (PG_VERSION_GE(conn, 8.2))
516 wenc = "WIN1253";
517 break;
518 case 1254:
519 if (PG_VERSION_GE(conn, 8.2))
520 wenc = "WIN1254";
521 break;
522 case 1255:
523 if (PG_VERSION_GE(conn, 8.2))
524 wenc = "WIN1255";
525 break;
526 case 1257:
527 if (PG_VERSION_GE(conn, 8.2))
528 wenc = "WIN1257";
529 break;
531 #endif /* WIN32 */
532 return wenc;
535 void CC_lookup_characterset(ConnectionClass * self)
537 char *encspec = NULL, *currenc = NULL, *tencstr;
538 CSTR func = "CC_lookup_characterset";
540 mylog("%s: entering...\n", func);
541 if (self->original_client_encoding)
542 encspec = strdup(self->original_client_encoding);
543 if (self->current_client_encoding)
544 currenc = strdup(self->current_client_encoding);
545 else if (PG_VERSION_LT(self, 7.2))
546 currenc = CC_lookup_cs_old(self);
547 else
548 currenc = CC_lookup_cs_new(self);
549 tencstr = encspec ? encspec : currenc;
550 if (self->original_client_encoding)
552 if (stricmp(self->original_client_encoding, tencstr))
554 char msg[256];
556 snprintf(msg, sizeof(msg),
557 "The client_encoding '%s' was changed to '%s'",
558 self->original_client_encoding, tencstr);
559 CC_set_error(self, CONN_OPTION_VALUE_CHANGED, msg, func);
561 free(self->original_client_encoding);
563 #ifndef UNICODE_SUPPORT
564 else
566 const char *wenc =
567 get_environment_encoding(self, encspec, currenc, FALSE);
568 if (wenc && (!tencstr || stricmp(tencstr, wenc)))
570 QResultClass *res;
571 char query[64];
572 int errnum = CC_get_errornumber(self);
573 BOOL cmd_success;
575 sprintf(query, "set client_encoding to '%s'", wenc);
576 res =
577 CC_send_query(self, query, NULL,
578 IGNORE_ABORT_ON_CONN | ROLLBACK_ON_ERROR,
579 NULL);
580 cmd_success = QR_command_maybe_successful(res);
581 QR_Destructor(res);
582 CC_set_errornumber(self, errnum);
583 if (cmd_success)
585 self->original_client_encoding = strdup(wenc);
586 self->ccsc = pg_CS_code(self->original_client_encoding);
587 if (encspec)
588 free(encspec);
589 if (currenc)
590 free(currenc);
591 return;
595 #endif /* UNICODE_SUPPORT */
596 if (tencstr)
598 self->original_client_encoding = tencstr;
599 if (encspec && currenc)
600 free(currenc);
601 self->ccsc = pg_CS_code((const UCHAR *)tencstr);
602 qlog(" [ Client encoding = '%s' (code = %d) ]\n",
603 self->original_client_encoding, self->ccsc);
604 if (self->ccsc < 0)
606 char msg[256];
608 snprintf(msg, sizeof(msg),
609 "would handle the encoding '%s' like ASCII",
610 tencstr);
611 CC_set_error(self, CONN_OPTION_VALUE_CHANGED, msg, func);
613 } else
615 self->ccsc = SQL_ASCII;
616 self->original_client_encoding = NULL;
618 self->mb_maxbyte_per_char = pg_mb_maxlen(self->ccsc);
621 void encoded_str_constr(encoded_str * encstr, int ccsc, const char *str)
623 encstr->ccsc = ccsc;
624 encstr->encstr = (const UCHAR *)str;
625 encstr->pos = -1;
626 encstr->ccst = 0;
629 int encoded_nextchar(encoded_str * encstr)
631 int chr;
633 chr = encstr->encstr[++encstr->pos];
634 encstr->ccst =
635 pg_CS_stat(encstr->ccst, (unsigned int) chr, encstr->ccsc);
636 return chr;
639 ssize_t encoded_position_shift(encoded_str * encstr, size_t shift)
641 encstr->pos += shift;
642 return encstr->pos;
645 int encoded_byte_check(encoded_str * encstr, size_t abspos)
647 int chr;
649 chr = encstr->encstr[encstr->pos = abspos];
650 encstr->ccst =
651 pg_CS_stat(encstr->ccst, (unsigned int) chr, encstr->ccsc);
652 return chr;